Compare commits

127 Commits

Author SHA1 Message Date
eric  bd862cb238  deploy: 6100dcae74  2026-01-22 06:48:23 +00:00
eric  293f0bfa77  deploy: c112e1eb44  2026-01-22 05:11:15 +00:00
eric  c15d37458e  deploy: fe6bf91f8c  2026-01-22 01:50:14 +00:00
eric  07438a27e9  deploy: 13abf5792b  2026-01-10 20:11:08 +00:00
eric  4355096bdc  deploy: f7528b364e  2026-01-08 18:14:12 +00:00
eric  9c66ed1b1b  deploy: 3b1396d814  2026-01-08 06:03:04 +00:00
eric  598c74df0a  deploy: 89dc118ecd  2026-01-03 06:48:17 +00:00
eric  41ec0626e2  deploy: f1178d37f5  2026-01-03 06:28:22 +00:00
eric  346f1f1450  deploy: f1178d37f5  2025-12-29 07:17:38 +00:00
eric  0d2993f39b  deploy: e9dfd966ba  2025-12-29 07:11:33 +00:00
eric  786f535c82  deploy: fbdae5fe68  2025-12-28 21:21:58 +00:00
eric  ab14cbc592  deploy: 13a094eb0b  2025-12-28 20:42:55 +00:00
eric  dc0feb72a8  deploy: 30f639d3c2  2025-12-27 22:32:07 +00:00
eric  8bf55a3b50  deploy: 9ffc2bb9a7  2025-12-27 22:05:57 +00:00
eric  c75c89c088  deploy: cd4cace37f  2025-12-27 21:22:00 +00:00
eric  4c7d9f4905  deploy: 79473f582a  2025-12-21 02:42:15 +00:00
eric  cde81e78d7  deploy: 9ec652c9c3  2025-12-21 01:57:11 +00:00
eric  008e4afff6  deploy: b19b045fc0  2025-12-20 19:04:00 +00:00
eric  ff2b69c081  deploy: 5b0d9a761d  2025-12-20 19:00:16 +00:00
eric  2cbf345452  deploy: f374adac0c  2025-12-20 18:00:15 +00:00
eric  9616c3681f  deploy: de8df5519e  2025-12-20 17:52:54 +00:00
eric  2d7d143cbf  deploy: 2bb856b1e7  2025-12-20 17:22:08 +00:00
eric  6e752d8af2  deploy: 3f9f80d24f  2025-12-20 07:02:49 +00:00
eric  e48bde719b  deploy: f976130fbf  2025-12-20 06:41:49 +00:00
eric  d9dccae876  deploy: 4c1d048f35  2025-12-20 06:32:56 +00:00
eric  960c082536  deploy: 3b723ecfad  2025-12-20 06:28:14 +00:00
eric  3e84d0613e  deploy: f3400013c7  2025-12-20 06:26:31 +00:00
eric  645963ca87  deploy: d2d39745c5  2025-12-20 06:16:00 +00:00
eric  22b2a53fc9  deploy: 1c61274980  2025-12-20 01:56:03 +00:00
eric  184c07ebff  deploy: 3c86e9eba9  2025-12-20 01:53:21 +00:00
eric  40a88799ee  deploy: 34aa99a15d  2025-12-20 01:50:43 +00:00
eric  19d2678a16  deploy: 6ed1d69396  2025-10-04 20:42:01 +00:00
eric  175644c1bf  deploy: 2f73eaed9a  2025-10-04 17:44:58 +00:00
eric  c8d7b92351  deploy: 85e0d053b7  2025-10-04 17:44:24 +00:00
eric  7a88de8adc  deploy: 0e4b4194b6  2025-10-04 05:52:57 +00:00
eric  7864b7a14d  deploy: 7ef6ce1987  2025-10-02 08:42:51 +00:00
eric  7ff7d71dcb  deploy: ca873828aa  2025-10-02 08:37:10 +00:00
eric  48268a2fc1  deploy: 832aabc376  2025-10-02 08:34:26 +00:00
eric  4808a62cd0  deploy: cc368da0b6  2025-10-02 07:23:17 +00:00
eric  811c80144e  deploy: 2b2203c6f7  2025-09-23 06:20:48 +00:00
eric  ad8faa17fc  deploy: 7cd5bd6558  2025-09-23 06:14:59 +00:00
eric  66d0011843  deploy: f6853a1cc4  2025-09-22 07:31:32 +00:00
eric  f0b04beb1f  deploy: 96e2f71ffd  2025-09-22 07:27:33 +00:00
eric  dbe2d5d1b0  deploy: 69c2890aad  2025-08-20 06:29:20 +00:00
eric  2aadf95801  deploy: 16732daa57  2025-08-20 06:24:28 +00:00
eric  ea9c28dce4  deploy: 3ee20f1118  2025-08-20 06:05:30 +00:00
eric  1be19a7328  deploy: ba596e75db  2025-08-20 06:04:04 +00:00
eric  073fbfe081  deploy: ed94cec2ad  2025-08-20 04:50:28 +00:00
eric  ed03d0a873  deploy: 0945204182  2025-08-20 04:34:01 +00:00
eric  798e6c7d75  deploy: c06e978bd6  2025-08-20 04:16:56 +00:00
eric  dff213a604  deploy: 4eabec8fbb  2025-08-18 21:13:31 +00:00
eric  238fcb29b4  deploy: 849b7ef8d1  2025-08-18 21:00:59 +00:00
eric  dc3978a294  deploy: f0a4c1ed6e  2025-08-16 23:38:42 +00:00
eric  6dfed70e80  deploy: 79c5380e51  2025-08-16 23:03:09 +00:00
eric  596dc4948b  deploy: beb7fbb468  2025-08-16 22:58:02 +00:00
eric  cb921d30e0  deploy: e33aef8159  2025-08-16 21:43:36 +00:00
eric  50e9f52f56  deploy: 466bfee32b  2025-08-16 21:14:31 +00:00
eric  b4e2b7f818  deploy: bcab4969f4  2025-08-16 21:08:19 +00:00
eric  c2b8a4f233  deploy: a7f1af6c7f  2025-08-16 20:59:44 +00:00
eric  8d18da2143  deploy: fe52a73b7e  2025-08-16 20:49:22 +00:00
eric  34ee48a56c  deploy: 8518b064dd  2025-08-16 20:48:34 +00:00
eric  df3c006010  deploy: 2a5ee6dd11  2025-08-14 06:51:16 +00:00
eric  c8813b97f3  deploy: c9ed800d9f  2025-08-09 04:06:01 +00:00
eric  52a6e87d0d  deploy: c25cd8921e  2025-08-04 03:59:59 +00:00
eric  5e1e4efc08  deploy: de23f04b62  2025-08-03 15:42:44 +00:00
eric  f50ba780e1  deploy: abed5c59ab  2025-08-03 15:38:00 +00:00
eric  a9192dd7da  deploy: eba296fed3  2025-08-03 06:15:25 +00:00
eric  a50fee0dcf  deploy: 5706ff7d48  2025-08-03 05:45:33 +00:00
eric  9454edc7ed  deploy: 936d04687f  2025-08-03 05:33:26 +00:00
eric  9efdd85826  deploy: b4cc0126df  2025-08-03 05:05:55 +00:00
eric  95df119b6d  deploy: fd19c595b6  2025-08-03 04:41:31 +00:00
eric  a6a4ee4adb  deploy: f90b459eda  2025-08-03 03:45:17 +00:00
eric  a977deebd1  deploy: 9c5d4a2102  2025-08-03 03:30:50 +00:00
eric  8c3be83b91  deploy: 23b9adc43a  2025-08-03 03:20:19 +00:00
eric  76c539f415  deploy: 38bbe8cbae  2025-08-03 03:18:44 +00:00
eric  c1be16072c  deploy: e15a7ffcef  2025-08-03 03:17:32 +00:00
eric  11b8ac016c  deploy: 84b3c2016e  2025-08-03 03:16:29 +00:00
eric  d03a2c49dd  deploy: ec6f60a996  2025-08-03 03:11:20 +00:00
eric  0ae24eb647  deploy: 38bbe8cbae  2025-08-03 03:10:48 +00:00
eric  ce7b6b17b2  deploy: bfc38f9b43  2025-08-03 03:10:08 +00:00
eric  ef26adac81  deploy: b213faf97d  2025-08-03 03:09:27 +00:00
eric  fb47a09d9b  deploy: 015353bb06  2025-08-03 03:06:54 +00:00
eric  b98d88fd0f  deploy: afebdd8473  2025-08-03 03:03:30 +00:00
eric  144a1b1692  deploy: 2a163cf7fe  2025-08-03 02:54:11 +00:00
eric  df6ffb4bc0  deploy: cbccd87961  2025-08-03 02:47:13 +00:00
eric  219a24e3a5  deploy: 73f53ff6b9  2025-08-03 02:41:00 +00:00
eric  335ed1d107  deploy: 38bbe8cbae  2025-08-03 02:27:29 +00:00
eric  8f3c545991  deploy: b6192ca3ca  2025-08-03 02:20:56 +00:00
eric  e8ae2242e3  deploy: a3ccac4cd2  2025-08-02 22:50:04 +00:00
eric  d801fe9307  deploy: 88cbb7efd5  2025-08-02 22:46:40 +00:00
eric  20c1888f78  deploy: e6c8ccf0f8  2025-08-02 22:45:16 +00:00
eric  9603629d20  deploy: 4996ae35fb  2024-11-11 03:37:49 +00:00
eric  e60475c8ac  deploy: b146b015d0  2024-11-11 03:33:08 +00:00
eric  e83c0477c7  deploy: 53dd77638e  2024-11-11 03:31:49 +00:00
eric  e86aa5f8cb  deploy: f09740923b  2024-11-11 02:37:20 +00:00
eric  8832dff8d6  deploy: dfe3a7b0c8  2024-09-16 21:18:23 +00:00
eric  ebed172a21  deploy: 9881f6e8bc  2024-09-16 20:56:40 +00:00
eric  e7fda8a866  deploy: d96148c395  2024-09-16 20:09:44 +00:00
eric  a147bbd8c4  deploy: 248138f653  2024-09-16 20:07:43 +00:00
eric  38518686d9  deploy: 8727ffa33f  2024-09-16 18:38:19 +00:00
eric  38703cd607  deploy: 39e6a27cee  2024-09-16 17:31:46 +00:00
eric  22b4234f06  deploy: a8a360bf89  2024-07-03 21:31:02 +00:00
eric  ef9bc708e1  deploy: c5f30e590d  2024-06-13 02:33:15 +00:00
61a3e5a38d Drone build 1bdc086 2024-06-12 06:10:05 +00:00
303714c386 Drone build c68d13c 2024-06-12 05:10:20 +00:00
203b36bc6c Drone build 1f5623a 2024-06-12 04:10:13 +00:00
085d1dd3f7 Drone build d7a045f 2024-06-11 19:10:11 +00:00
1f3238519a Drone build a6d01f0 2024-04-25 22:26:25 +00:00
7ab352cdde Drone build d76023d 2024-01-01 00:19:05 +00:00
e5c7ad2ee3 Drone build d76023d 2023-12-08 19:19:07 +00:00
b14698604d Drone build 291f598 2023-09-24 05:26:19 +00:00
482899015a Drone build 4b89cb4 2023-09-21 04:57:16 +00:00
396b46d31e Drone build 6172792 2023-09-19 04:23:58 +00:00
1d53e2965c Drone build 64ed4f0 2023-09-19 04:14:41 +00:00
77bd58c48f Drone build 41a3fc0 2023-09-19 04:06:51 +00:00
4e79964a24 Drone build 0fc636c 2023-09-19 03:55:31 +00:00
7667b0ebf3 Drone build 607be11 2023-09-19 02:53:59 +00:00
a9765b4d5b Drone build 35a02ff 2023-09-19 02:44:28 +00:00
d5b6868b70 Drone build 7c0df2c 2023-09-19 02:20:42 +00:00
bd7fe9345f Drone build 25b1f1f 2023-09-18 19:20:23 +00:00
f20a18d653 Drone build 9b45a24 2023-09-18 05:05:17 +00:00
c6d8e2aae6 Drone build 02d83d4 2023-09-18 04:55:09 +00:00
685d7272e1 Drone build 23430e6 2023-09-18 04:20:30 +00:00
2f5387a7a3 Drone build 9a38aab 2023-09-18 04:10:58 +00:00
4b1dd1a9bf Drone build bac464a 2023-09-18 03:56:53 +00:00
c05622c64f Drone build e715591 2023-09-18 03:49:10 +00:00
b562560bbb Drone build cb2ac7b 2023-09-11 03:19:13 +00:00
92 changed files with 1937 additions and 14066 deletions

(file name not shown)

@@ -1,28 +0,0 @@
kind: pipeline
name: default

steps:
- name: build
  image: plugins/hugo
  settings:
    hugo_version: 0.97.0
    extended: true
    minify: true
    pull: always
    url: ericxliu.me
    validate: false
    output: "./output"
    mtu: 1450
- name: git-push
  image: appleboy/drone-git-push:0.2.0-linux-amd64
  settings:
    branch: gitea-pages
    remote: "git@git.ericxliu.me:eric/ericxliu-me.git"
    force: true
    commit: true
    path: "./output"
    commit_message: "Drone build ${DRONE_COMMIT_SHA:0:7}"
    author_name: "Eric Liu"
    author_email: "eric@ericxliu.me"
    ssh_key:
      from_secret: ssh_key
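
This deleted git-push step is what produced the "Drone build …" commits in the list at the top of this page: commit_message uses Bash substring expansion, ${DRONE_COMMIT_SHA:0:7}, so each published commit is titled with the first seven characters of the source SHA. A minimal Python sketch of the same truncation (the full SHA below is a hypothetical example; only its 7-character prefix appears in the real history):

# Hypothetical full SHA; the compare view above only shows the short form.
drone_commit_sha = "1bdc086e9f4c5a7d2b8e6f1a3c9d0b4e7f2a5c8d"
commit_message = f"Drone build {drone_commit_sha[:7]}"
print(commit_message)  # -> Drone build 1bdc086, matching the commit list above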

.gitignore (vendored): 1 line changed

@@ -1 +0,0 @@
_gen/

(file name not shown)

@@ -1,37 +0,0 @@
# This file is a template, and might need editing before it works on your project.
# To contribute improvements to CI/CD templates, please follow the Development guide at:
# https://docs.gitlab.com/ee/development/cicd/templates.html
# This specific template is located at:
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Pages/HTML.gitlab-ci.yml
# Full project: https://gitlab.com/pages/plain-html

variables:
  GIT_SUBMODULE_STRATEGY: recursive

build-stage:
  stage: build
  image: monachus/hugo:latest
  script:
    - hugo
    - ls
  artifacts:
    paths:
      - public

deploy-stage:
  stage: deploy
  image: minio/mc:latest
  script:
    - ls
    - mkdir .public
    - cp -r public/* .public
    - mc alias set minio http://minio.diskstation.local:80 WjaYWk3uthUlotbT Hc3fff7v69nZ6XvcXXpOZ3JJMzcmGc6A
    - mc cp -r .public/ minio/eric-personal
  artifacts:
    paths:
      - .public
  dependencies:
    - build-stage
  rules:
    - if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
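
The deleted deploy-stage mirrors the rendered site into the eric-personal bucket via mc. A sketch of the equivalent upload with the MinIO Python SDK, assuming the endpoint and bucket from the pipeline; the credentials are placeholders and upload_site is a hypothetical helper, not part of this repo:

from pathlib import Path

from minio import Minio

# Endpoint and bucket taken from the pipeline above; secure=False mirrors
# the plain http:// URL it targets.
client = Minio(
    "minio.diskstation.local:80",
    access_key="<ACCESS_KEY>",  # placeholder
    secret_key="<SECRET_KEY>",  # placeholder
    secure=False,
)

def upload_site(root: str = ".public", bucket: str = "eric-personal") -> None:
    # Equivalent of `mc cp -r .public/ minio/eric-personal`: upload every
    # file under its path relative to the site root.
    for path in Path(root).rglob("*"):
        if path.is_file():
            client.fput_object(bucket, str(path.relative_to(root)), str(path))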

.gitmodules (vendored): 3 lines changed

@@ -1,3 +0,0 @@
[submodule "themes/hugo-coder"]
	path = themes/hugo-coder
	url = https://github.com/luizdepra/hugo-coder

.nojekyll (new file, empty)

404.html (new file): 7 lines added

@@ -0,0 +1,7 @@
<!doctype html><html lang=en><head><title>Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="404 Page not found"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/404.html"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="404 Page not found"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/404.html><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container centered"><div class=error><h1>404</h1><h2>Page Not Found</h2><p>Sorry, this page does not exist.<br>You can head back to the <a href=https://ericxliu.me/>homepage</a>.</p></div></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

about/index.html (new file): 16 lines added

@@ -0,0 +1,16 @@
<!doctype html><html lang=en><head><title>About · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="
Hi, I&rsquo;m Eric Liu.
I am a Staff Software Engineer and Tech Lead Manager (TLM) at Google, based in Sunnyvale, CA.
My work focuses on Infrastructure Performance and Customer Engineering, specifically for GPUs and TPUs. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it&rsquo;s debugging race conditions across thousands of chips or designing API surfaces for next-gen models."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="About"><meta name=twitter:description content="Hi, Im Eric Liu.
I am a Staff Software Engineer and Tech Lead Manager (TLM) at Google, based in Sunnyvale, CA.
My work focuses on Infrastructure Performance and Customer Engineering, specifically for GPUs and TPUs. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether its debugging race conditions across thousands of chips or designing API surfaces for next-gen models."><meta property="og:url" content="https://ericxliu.me/about/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="About"><meta property="og:description" content="Hi, Im Eric Liu.
I am a Staff Software Engineer and Tech Lead Manager (TLM) at Google, based in Sunnyvale, CA.
My work focuses on Infrastructure Performance and Customer Engineering, specifically for GPUs and TPUs. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether its debugging race conditions across thousands of chips or designing API surfaces for next-gen models."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:published_time" content="2025-12-19T22:46:12-08:00"><meta property="article:modified_time" content="2025-12-20T09:52:07-08:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/about/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"About","genre":"Blog","wordcount":"201","url":"https:\/\/ericxliu.me\/about\/","datePublished":"2025-12-19T22:46:12-08:00","dateModified":"2025-12-20T09:52:07-08:00","description":"\u003cimg src=\u0022\/images\/about.jpeg\u0022 alt=\u0022Eric Liu\u0022 width=\u0022300\u0022 style=\u0022float: left; margin-right: 1.5rem; margin-bottom: 1rem; border-radius: 8px;\u0022\/\u003e\n\u003cp\u003eHi, I\u0026rsquo;m \u003cstrong\u003eEric Liu\u003c\/strong\u003e.\u003c\/p\u003e\n\u003cp\u003eI am a \u003cstrong\u003eStaff Software Engineer and Tech Lead Manager (TLM)\u003c\/strong\u003e at \u003cstrong\u003eGoogle\u003c\/strong\u003e, based in Sunnyvale, CA.\u003c\/p\u003e\n\u003cp\u003eMy work focuses on \u003cstrong\u003eInfrastructure Performance and Customer Engineering\u003c\/strong\u003e, specifically for \u003cstrong\u003eGPUs and TPUs\u003c\/strong\u003e. 
I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it\u0026rsquo;s debugging race conditions across thousands of chips or designing API surfaces for next-gen models.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container page"><article><header><h1 class=title><a class=title-link href=https://ericxliu.me/about/>About</a></h1></header><img src=/images/about.jpeg alt="Eric Liu" width=300 style=float:left;margin-right:1.5rem;margin-bottom:1rem;border-radius:8px><p>Hi, I&rsquo;m <strong>Eric Liu</strong>.</p><p>I am a <strong>Staff Software Engineer and Tech Lead Manager (TLM)</strong> at <strong>Google</strong>, based in Sunnyvale, CA.</p><p>My work focuses on <strong>Infrastructure Performance and Customer Engineering</strong>, specifically for <strong>GPUs and TPUs</strong>. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it&rsquo;s debugging race conditions across thousands of chips or designing API surfaces for next-gen models.</p><p>Beyond the code, I maintain this &ldquo;digital garden&rdquo; where I document my projects and learnings. It serves as my second brain, capturing everything from technical deep dives to random musings. I believe in <strong>&ldquo;learning in public&rdquo;</strong>—so you&rsquo;ll find unpolished notes on troubleshooting Kubernetes clusters alongside recipes I&rsquo;m refining. It&rsquo;s not just a blog; it&rsquo;s a living repository of my curiosity.</p><h3 id=personal-interests>Personal Interests
<a class=heading-link href=#personal-interests><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I&rsquo;m a tinkerer at heart, whether digital or physical:</p><ul><li><strong>Homelab</strong>: Kubernetes, Proxmox, and self-hosted services. I love over-engineering my home network.</li><li><strong>DIY & Jeep</strong>: Maintaining and modifying my Jeep, and general DIY projects.</li><li><strong>Cooking</strong>: experimenting with new recipes and techniques.</li></ul><p>Welcome to my corner of the internet.</p></article></section><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

ads.txt (new file): 1 line added

@@ -0,0 +1 @@
google.com, pub-3972604619956476, DIRECT, f08c47fec0942fa0

(file name not shown)

@@ -1,6 +0,0 @@
---
title: "{{ replace .Name "-" " " | title }}"
date: {{ .Date }}
draft: true
---

authors/index.html (new file): 7 lines added

@@ -0,0 +1,7 @@
<!doctype html><html lang=en><head><title>Authors · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Authors"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/authors/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Authors"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/authors/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/authors/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/authors/>Authors</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

authors/index.xml (new file): 1 line added

@@ -0,0 +1 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Authors on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/authors/</link><description>Recent content in Authors on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/authors/index.xml" rel="self" type="application/rss+xml"/></channel></rss>

(file name not shown)

@@ -0,0 +1 @@
<!doctype html><html lang=en><head><title>https://ericxliu.me/authors/</title><link rel=canonical href=https://ericxliu.me/authors/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/authors/"></head></html>

categories/index.html (new file): 7 lines added

@@ -0,0 +1,7 @@
<!doctype html><html lang=en><head><title>Categories · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Categories"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/categories/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Categories"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/categories/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/categories/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/categories/>Categories</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

categories/index.xml (new file): 1 line added

@@ -0,0 +1 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Categories on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/categories/</link><description>Recent content in Categories on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/categories/index.xml" rel="self" type="application/rss+xml"/></channel></rss>

(file name not shown)

@@ -0,0 +1 @@
<!doctype html><html lang=en><head><title>https://ericxliu.me/categories/</title><link rel=canonical href=https://ericxliu.me/categories/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/categories/"></head></html>

(file name not shown)

@@ -1,84 +0,0 @@
title = "Eric's Personal Page"
theme = "hugo-coder"
languageCode = "en"
defaultcontentlanguage = "en"
paginate = 20
canonifyurls = true

pygmentsstyle = "b2"
pygmentscodefences = true
pygmentscodefencesguesssyntax = true

[params] # theme parameters
  author = "Eric Liu"
  info = "Platform Software & Performance Engineer @Google"
  description = "Eric Liu's personal website"
  keywords = "blog,developer,personal"
  avatarurl = "images/gravatar.png"
  # whether you want to hide copyright and credits in the footer
  hideCredits = true
  hideCopyright = false
  rtl = false
  colorscheme = "light"
  # Series see also post count
  maxSeeAlsoItems = 5
  # Enable Twemoji
  enableTwemoji = true
  # Custom CSS
  custom_css = []
  # Custom JS
  custom_js = []

# Social links
[[params.social]]
  name = "Git"
  icon = "fab fa-gitlab"
  weight = 1
  url = "https://git.ericxliu.me/eric"
[[params.social]]
  name = "linkedin"
  icon = "fab fa-linkedin"
  weight = 2
  url = "https://www.linkedin.com/in/eric-liu-46648b93/"
[[params.social]]
  name = "Personal email"
  icon = "fas fa-envelope-square"
  weight = 3

# Menu links
[languages]
  [languages.en]
    languagename = "English"
    [[languages.en.menu.main]]
      name = "Posts"
      weight = 1
      url = "/posts/"
    [[languages.en.menu.main]]
      name = "Gitlab"
      weight = 2
      url = "https://git.ericxliu.me"
    [[languages.en.menu.main]]
      name = "Notebook"
      weight = 3
      url = "https://hub.ericxliu.me"
    [[languages.en.menu.main]]
      name = "Go"
      weight = 4
      url = "https://go.ericxliu.me/server"
    [[languages.en.menu.main]]
      name = "|"
      weight = 10
    [[languages.en.menu.main]]
      name = "Sign in"
      weight = 11
      url = "https://auth.ericxliu.me"

(file name not shown)

@@ -1,5 +0,0 @@
---
title: "About"
date: 2018-06-01T07:13:52Z
---

File diff suppressed because one or more lines are too long

(file name not shown)

@@ -1,12 +0,0 @@
+++
date = 2020-10-26T04:14:43Z
title = "Some useful files"
description = ""
slug = ""
tags = []
categories = []
externalLink = ""
series = []
+++
* [rootCA.pem](https://ericxliu.me/rootCA.pem)
* [vpnclient.ovpn](https://ericxliu.me/vpnclient.ovpn)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

fonts/LICENSE.txt (new file): 165 lines added

@@ -0,0 +1,165 @@
Fonticons, Inc. (https://fontawesome.com)
--------------------------------------------------------------------------------
Font Awesome Free License
Font Awesome Free is free, open source, and GPL friendly. You can use it for
commercial projects, open source projects, or really almost whatever you want.
Full Font Awesome Free license: https://fontawesome.com/license/free.
--------------------------------------------------------------------------------
# Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/)
The Font Awesome Free download is licensed under a Creative Commons
Attribution 4.0 International License and applies to all icons packaged
as SVG and JS file types.
--------------------------------------------------------------------------------
# Fonts: SIL OFL 1.1 License
In the Font Awesome Free download, the SIL OFL license applies to all icons
packaged as web and desktop font files.
Copyright (c) 2024 Fonticons, Inc. (https://fontawesome.com)
with Reserved Font Name: "Font Awesome".
This Font Software is licensed under the SIL Open Font License, Version 1.1.
This license is copied below, and is also available with a FAQ at:
http://scripts.sil.org/OFL
SIL OPEN FONT LICENSE
Version 1.1 - 26 February 2007
PREAMBLE
The goals of the Open Font License (OFL) are to stimulate worldwide
development of collaborative font projects, to support the font creation
efforts of academic and linguistic communities, and to provide a free and
open framework in which fonts may be shared and improved in partnership
with others.
The OFL allows the licensed fonts to be used, studied, modified and
redistributed freely as long as they are not sold by themselves. The
fonts, including any derivative works, can be bundled, embedded,
redistributed and/or sold with any software provided that any reserved
names are not used by derivative works. The fonts and derivatives,
however, cannot be released under any other type of license. The
requirement for fonts to remain under this license does not apply
to any document created using the fonts or their derivatives.
DEFINITIONS
"Font Software" refers to the set of files released by the Copyright
Holder(s) under this license and clearly marked as such. This may
include source files, build scripts and documentation.
"Reserved Font Name" refers to any names specified as such after the
copyright statement(s).
"Original Version" refers to the collection of Font Software components as
distributed by the Copyright Holder(s).
"Modified Version" refers to any derivative made by adding to, deleting,
or substituting — in part or in whole — any of the components of the
Original Version, by changing formats or by porting the Font Software to a
new environment.
"Author" refers to any designer, engineer, programmer, technical
writer or other person who contributed to the Font Software.
PERMISSION & CONDITIONS
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Font Software, to use, study, copy, merge, embed, modify,
redistribute, and sell modified and unmodified copies of the Font
Software, subject to the following conditions:
1) Neither the Font Software nor any of its individual components,
in Original or Modified Versions, may be sold by itself.
2) Original or Modified Versions of the Font Software may be bundled,
redistributed and/or sold with any software, provided that each copy
contains the above copyright notice and this license. These can be
included either as stand-alone text files, human-readable headers or
in the appropriate machine-readable metadata fields within text or
binary files as long as those fields can be easily viewed by the user.
3) No Modified Version of the Font Software may use the Reserved Font
Name(s) unless explicit written permission is granted by the corresponding
Copyright Holder. This restriction only applies to the primary font name as
presented to the users.
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
Software shall not be used to promote, endorse or advertise any
Modified Version, except to acknowledge the contribution(s) of the
Copyright Holder(s) and the Author(s) or with their explicit written
permission.
5) The Font Software, modified or unmodified, in part or in whole,
must be distributed entirely under this license, and must not be
distributed under any other license. The requirement for fonts to
remain under this license does not apply to any document created
using the Font Software.
TERMINATION
This license becomes null and void if any of the above conditions are
not met.
DISCLAIMER
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
OTHER DEALINGS IN THE FONT SOFTWARE.
--------------------------------------------------------------------------------
# Code: MIT License (https://opensource.org/licenses/MIT)
In the Font Awesome Free download, the MIT license applies to all non-font and
non-icon files.
Copyright 2024 Fonticons, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in the
Software without restriction, including without limitation the rights to use, copy,
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so, subject to the
following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--------------------------------------------------------------------------------
# Attribution
Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font
Awesome Free files already contain embedded comments with sufficient
attribution, so you shouldn't need to do anything additional when using these
files normally.
We've kept attribution comments terse, so we ask that you do not actively work
to remove them from files, especially code. They're a great way for folks to
learn about Font Awesome.
--------------------------------------------------------------------------------
# Brand Icons
All brand icons are trademarks of their respective owners. The use of these
trademarks does not indicate endorsement of the trademark holder by Font
Awesome, nor vice versa. **Please do not use brand logos for any purpose except
to represent the company, product, or service to which they refer.**

fonts/fa-brands-400.ttf (binary, new file): binary file not shown
fonts/fa-brands-400.woff2 (binary, new file): binary file not shown
fonts/fa-regular-400.ttf (binary, new file): binary file not shown
fonts/fa-regular-400.woff2 (binary, new file): binary file not shown
fonts/fa-solid-900.ttf (binary, new file): binary file not shown
fonts/fa-solid-900.woff2 (binary, new file): binary file not shown

hotfix.py (new file): 186 lines added

@@ -0,0 +1,186 @@
import sys
import os
import shutil
# 1. Prepare /tmp/patch
PATCH_DIR = "/tmp/patch"
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")
if os.path.exists(PATCH_DIR):
shutil.rmtree(PATCH_DIR)
os.makedirs(PATCH_DIR)
shutil.copytree("/app/litellm", LITELLM_DIR)
# 2. Patch openai.py
openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
with open(openai_file, "r") as f:
content = f.read()
tool_call_chunk_original = (
'class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call\n'
' id: Optional[str]\n'
' type: Literal["function"]'
)
tool_call_chunk_patch = tool_call_chunk_original.replace(
'Literal["function"]', 'Literal["function", "web_search"]'
)
delta_chunk_original = (
"class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
" id: str\n"
' type: Literal["function"]'
)
delta_chunk_patch = delta_chunk_original.replace(
'Literal["function"]', 'Literal["function", "web_search"]'
)
for original, patched, label in [
(tool_call_chunk_original, tool_call_chunk_patch, "ChatCompletionToolCallChunk"),
(delta_chunk_original, delta_chunk_patch, "ChatCompletionDeltaToolCallChunk"),
]:
if original in content:
content = content.replace(original, patched, 1)
else:
print(f"Hotfix warning: {label} pattern not found, skipping update")
with open(openai_file, "w") as f:
f.write(content)
# 3. Patch transformation.py
trans_file = os.path.join(LITELLM_DIR, "completion_extras/litellm_responses_transformation/transformation.py")
with open(trans_file, "r") as f:
content = f.read()
import_block_original = """ from litellm.types.utils import (
ChatCompletionToolCallChunk,
GenericStreamingChunk,
)
"""
import_block_updated = """ from litellm.types.utils import (
ChatCompletionToolCallChunk,
Delta,
GenericStreamingChunk,
ModelResponseStream,
StreamingChoices,
)
"""
if import_block_original in content:
content = content.replace(import_block_original, import_block_updated, 1)
elif import_block_updated in content:
pass
else:
print("Hotfix warning: unexpected chunk_parser import layout, skipping Delta/ModelResponseStream import patch")
added_block = """ elif output_item.get("type") == "web_search_call":
# handle web search call - mask tool call by emitting empty content delta
# This prevents Open WebUI from seeing tool_calls and trying to execute them
action_payload = output_item.get("action")
verbose_logger.debug(
"Chat provider: masking web_search_call (added) call_id=%s action=%s",
output_item.get("call_id"),
action_payload,
)
# Emit empty content delta instead of tool_call to mask the tool usage
return ModelResponseStream(
choices=[
StreamingChoices(
index=0,
delta=Delta(content=""),
finish_reason=None,
)
]
)
"""
done_block = """ elif output_item.get("type") == "web_search_call":
# handle web search done - mask tool call by emitting empty content delta
# This prevents Open WebUI from seeing tool_calls and trying to execute them
action_payload = output_item.get("action")
verbose_logger.debug(
"Chat provider: masking web_search_call (done) call_id=%s action=%s",
output_item.get("call_id"),
action_payload,
)
# Emit empty content delta instead of tool_call to mask the tool usage
# Do NOT set finish_reason="tool_calls" as that would signal Open WebUI to handle the tool
return ModelResponseStream(
choices=[
StreamingChoices(
index=0,
delta=Delta(content=""),
finish_reason=None,
)
]
)
"""
added_target = ' elif output_item.get("type") == "message":'
def insert_block(source: str, block: str, occurrence_index: int) -> str:
"""Insert block before the nth occurrence (0-based) of added_target."""
start = -1
search_from = 0
for _ in range(occurrence_index + 1):
start = source.find(added_target, search_from)
if start == -1:
return source
search_from = start + len(added_target)
return source[:start] + block + source[start:]
if 'masking web_search_call (added)' not in content:
new_content = insert_block(content, added_block, 0)
if new_content == content:
print("Hotfix warning: unable to find insertion point for web_search_call (added)")
else:
content = new_content
if 'masking web_search_call (done)' not in content:
new_content = insert_block(content, done_block, 1)
if new_content == content:
print("Hotfix warning: unable to find insertion point for web_search_call (done)")
else:
content = new_content
# 4. Ensure streaming tool call chunks fall back to output_item IDs
call_id_pattern = 'id=output_item.get("call_id"),'
call_id_patch = 'id=output_item.get("call_id") or output_item.get("id"),'
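# No count cap on this replace: every streamed tool-call chunk should prefer
# call_id and fall back to the output item's own id.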
if call_id_pattern in content:
content = content.replace(call_id_pattern, call_id_patch)
# 5. Guard assistant tool_call conversions when id is missing
tool_call_block_original = """ if function:
input_tool_call = {
"type": "function_call",
"call_id": tool_call["id"],
}
if "name" in function:
input_tool_call["name"] = function["name"]
if "arguments" in function:
input_tool_call["arguments"] = function["arguments"]
input_items.append(input_tool_call)
"""
tool_call_block_patch = """ if function:
call_id = tool_call.get("id") or tool_call.get("call_id")
if not call_id:
call_id = f"auto_tool_call_{len(input_items)}"
input_tool_call = {
"type": "function_call",
"call_id": call_id,
}
if "name" in function:
input_tool_call["name"] = function["name"]
if "arguments" in function:
input_tool_call["arguments"] = function["arguments"]
input_items.append(input_tool_call)
"""
if tool_call_block_original in content:
content = content.replace(tool_call_block_original, tool_call_block_patch, 1)
elif "auto_tool_call_" not in content:
print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")
with open(trans_file, "w") as f:
f.write(content)
print("Successfully applied hotfixes to /tmp/patch/litellm")

BIN
17 binary image files added (Normal file), including images/about.jpeg (287 KiB) and images/gravatar.png (288 KiB); the remaining 15 range from 52 KiB to 3.2 MiB. Binary file contents not shown.

8
index.html Normal file

@@ -0,0 +1,8 @@
<!doctype html><html lang=en><head><title>Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Eric X. Liu's Personal Page"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Eric X. Liu's Personal Page"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/index.xml title="Eric X. Liu's Personal Page"><meta name=generator content="Hugo 0.154.5"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container centered"><div class=about><div class=avatar><img src=/images/gravatar.png alt=avatar width=200 height=200></div><h1>Eric X. Liu</h1><h2 id=typeit-info></h2><script src=https://unpkg.com/typeit@8.7.1/dist/index.umd.js></script><script>document.addEventListener("DOMContentLoaded",function(){new TypeIt("#typeit-info",{strings:["Software & Performance Engineer @Google","DIY Overlander & Rock Crawler","Tech Enthusiast"],speed:50,loop:!0,breakLines:!1,nextStringDelay:2e3,deleteSpeed:50,startDelay:500,lifeLike:!0}).go()})</script><ul><li><a href=https://git.ericxliu.me/eric aria-label=Git><i class="fa-brands fa-git fa-2x" aria-hidden=true></i></a></li><li><a href=https://www.linkedin.com/in/eric-x-liu-46648b93/ aria-label=linkedin><i class="fa-brands fa-linkedin fa-2x" aria-hidden=true></i></a></li><li><style>#span-17968cae.cloaked-e-mail{display:none}</style> <span class=cloaked-e-mail data-user=cire data-domain=em.uilxcire data-display="PGkgY2xhc3M9ImZhIGZhLWVudmVsb3BlIGZhLTJ4IiBhcmlhLWhpZGRlbj0idHJ1ZSI+PC9pPg==" id=span-17968cae></span>
<script id=script-17968cae>var span,scriptTag=document.getElementById("script-17968cae"),link=document.createElement("a"),address="cire".split("").reverse().join("")+"@"+"em.uilxcire".split("").reverse().join("");link.href="mailto:"+address,span=document.getElementById("span-17968cae"),link.innerHTML=atob(span.getAttribute("data-display")),scriptTag.parentElement.insertBefore(link,scriptTag.previousElementSibling),scriptTag.parentElement.removeChild(scriptTag.previousElementSibling)</script></li><li><a href=https://ericxliu.me/index.xml aria-label=RSS rel=alternate type=application/rss+xml><i class="fa-solid fa-rss fa-2x" aria-hidden=true></i></a></li></ul></div></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

95
index.xml Normal file

@@ -0,0 +1,95 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Eric X. Liu's Personal Page</title><link>https://ericxliu.me/</link><description>Recent content on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Thu, 22 Jan 2026 06:48:07 +0000</lastBuildDate><atom:link href="https://ericxliu.me/index.xml" rel="self" type="application/rss+xml"/><item><title>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</title><link>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</link><pubDate>Wed, 21 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</guid><description>&lt;p&gt;&amp;ldquo;Vibe coding&amp;rdquo; has become my latest obsession. It&amp;rsquo;s that flow state where the tools disappear, and you&amp;rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I&amp;rsquo;ve been trying to answer a childhood dream.&lt;/p&gt;
&lt;p&gt;Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with &lt;em&gt;Baywatch&lt;/em&gt;, one of the first American TV shows I ever watched was &lt;em&gt;Knight Rider&lt;/em&gt;. I don&amp;rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.&lt;/p&gt;</description></item><item><title>How I Built a Blog Agent that Writes About Itself</title><link>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</link><pubDate>Fri, 16 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</guid><description>&lt;p&gt;I&amp;rsquo;ve been spending a lot of time &amp;ldquo;vibe coding&amp;rdquo; in the Antigravity IDE lately. It&amp;rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and &amp;ldquo;aha!&amp;rdquo; moments is locked away in an opaque, internal format.&lt;/p&gt;
&lt;p&gt;I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you&amp;rsquo;re reading right now).&lt;/p&gt;</description></item><item><title>Why I Downgraded Magisk to Root My Pixel 2 XL</title><link>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</link><pubDate>Wed, 07 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</guid><description>&lt;p&gt;For the past few weeks, I&amp;rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn&amp;rsquo;t proof.&lt;/p&gt;
&lt;p&gt;Static analysis has its limits. I found the &amp;ldquo;smoking gun&amp;rdquo; function—a native method responsible for encrypting the login payload—but understanding &lt;em&gt;how&lt;/em&gt; it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.&lt;/p&gt;</description></item><item><title>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</title><link>https://ericxliu.me/posts/debugging-authentik-performance/</link><pubDate>Fri, 02 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/debugging-authentik-performance/</guid><description>&lt;p&gt;In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running &amp;ldquo;production&amp;rdquo; at home, there is only one metric that truly matters: &lt;strong&gt;The Wife Acceptance Factor (WAF)&lt;/strong&gt;.&lt;/p&gt;
&lt;p&gt;My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was &amp;ldquo;slow sometimes.&amp;rdquo; She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.&lt;/p&gt;</description></item><item><title>How I Got Open WebUI Talking to OpenAI Web Search</title><link>https://ericxliu.me/posts/open-webui-openai-websearch/</link><pubDate>Mon, 29 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/open-webui-openai-websearch/</guid><description>&lt;p&gt;OpenAI promised native web search in GPT5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue &lt;a href="https://github.com/BerriAI/litellm/issues/13042" class="external-link" target="_blank" rel="noopener"&gt;#13042&lt;/a&gt; tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT5 traffic through the Responses API and mask every &lt;code&gt;web_search_call&lt;/code&gt; before the UI ever sees it.&lt;/p&gt;
&lt;p&gt;This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.&lt;/p&gt;</description></item><item><title>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</title><link>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</link><pubDate>Sat, 27 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</guid><description>&lt;p&gt;Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and &amp;ldquo;wait, was this dinner or &lt;em&gt;vacation&lt;/em&gt; dinner?&amp;rdquo; questions.&lt;/p&gt;
&lt;p&gt;For years, I relied on a rule-based system to categorize our credit card transactions. It worked&amp;hellip; mostly. But maintaining &lt;code&gt;if &amp;quot;UBER&amp;quot; in description and amount &amp;gt; 50&lt;/code&gt; style rules is a never-ending battle against the entropy of merchant names and changing habits.&lt;/p&gt;</description></item><item><title>About</title><link>https://ericxliu.me/about/</link><pubDate>Fri, 19 Dec 2025 22:46:12 -0800</pubDate><guid>https://ericxliu.me/about/</guid><description>&lt;img src="https://ericxliu.me/images/about.jpeg" alt="Eric Liu" width="300" style="float: left; margin-right: 1.5rem; margin-bottom: 1rem; border-radius: 8px;"/&gt;
&lt;p&gt;Hi, I&amp;rsquo;m &lt;strong&gt;Eric Liu&lt;/strong&gt;.&lt;/p&gt;
&lt;p&gt;I am a &lt;strong&gt;Staff Software Engineer and Tech Lead Manager (TLM)&lt;/strong&gt; at &lt;strong&gt;Google&lt;/strong&gt;, based in Sunnyvale, CA.&lt;/p&gt;
&lt;p&gt;My work focuses on &lt;strong&gt;Infrastructure Performance and Customer Engineering&lt;/strong&gt;, specifically for &lt;strong&gt;GPUs and TPUs&lt;/strong&gt;. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it&amp;rsquo;s debugging race conditions across thousands of chips or designing API surfaces for next-gen models.&lt;/p&gt;</description></item><item><title>The Convergence of Fast Weights, Linear Attention, and State Space Models</title><link>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</link><pubDate>Fri, 19 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</guid><description>&lt;p&gt;Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformers attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (&amp;ldquo;Fast Weights&amp;rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).&lt;/p&gt;
&lt;p&gt;This article explores the mathematical equivalence between Hintons concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.&lt;/p&gt;</description></item><item><title>vAttention</title><link>https://ericxliu.me/posts/vattention/</link><pubDate>Mon, 08 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vattention/</guid><description>&lt;p&gt;Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While &lt;strong&gt;PagedAttention&lt;/strong&gt; (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPUs native hardware Memory Management Unit (MMU) offers a more performant and portable solution.&lt;/p&gt;
&lt;h4 id="the-status-quo-pagedattention-and-software-tables"&gt;
The Status Quo: PagedAttention and Software Tables
&lt;a class="heading-link" href="#the-status-quo-pagedattention-and-software-tables"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h4&gt;
&lt;p&gt;Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &amp;ldquo;page table&amp;rdquo; (the Block Table) [1].&lt;/p&gt;</description></item><item><title>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</title><link>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</link><pubDate>Sat, 15 Nov 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</guid><description>&lt;p&gt;I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren&amp;rsquo;t immediately obvious.&lt;/p&gt;
&lt;h2 id="the-setup"&gt;
The Setup
&lt;a class="heading-link" href="#the-setup"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h2&gt;
&lt;p&gt;The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.&lt;/p&gt;</description></item><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description>&lt;h2 id="introduction"&gt;
Introduction
&lt;a class="heading-link" href="#introduction"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h2&gt;
&lt;p&gt;NVIDIA&amp;rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there&amp;rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.&lt;/p&gt;
&lt;p&gt;After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device&amp;rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn&amp;rsquo;t computation—it&amp;rsquo;s memory bandwidth. This isn&amp;rsquo;t just a quirk of one device; it&amp;rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.&lt;/p&gt;</description></item><item><title>Flashing Jetson Orin Nano in Virtualized Environments</title><link>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</link><pubDate>Thu, 02 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</guid><description>&lt;h1 id="flashing-jetson-orin-nano-in-virtualized-environments"&gt;
Flashing Jetson Orin Nano in Virtualized Environments
&lt;a class="heading-link" href="#flashing-jetson-orin-nano-in-virtualized-environments"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h1&gt;
&lt;h2 id="introduction"&gt;
Introduction
&lt;a class="heading-link" href="#introduction"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h2&gt;
&lt;p&gt;Flashing NVIDIA Jetson devices remotely presents unique challenges when the host machine is virtualized. This article documents the technical challenges, failures, and eventual success of flashing a Jetson Orin Nano Super developer kit using NVIDIA SDK Manager in various virtualized environments, specifically focusing on QEMU/KVM virtual machines and LXC containers on Proxmox VE.&lt;/p&gt;</description></item><item><title>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</title><link>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</link><pubDate>Sun, 28 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</guid><description>&lt;h3 id="overview"&gt;
Overview
&lt;a class="heading-link" href="#overview"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;When using WireGuard together with MWAN3 on OpenWrt, the tunnel can fail to establish or flap when the peer&amp;rsquo;s IP is routed into the tunnel itself. This is a classic routing bootstrap problem: WireGuard wants to route 0.0.0.0/0 into the tunnel, but the UDP packets to the peer&amp;rsquo;s public endpoint also get captured, so they never reach the Internet to bring the tunnel up.&lt;/p&gt;</description></item><item><title>UniFi VLAN Migration to Zone-Based Architecture</title><link>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</link><pubDate>Mon, 22 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</guid><description>&lt;p&gt;Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.&lt;/p&gt;
&lt;p&gt;This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.&lt;/p&gt;</description></item><item><title>Quantization in LLMs</title><link>https://ericxliu.me/posts/quantization-in-llms/</link><pubDate>Tue, 19 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/quantization-in-llms/</guid><description>&lt;p&gt;The burgeoning scale of Large Language Models (LLMs) has necessitated a paradigm shift in their deployment, moving beyond full-precision floating-point arithmetic towards lower-precision representations. Quantization, the process of mapping a wide range of continuous values to a smaller, discrete set, has emerged as a critical technique to reduce model size, accelerate inference, and lower energy consumption. This article provides a technical overview of quantization theories, their application in modern LLMs, and highlights the ongoing innovations in this domain.&lt;/p&gt;</description></item><item><title>Breville Barista Pro Maintenance</title><link>https://ericxliu.me/posts/breville-barista-pro-maintenance/</link><pubDate>Sat, 16 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/breville-barista-pro-maintenance/</guid><description>&lt;p&gt;Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.&lt;/p&gt;
&lt;h4 id="understanding-the-two-primary-maintenance-cycles"&gt;
&lt;strong&gt;Understanding the Two Primary Maintenance Cycles&lt;/strong&gt;
&lt;a class="heading-link" href="#understanding-the-two-primary-maintenance-cycles"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h4&gt;
&lt;p&gt;The Breville Barista Pro has two distinct, automated maintenance procedures: the &lt;strong&gt;Cleaning (Flush) Cycle&lt;/strong&gt; and the &lt;strong&gt;Descale Cycle&lt;/strong&gt;. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.&lt;/p&gt;</description></item><item><title>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</title><link>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</link><pubDate>Sat, 09 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</guid><description>&lt;p&gt;I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;nvidia-smi failed to communicate with the NVIDIA driver&lt;/li&gt;
&lt;li&gt;modprobe nvidia → “Key was rejected by service”&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.&lt;/p&gt;</description></item><item><title>Beyond Words: How RVQ Teaches LLMs to See and Hear</title><link>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</link><pubDate>Thu, 07 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</guid><description>&lt;p&gt;Large Language Models (LLMs) are masters of text, but the world is not made of text alone. Its a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?&lt;/p&gt;
&lt;p&gt;The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is &lt;strong&gt;Residual Vector Quantization (RVQ)&lt;/strong&gt;.&lt;/p&gt;</description></item><item><title>Supabase Deep Dive: It's Not Magic, It's Just Postgres</title><link>https://ericxliu.me/posts/supabase-deep-dive/</link><pubDate>Sun, 03 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/supabase-deep-dive/</guid><description>&lt;p&gt;In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what&amp;rsquo;s really going on.&lt;/p&gt;
&lt;p&gt;Supabase enters this space with a radically different philosophy: &lt;strong&gt;transparency&lt;/strong&gt;. It provides the convenience of a BaaS, but its built on the world&amp;rsquo;s most trusted relational database: PostgreSQL. The &amp;ldquo;magic&amp;rdquo; isn&amp;rsquo;t a proprietary black box; it&amp;rsquo;s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.&lt;/p&gt;</description></item><item><title>A Deep Dive into PPO for Language Models</title><link>https://ericxliu.me/posts/ppo-for-language-models/</link><pubDate>Sat, 02 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/ppo-for-language-models/</guid><description>&lt;p&gt;Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don&amp;rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).&lt;/p&gt;
&lt;p&gt;You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
&lt;img src="https://ericxliu.me/images/ppo-for-language-models/7713bd3ecf27442e939b9190fa08165d.png" alt="S3 File"&gt;&lt;/p&gt;</description></item><item><title>Mixture-of-Experts (MoE) Models Challenges &amp; Solutions in Practice</title><link>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</link><pubDate>Wed, 02 Jul 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</guid><description>&lt;p&gt;Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called &amp;ldquo;experts&amp;rdquo;) to specialize in different types of inputs. A &amp;ldquo;gating network&amp;rdquo; or &amp;ldquo;router&amp;rdquo; learns to dispatch each input (or &amp;ldquo;token&amp;rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.&lt;/p&gt;
&lt;h3 id="1-challenge-non-differentiability-of-routing-functions"&gt;
1. Challenge: Non-Differentiability of Routing Functions
&lt;a class="heading-link" href="#1-challenge-non-differentiability-of-routing-functions"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt;
Many routing mechanisms, especially &amp;ldquo;Top-K routing,&amp;rdquo; involve a discrete, hard selection process. A common function is &lt;code&gt;KeepTopK(v, k)&lt;/code&gt;, which selects the top &lt;code&gt;k&lt;/code&gt; scoring elements from a vector &lt;code&gt;v&lt;/code&gt; and sets others to $-\infty$ or $0$.&lt;/p&gt;</description></item><item><title>An Architectural Deep Dive of T5</title><link>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</link><pubDate>Sun, 01 Jun 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</guid><description>&lt;p&gt;In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the &amp;ldquo;decoder-only&amp;rdquo; model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.&lt;/p&gt;
&lt;p&gt;But to truly understand the field, we must look at the pivotal models that explored different paths. Google&amp;rsquo;s T5, or &lt;strong&gt;Text-to-Text Transfer Transformer&lt;/strong&gt;, stands out as one of the most influential. It didn&amp;rsquo;t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.&lt;/p&gt;</description></item><item><title>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</title><link>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</link><pubDate>Thu, 01 May 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</guid><description>&lt;p&gt;Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.&lt;/p&gt;
&lt;p&gt;Our overarching philosophy is simple: &lt;strong&gt;isolate and change only one variable at a time.&lt;/strong&gt; While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your &lt;strong&gt;grind size&lt;/strong&gt; is your most powerful lever.&lt;/p&gt;</description></item><item><title>Transformer's Core Mechanics</title><link>https://ericxliu.me/posts/transformer-s-core-mechanics/</link><pubDate>Tue, 01 Apr 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/transformer-s-core-mechanics/</guid><description>&lt;p&gt;The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of &amp;ldquo;channels&amp;rdquo; to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.&lt;/p&gt;
&lt;h3 id="1-the-channel-a-foundational-view-of-d_model"&gt;
1. The &amp;ldquo;Channel&amp;rdquo;: A Foundational View of &lt;code&gt;d_model&lt;/code&gt;
&lt;a class="heading-link" href="#1-the-channel-a-foundational-view-of-d_model"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;In deep learning, a &amp;ldquo;channel&amp;rdquo; can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model&amp;rsquo;s primary embedding dimension, commonly referred to as &lt;code&gt;d_model&lt;/code&gt;.&lt;/p&gt;</description></item><item><title>Some useful files</title><link>https://ericxliu.me/posts/useful/</link><pubDate>Mon, 26 Oct 2020 04:14:43 +0000</pubDate><guid>https://ericxliu.me/posts/useful/</guid><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="https://ericxliu.me/rootCA.crt" &gt;rootCA.pem&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;</description></item></channel></rss>


@@ -0,0 +1 @@
const body=document.body,darkModeToggle=document.getElementById("dark-mode-toggle"),darkModeMediaQuery=window.matchMedia("(prefers-color-scheme: dark)");localStorage.getItem("colorscheme")?setTheme(localStorage.getItem("colorscheme")):setTheme(body.classList.contains("colorscheme-light")||body.classList.contains("colorscheme-dark")?body.classList.contains("colorscheme-dark")?"dark":"light":darkModeMediaQuery.matches?"dark":"light"),darkModeToggle&&darkModeToggle.addEventListener("click",()=>{let e=body.classList.contains("colorscheme-dark")?"light":"dark";setTheme(e),rememberTheme(e)}),darkModeMediaQuery.addListener(e=>{setTheme(e.matches?"dark":"light")}),document.addEventListener("DOMContentLoaded",function(){let e=document.querySelector(".preload-transitions");e.classList.remove("preload-transitions")});function setTheme(e){body.classList.remove("colorscheme-auto");let n=e==="dark"?"light":"dark";body.classList.remove("colorscheme-"+n),body.classList.add("colorscheme-"+e),document.documentElement.style["color-scheme"]=e;function t(e){return new Promise(t=>{if(document.querySelector(e))return t(document.querySelector(e));const n=new MutationObserver(s=>{document.querySelector(e)&&(t(document.querySelector(e)),n.disconnect())});n.observe(document.body,{childList:!0,subtree:!0})})}if(e==="dark"){const e={type:"set-theme",theme:"github-dark"};t(".utterances-frame").then(t=>{t.contentWindow.postMessage(e,"https://utteranc.es")})}else{const e={type:"set-theme",theme:"github-light"};t(".utterances-frame").then(t=>{t.contentWindow.postMessage(e,"https://utteranc.es")})}function s(e){const t=document.querySelector("iframe.giscus-frame");if(!t)return;t.contentWindow.postMessage({giscus:e},"https://giscus.app")}s({setConfig:{theme:e}});const o=new Event("themeChanged");document.dispatchEvent(o)}function rememberTheme(e){localStorage.setItem("colorscheme",e)}


@@ -0,0 +1,65 @@
<!doctype html><html lang=en><head><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI) · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="
Introduction
Link to heading
NVIDIA&rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there&rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.
After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device&rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn&rsquo;t computation—it&rsquo;s memory bandwidth. This isn&rsquo;t just a quirk of one device; it&rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)"><meta name=twitter:description content="Introduction Link to heading NVIDIAs Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But theres a catch—one that reveals a fundamental tension in modern edge AI hardware design.
After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the devices computational muscle sits largely idle during single-stream LLM inference. The bottleneck isnt computation—its memory bandwidth. This isnt just a quirk of one device; its a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment."><meta property="og:url" content="https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)"><meta property="og:description" content="Introduction Link to heading NVIDIAs Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But theres a catch—one that reveals a fundamental tension in modern edge AI hardware design.
After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the devices computational muscle sits largely idle during single-stream LLM inference. The bottleneck isnt computation—its memory bandwidth. This isnt just a quirk of one device; its a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-10-04T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. 
Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Why Your Jetson Orin Nano\u0027s 40 TOPS Goes Unused (And What That Means for Edge AI)","genre":"Blog","wordcount":"1866","url":"https:\/\/ericxliu.me\/posts\/benchmarking-llms-on-jetson-orin-nano\/","datePublished":"2025-10-04T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003ch2 id=\u0022introduction\u0022\u003e\n Introduction\n \u003ca class=\u0022heading-link\u0022 href=\u0022#introduction\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h2\u003e\n\u003cp\u003eNVIDIA\u0026rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there\u0026rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.\u003c\/p\u003e\n\u003cp\u003eAfter running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device\u0026rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn\u0026rsquo;t computation—it\u0026rsquo;s memory bandwidth. This isn\u0026rsquo;t just a quirk of one device; it\u0026rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-10-04T00:00:00Z>October 4, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
9-minute read</span></div></div></header><div class=post-content><h2 id=introduction>Introduction
<a class=heading-link href=#introduction><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>NVIDIA&rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there&rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.</p><p>After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device&rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn&rsquo;t computation—it&rsquo;s memory bandwidth. This isn&rsquo;t just a quirk of one device; it&rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.</p><h2 id=the-hardware-what-were-working-with>The Hardware: What We&rsquo;re Working With
<a class=heading-link href=#the-hardware-what-were-working-with><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>The NVIDIA Jetson Orin Nano 8GB I tested features:</p><ul><li><strong>GPU</strong>: NVIDIA Ampere architecture with 1024 CUDA cores and 32 Tensor Cores</li><li><strong>Compute Performance</strong>: 40 TOPS (INT8), 10 TFLOPS (FP16), 5 TFLOPS (FP32)</li><li><strong>Memory</strong>: 8GB LPDDR5 unified memory with 68 GB/s bandwidth</li><li><strong>Available VRAM</strong>: Approximately 5.2GB after OS overhead</li><li><strong>CPU</strong>: 6-core ARM Cortex-A78AE (ARMv8.2, 64-bit)</li><li><strong>TDP</strong>: 7-25W configurable</li></ul><p>The unified memory architecture is a double-edged sword: CPU and GPU share the same physical memory pool, which eliminates PCIe transfer overhead but also means you&rsquo;re working with just 5.2GB of usable VRAM after the OS takes its share. This constraint shapes everything about LLM deployment on this device.</p><h2 id=testing-methodology>Testing Methodology
<a class=heading-link href=#testing-methodology><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><h3 id=the-models>The Models
<a class=heading-link href=#the-models><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I tested seven models ranging from 0.5B to 5.4B parameters—essentially the entire practical deployment range for this hardware. The selection covered two inference backends (Ollama and vLLM) and various quantization strategies:</p><p><strong>Ollama-served models (with quantization):</strong></p><ul><li>Gemma 3 1B (Q4_K_M, 815MB)</li><li>Gemma 3n E2B (Q4_K_M, 3.5GB, 5.44B total params, 2B effective)</li><li>Qwen 2.5 0.5B (Q4_K_M, 350MB)</li><li>Qwen 3 0.6B (FP8, 600MB)</li></ul><p><strong>vLLM-served models (minimal/no quantization):</strong></p><ul><li>google/gemma-3-1b-it (FP16, 2GB)</li><li>Qwen/Qwen2.5-0.5B-Instruct (FP16, 1GB)</li><li>Qwen/Qwen3-0.6B-FP8 (FP8, 600MB)</li></ul><h3 id=the-testing-process>The Testing Process
<a class=heading-link href=#the-testing-process><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Each model faced 10-12 prompts of varying complexity—from simple arithmetic to technical explanations about LLMs themselves. All tests ran with batch size = 1, simulating a single user interacting with a local chatbot—the typical edge deployment scenario. Out of 84 planned tests, 66 completed successfully (78.6% success rate). The failures? Mostly out-of-memory crashes on larger models and occasional inference engine instability.</p><h3 id=understanding-the-limits-roofline-analysis>Understanding the Limits: Roofline Analysis
<a class=heading-link href=#understanding-the-limits-roofline-analysis><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>To understand where performance hits its ceiling, I applied roofline analysis—a method that reveals whether a workload is compute-bound (limited by processing power) or memory-bound (limited by data transfer speed). For each model, I calculated:</p><ul><li><strong>FLOPs per token</strong>: Approximately 2 × total_parameters (accounting for matrix multiplications in forward pass)</li><li><strong>Bytes per token</strong>: model_size × 1.1 (including 10% overhead for activations and KV cache)</li><li><strong>Operational Intensity (OI)</strong>: FLOPs per token / Bytes per token</li><li><strong>Theoretical performance</strong>: min(compute_limit, bandwidth_limit)</li></ul><p>The roofline model works by comparing a workload&rsquo;s operational intensity (how many calculations you do per byte of data moved) against the device&rsquo;s balance point. If your operational intensity is too low, you&rsquo;re bottlenecked by memory bandwidth—and as we&rsquo;ll see, that&rsquo;s exactly what happens with LLM inference.</p><p><img src=/images/benchmarking-llms-on-jetson-orin-nano/16d64bdc9cf14b05b7c40c4718b8091b.png alt="S3 File"></p><h2 id=the-results-speed-and-efficiency>The Results: Speed and Efficiency
<a class=heading-link href=#the-results-speed-and-efficiency><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><h3 id=what-actually-runs-fast>What Actually Runs Fast
<a class=heading-link href=#what-actually-runs-fast><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Here&rsquo;s how the models ranked by token generation speed:</p><table><thead><tr><th>Rank</th><th>Model</th><th>Backend</th><th>Avg Speed (t/s)</th><th>Std Dev</th><th>Success Rate</th></tr></thead><tbody><tr><td>1</td><td>qwen3:0.6b</td><td>Ollama</td><td>38.84</td><td>1.42</td><td>100%</td></tr><tr><td>2</td><td>qwen2.5:0.5b</td><td>Ollama</td><td>35.24</td><td>2.72</td><td>100%</td></tr><tr><td>3</td><td>gemma3:1b</td><td>Ollama</td><td>26.33</td><td>2.56</td><td>100%</td></tr><tr><td>4</td><td>Qwen/Qwen2.5-0.5B-Instruct</td><td>vLLM</td><td>15.18</td><td>2.15</td><td>100%</td></tr><tr><td>5</td><td>Qwen/Qwen3-0.6B-FP8</td><td>vLLM</td><td>12.81</td><td>0.36</td><td>100%</td></tr><tr><td>6</td><td>gemma3n:e2b</td><td>Ollama</td><td>8.98</td><td>1.22</td><td>100%</td></tr><tr><td>7</td><td>google/gemma-3-1b-it</td><td>vLLM</td><td>4.59</td><td>1.52</td><td>100%</td></tr></tbody></table><p>The standout finding: quantized sub-1B models hit 25-40 tokens/second, with Ollama consistently outperforming vLLM by 2-6× thanks to aggressive quantization and edge-optimized execution. These numbers align well with independent benchmarks from NVIDIA&rsquo;s Jetson AI Lab (Llama 3.2 3B at 27.7 t/s, SmolLM2 at 41 t/s), confirming this is typical performance for the hardware class.
<img src=/images/benchmarking-llms-on-jetson-orin-nano/ee04876d75d247f9b27a647462555777.png alt="S3 File"></p><h3 id=responsiveness-first-token-latency>Responsiveness: First Token Latency
<a class=heading-link href=#responsiveness-first-token-latency><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The time to generate the first output token—a critical metric for interactive applications—varied significantly:</p><ul><li>qwen3:0.6b (Ollama): 0.522 seconds</li><li>gemma3:1b (Ollama): 1.000 seconds</li><li>qwen2.5:0.5b (Ollama): 1.415 seconds</li><li>gemma3n:e2b (Ollama): 1.998 seconds</li></ul><p>Smaller, quantized models generally get to that first token faster (qwen2.5:0.5b is the one outlier in this list)—exactly what you want for a chatbot or interactive assistant where perceived responsiveness matters as much as raw throughput.</p><h3 id=the-memory-bottleneck-revealed>The Memory Bottleneck Revealed
<a class=heading-link href=#the-memory-bottleneck-revealed><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>When I compared actual performance against theoretical limits, the results were striking:</p><table><thead><tr><th>Model</th><th>Theoretical (t/s)</th><th>Actual (t/s)</th><th>Efficiency</th><th>Bottleneck</th><th>OI (FLOPs/byte)</th></tr></thead><tbody><tr><td>gemma3:1b</td><td>109.90</td><td>26.33</td><td>24.0%</td><td>Memory</td><td>3.23</td></tr><tr><td>qwen3:0.6b</td><td>103.03</td><td>38.84</td><td>37.7%</td><td>Memory</td><td>1.82</td></tr><tr><td>qwen2.5:0.5b</td><td>219.80</td><td>35.24</td><td>16.0%</td><td>Memory</td><td>3.23</td></tr><tr><td>gemma3n:e2b</td><td>54.95</td><td>8.98</td><td>16.3%</td><td>Memory</td><td>3.23</td></tr><tr><td>google/gemma-3-1b-it</td><td>30.91</td><td>4.59</td><td>14.9%</td><td>Memory</td><td>0.91</td></tr><tr><td>Qwen/Qwen3-0.6B-FP8</td><td>103.03</td><td>12.81</td><td>12.4%</td><td>Memory</td><td>1.82</td></tr><tr><td>Qwen/Qwen2.5-0.5B-Instruct</td><td>61.82</td><td>15.18</td><td>24.6%</td><td>Memory</td><td>0.91</td></tr></tbody></table><p>Every single model is memory-bound in this single-stream inference scenario. Average hardware efficiency sits at just 20.8%—meaning the computational units spend most of their time waiting for data rather than crunching numbers. That advertised 40 TOPS? Largely untapped when generating one token at a time for a single user.
<img src=/images/benchmarking-llms-on-jetson-orin-nano/ee04876d75d247f9b27a647462555777.png alt="S3 File"></p><h2 id=what-this-actually-means>What This Actually Means
<a class=heading-link href=#what-this-actually-means><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><h3 id=why-memory-bandwidth-dominates-in-single-stream-inference>Why Memory Bandwidth Dominates (in Single-Stream Inference)
<a class=heading-link href=#why-memory-bandwidth-dominates-in-single-stream-inference><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The roofline numbers tell a clear story: operational intensity ranges from 0.91 to 3.23 FLOPs/byte across all tested models during single-token generation (batch size = 1). To saturate those 1024 CUDA cores even at the FP16 peak (10 TFLOPS), you&rsquo;d need an operational intensity around 147 FLOPs/byte given the device&rsquo;s 68 GB/s memory bandwidth.</p><p>Against the full 40 TOPS INT8 peak—the figure on the spec sheet—the threshold for a model to actually become compute-bound on this device during single-stream inference is higher still:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-fallback data-lang=fallback><span style=display:flex><span>OI_threshold = Peak_Compute / Memory_Bandwidth
</span></span><span style=display:flex><span> = (40 × 10^12 ops/s) / (68 × 10^9 bytes/s)
</span></span><span style=display:flex><span> = 588 FLOPs/byte
</span></span></code></pre></div><p>Single-stream autoregressive decoding falls roughly 180-650× short of this threshold because each token generation requires loading the entire model from memory (matrix-vector multiplication) while performing only ~2 FLOPs per parameter. The compute units are idle most of the time, simply waiting for model weights and activations to arrive from memory.</p><p>Note: Production LLM serving with large batch sizes (32-256 requests) changes this dynamic dramatically—batching transforms matrix-vector operations into matrix-matrix multiplications, increasing operational intensity by 30-250× and making workloads compute-bound (a rough worked example follows below). However, edge devices serving single users cannot exploit this optimization.</p><p>The largest model tested—gemma3n:e2b at 3.5GB quantized (5.44B total parameters, 2B effective)—shows only 16.3% efficiency, similar to other quantized models. Despite being the largest model, Q4_K_M quantization keeps its memory footprint manageable, resulting in similar operational intensity (3.23 FLOPs/byte) to the other INT4-quantized models. Its MatFormer architecture with selective parameter activation (only 2B of 5.44B params active per token) actually helps reduce memory traffic, though this benefit is partially offset by the overhead of routing logic.</p>
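<p>To see how batching closes the gap, here is a rough worked example in the same spirit as the threshold calculation above. It ignores the KV-cache traffic that grows with batch size, so it flatters large batches slightly:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-fallback data-lang=fallback>OI_batch ≈ (2 × params × B) / weight_bytes  ≈  B × OI_single
(weights are loaded once per step and reused across the whole batch)

Example (gemma3:1b, OI_single ≈ 3.23 FLOPs/byte):
  B = 1    →  OI ≈ 3.23   (deep in memory-bound territory)
  B = 64   →  OI ≈ 207    (still memory-bound, but 64× closer)
  B ≈ 182  →  OI ≈ 588    (crossover to compute-bound)</code></pre></div><h3 id=what-this-means-for-edge-deployment>What This Means for Edge Deployment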
<a class=heading-link href=#what-this-means-for-edge-deployment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The performance gap between Ollama and vLLM (2.3-5.7×) tells us something important about optimization priorities for single-user edge devices:</p><p><strong>Qwen 2.5 0.5B:</strong> Ollama (Q4_K_M, 350MB) at 35.24 t/s vs vLLM (FP16, 1GB) at 15.18 t/s—2.32× faster
<strong>Qwen 3 0.6B:</strong> Ollama (FP8) at 38.84 t/s vs vLLM (FP8) at 12.81 t/s—3.03× faster despite identical quantization
<strong>Gemma 3 1B:</strong> Ollama (Q4_K_M, 815MB) at 26.33 t/s vs vLLM (FP16, 2GB) at 4.59 t/s—5.74× faster</p><p>In single-stream scenarios, quantization delivers near-linear performance gains by directly attacking the memory bandwidth bottleneck. Q4_K_M quantization (4.5 bits/parameter) hits a sweet spot between model quality and speed. Going lower to INT2 might help further, but you&rsquo;ll need to carefully evaluate output quality.</p><p>The real insight: Ollama&rsquo;s edge-first design philosophy (GGUF format, streamlined execution, optimized kernels from llama.cpp) is fundamentally better aligned with single-stream, memory-constrained edge scenarios. vLLM&rsquo;s datacenter features—continuous batching, PagedAttention, tensor parallelism—add overhead without providing benefits when serving individual users on unified memory architectures. These features shine in multi-user production serving where batching can be exploited, but hurt performance in the single-stream case.</p><p><strong>What you should actually do</strong>: Stick with Ollama and Q4_K_M GGUF models, or TensorRT-LLM with INT4 quantization. Target the 0.5-1B parameter range (under 3GB) to leave headroom for KV cache. Focus your optimization efforts on memory access patterns and bandwidth reduction. Watch for emerging techniques like INT4 AWQ, sparse attention, and quantized KV caches.</p>
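<p>The KV-cache headroom point deserves a number. For a grouped-query-attention model in the 1B class, the per-token cache cost works out as follows; the layer count and head dimensions here are illustrative round numbers, not the exact Gemma or Qwen configurations:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-fallback data-lang=fallback>KV bytes/token = 2 (K and V) × n_layers × n_kv_heads × head_dim × bytes/element

Illustrative 1B-class config (28 layers, 4 KV heads, head_dim 128, FP16):
  2 × 28 × 4 × 128 × 2 B  ≈ 57 KB/token
  8K-token context        ≈ 0.47 GB — a meaningful slice of ~5.2 GB usable memory</code></pre></div><p>This is also why the quantized KV cache mentioned below is one of the more promising optimizations: an 8-bit cache halves that footprint and traffic relative to FP16.</p><h3 id=room-for-improvement>Room for Improvement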
<a class=heading-link href=#room-for-improvement><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The 20.8% average efficiency might sound terrible, but it&rsquo;s actually typical for edge AI devices running single-stream inference. Datacenter GPUs hit 60-80% efficiency on optimized workloads—but that&rsquo;s typically with large batch sizes that increase operational intensity. In comparable single-stream scenarios, even high-end GPUs see similar efficiency drops. Edge devices commonly land in the 15-40% range due to architectural tradeoffs and memory bandwidth constraints relative to their compute capability.</p><p>Three factors explain the gap:</p><ol><li><strong>Architecture</strong>: Unified memory sacrifices bandwidth for integration simplicity. The 4MB L2 cache and 7-15W TDP limit further constrain performance.</li><li><strong>Software maturity</strong>: Edge inference frameworks lag behind their datacenter counterparts in optimization.</li><li><strong>Runtime overhead</strong>: Quantization/dequantization operations, Python abstractions, and non-optimized kernels all add up.</li></ol><p>The consistent 16-24% efficiency across most models suggests there&rsquo;s room for 2-3× speedups through better software optimization—particularly in memory access patterns and kernel implementations. But fundamental performance leaps will require hardware changes—specifically, prioritizing memory bandwidth (200+ GB/s) over raw compute capability in future edge AI chips.</p><h2 id=where-to-go-from-here>Where to Go From Here
<a class=heading-link href=#where-to-go-from-here><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><h3 id=software-optimizations-worth-pursuing>Software Optimizations Worth Pursuing
<a class=heading-link href=#software-optimizations-worth-pursuing><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>Optimize memory access patterns in attention and MLP kernels</li><li>Implement quantized KV cache (8-bit or lower)</li><li>Tune for small batch sizes (2-4) to improve memory bus utilization</li><li>Overlap CPU-GPU pipeline operations to hide latency</li></ul><h3 id=research-directions>Research Directions
<a class=heading-link href=#research-directions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>Architectures with higher operational intensity (fewer memory accesses per compute operation)</li><li>Sparse attention patterns to reduce memory movement</li><li>On-device LoRA fine-tuning with frozen, quantized base weights</li><li>Multi-model serving with shared base model weights</li></ul><h3 id=what-edge-ai-hardware-designers-should-focus-on>What Edge AI Hardware Designers Should Focus On
<a class=heading-link href=#what-edge-ai-hardware-designers-should-focus-on><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Future edge AI devices optimized for local, single-user LLM inference need a fundamental shift in priorities: memory bandwidth over raw compute capability. Specifically:</p><ul><li>200+ GB/s memory bandwidth (3× current Jetson Orin Nano)</li><li>HBM integration for higher bandwidth density</li><li>16GB+ capacity to support 7B+ parameter models</li><li>Purpose-built INT4/INT8 accelerators with larger on-chip caches to reduce DRAM traffic</li></ul><hr><h2 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ol><li><p>Williams, S., Waterman, A., & Patterson, D. (2009). &ldquo;Roofline: An Insightful Visual Performance Model for Multicore Architectures.&rdquo; <em>Communications of the ACM</em>, 52(4), 65-76.</p></li><li><p>NVIDIA Corporation. (2024). &ldquo;Jetson Orin Nano Developer Kit Technical Specifications.&rdquo; <a href=https://developer.nvidia.com/embedded/jetson-orin-nano-developer-kit class=external-link target=_blank rel=noopener>https://developer.nvidia.com/embedded/jetson-orin-nano-developer-kit</a></p></li><li><p>&ldquo;Jetson AI Lab Benchmarks.&rdquo; NVIDIA Jetson AI Lab. <a href=https://www.jetson-ai-lab.com/benchmarks.html class=external-link target=_blank rel=noopener>https://www.jetson-ai-lab.com/benchmarks.html</a></p></li><li><p>Gerganov, G., et al. (2023). &ldquo;GGML - AI at the edge.&rdquo; <em>GitHub</em>. <a href=https://github.com/ggerganov/ggml class=external-link target=_blank rel=noopener>https://github.com/ggerganov/ggml</a></p></li><li><p>Kwon, W., et al. (2023). &ldquo;Efficient Memory Management for Large Language Model Serving with PagedAttention.&rdquo; <em>Proceedings of SOSP 2023</em>.</p></li><li><p>Team, G., Mesnard, T., et al. (2025). &ldquo;Gemma 3: Technical Report.&rdquo; <em>arXiv preprint arXiv:2503.19786v1</em>. <a href=https://arxiv.org/html/2503.19786v1 class=external-link target=_blank rel=noopener>https://arxiv.org/html/2503.19786v1</a></p></li><li><p>Yang, A., et al. (2025). &ldquo;Qwen3 Technical Report.&rdquo; <em>arXiv preprint arXiv:2505.09388</em>. <a href=https://arxiv.org/pdf/2505.09388 class=external-link target=_blank rel=noopener>https://arxiv.org/pdf/2505.09388</a></p></li><li><p>DeepSeek-AI. (2025). &ldquo;DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning.&rdquo; <em>arXiv preprint arXiv:2501.12948v1</em>. <a href=https://arxiv.org/html/2501.12948v1 class=external-link target=_blank rel=noopener>https://arxiv.org/html/2501.12948v1</a></p></li><li><p>&ldquo;Running LLMs with TensorRT-LLM on NVIDIA Jetson Orin Nano Super.&rdquo; Collabnix. <a href=https://collabnix.com/running-llms-with-tensorrt-llm-on-nvidia-jetson-orin-nano-super/ class=external-link target=_blank rel=noopener>https://collabnix.com/running-llms-with-tensorrt-llm-on-nvidia-jetson-orin-nano-super/</a></p></li><li><p>Pope, R., et al. (2022). &ldquo;Efficiently Scaling Transformer Inference.&rdquo; <em>Proceedings of MLSys 2022</em>.</p></li><li><p>Frantar, E., et al. (2023). &ldquo;GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers.&rdquo; <em>Proceedings of ICLR 2023</em>.</p></li><li><p>Dettmers, T., et al. (2023). &ldquo;QLoRA: Efficient Finetuning of Quantized LLMs.&rdquo; <em>Proceedings of NeurIPS 2023</em>.</p></li><li><p>Lin, J., et al. (2023). 
&ldquo;AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration.&rdquo; <em>arXiv preprint arXiv:2306.00978</em>.</p></li></ol></div></article>
<article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/breville-barista-pro-maintenance/>Breville Barista Pro Maintenance</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-08-16T00:00:00Z>August 16, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
5-minute read</span></div></div></header><div class=post-content><p>Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.</p><h4 id=understanding-the-two-primary-maintenance-cycles><strong>Understanding the Two Primary Maintenance Cycles</strong>
<a class=heading-link href=#understanding-the-two-primary-maintenance-cycles><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The Breville Barista Pro has two distinct, automated maintenance procedures: the <strong>Cleaning (Flush) Cycle</strong> and the <strong>Descale Cycle</strong>. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.</p><ul><li><strong>Cleaning Cycle (Flush):</strong> This process is designed to remove coffee oils and granulated residue from the group head, shower screen, and portafilter system.</li><li><strong>Descale Cycle:</strong> This process targets the internal components of the machine, such as the thermocoil and water lines, to remove mineral and limescale deposits from water.</li></ul><h4 id=procedure-1-the-cleaning-flush-cycle><strong>Procedure 1: The Cleaning (Flush) Cycle</strong>
<a class=heading-link href=#procedure-1-the-cleaning-flush-cycle><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The machine will indicate when a cleaning cycle is needed by displaying a &ldquo;FLUSH&rdquo; alert on the LCD screen. This typically occurs after approximately 200 extractions.</p><p><strong>Required Materials:</strong></p><ul><li>1-Cup filter basket</li><li>Grey silicone cleaning disc (provided with the machine)</li><li>One cleaning tablet</li></ul><p><strong>Step-by-Step Instructions:</strong></p><ol><li>Insert the 1-cup filter basket into the portafilter.</li><li>Place the grey silicone cleaning disc inside the basket.</li><li>Position one cleaning tablet in the center of the disc.</li><li>Lock the portafilter firmly into the group head.</li><li>Ensure the drip tray is empty and the water tank is filled.</li><li>Press the &lsquo;MENU&rsquo; button and use the &lsquo;Grind Amount&rsquo; dial to navigate to the &lsquo;FLUSH&rsquo; option. Press the dial to select it.</li><li>The &lsquo;1 CUP&rsquo; button will illuminate. Press it to initiate the cycle.</li><li>The cleaning process will last approximately five minutes, with the machine backflushing water under pressure. The remaining time will be displayed on the screen.</li><li>Upon completion, the machine will beep and return to its ready state.</li><li>Remove the portafilter and discard the water and dissolved tablet residue. Thoroughly rinse the portafilter, cleaning disc, and filter basket.</li><li>Re-insert the portafilter (without the disc or tablet) and run a shot of hot water through the group head to rinse any remaining cleaning solution.</li></ol><h4 id=procedure-2-the-descale-cycle><strong>Procedure 2: The Descale Cycle</strong>
<a class=heading-link href=#procedure-2-the-descale-cycle><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The machine will alert you when descaling is required. The frequency depends on water hardness and usage but is generally recommended every 2-3 months.</p><p><strong>Required Materials:</strong></p><ul><li>Breville-recommended descaling solution</li><li>A large container (minimum 2-liter capacity)</li></ul><p><strong>Step-by-Step Instructions:</strong></p><p><strong>Part A: Preparation</strong></p><ol><li>Empty the drip tray and re-insert it.</li><li>Remove the water filter from the water tank.</li><li>Pour the descaling solution into the empty water tank and add fresh water up to the indicated &ldquo;DESCALE&rdquo; line.</li><li>Place a large container under the group head, hot water outlet, and steam wand.</li></ol><p><strong>Part B: The Descaling Process</strong></p><ol><li>Turn the machine on and press the &lsquo;MENU&rsquo; button. Navigate to the &lsquo;DESCALE&rsquo; option and select it by pressing the dial.</li><li>Press the illuminated &lsquo;1 CUP&rsquo; button to begin.</li><li>The cycle proceeds in three stages. You must manually advance through them using the steam dial based on the LCD prompts:<ul><li><strong>Group Head (d3):</strong> The machine descales the coffee brewing components.</li><li><strong>Hot Water (d2):</strong> After a beep, the LCD shows &ldquo;d2&rdquo;. Turn the steam dial to the hot water position.</li><li><strong>Steam (d1):</strong> After another beep, the display reads &ldquo;d1&rdquo;. Turn the dial to the steam position.</li></ul></li></ol><p><strong>Part C: The Rinse Cycle</strong></p><ol><li>Once the descaling solution is expended, the machine will beep and prompt for a rinse cycle (&ldquo;r&rdquo;).</li><li>Empty the large container and rinse the water tank thoroughly.</li><li>Fill the water tank with fresh, cold water to the MAX line and re-insert it.</li><li>Place the empty container back under the outlets and press the &lsquo;1 CUP&rsquo; button.</li><li>The rinse cycle will mirror the descaling process, prompting you to engage the group head (&ldquo;r3&rdquo;), hot water (&ldquo;r2&rdquo;), and steam wand (&ldquo;r1&rdquo;) in sequence.</li><li>After the rinse is complete, the machine will exit the maintenance mode and return to its ready state.</li></ol><h4 id=routine-and-preventative-maintenance-schedule><strong>Routine and Preventative Maintenance Schedule</strong>
<a class=heading-link href=#routine-and-preventative-maintenance-schedule><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>In addition to the automated cycles, regular manual cleaning is essential for machine health.</p><p><strong>Daily Tasks:</strong></p><ul><li><strong>Purge Group Head:</strong> After the final use of the day, run hot water through the group head (without the portafilter) to clear grounds.</li><li><strong>Clean Portafilter & Baskets:</strong> Do not let used coffee grounds sit in the portafilter. Rinse with hot water after every use.</li><li><strong>Clean Steam Wand:</strong> Immediately after texturing milk, wipe the wand with a damp cloth and purge steam for 2-3 seconds to clear internal passages.</li><li><strong>Empty Drip Tray:</strong> Empty and rinse the drip tray regularly.</li></ul><p><strong>Weekly Tasks:</strong></p><ul><li><strong>Soak Components:</strong> Remove the filter basket from the portafilter. Soak both components in a solution of hot water and a cleaning tablet (or specific espresso cleaner) for 20-30 minutes to dissolve accumulated coffee oils. Rinse thoroughly.</li><li><strong>Clean Grinder:</strong> Empty the bean hopper. Run the grinder to clear any remaining beans, then use a brush and/or vacuum to clean out fines and oil residue from the burrs and chute.</li></ul><p><strong>Periodic Tasks (Every 2-3 Months):</strong></p><ul><li><strong>Replace Water Filter:</strong> The water filter located inside the water tank should be replaced every 3 months. This reduces the rate of scale buildup.</li><li><strong>Inspect Shower Screen:</strong> Use a brush to gently scrub the shower screen inside the group head to remove any stubborn coffee grounds.</li></ul><p>By adhering to this comprehensive maintenance schedule, you can ensure your Breville Barista Pro operates at peak performance and consistently produces high-quality espresso.</p><hr><p><strong>Reference:</strong></p><ul><li>Breville Barista Pro Instruction Manual and official manufacturer guidelines.</li></ul></div></article>
<article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/debugging-authentik-performance/>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2026-01-02T00:00:00Z>January 2, 2026
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
5-minute read</span></div></div></header><div class=post-content><p>In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running &ldquo;production&rdquo; at home, there is only one metric that truly matters: <strong>The Wife Acceptance Factor (WAF)</strong>.</p><p>My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was &ldquo;slow sometimes.&rdquo; She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.</p><p>Here is a breakdown of the symptoms, the red herrings, and the root cause that was hiding in plain sight.</p><h2 id=the-environment>The Environment
<a class=heading-link href=#the-environment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>My homelab is designed for node-level resilience, which adds complexity to the storage layer. It is not running on a single server, but rather a 3-node <strong>Proxmox</strong> cluster where every component is redundant:</p><ul><li><strong>Orchestration</strong>: Kubernetes (k3s) managed via Flux CD.</li><li><strong>Storage</strong>: A <strong>Ceph</strong> cluster running on the Proxmox nodes, utilizing enterprise NVMe SSDs (<code>bluestore</code>) for OSDs.</li><li><strong>Database</strong>: Postgres managed by the Zalando Postgres Operator, with persistent volumes (PVCs) provisioned on Ceph RBD (block storage).</li><li><strong>Identity</strong>: Authentik for SSO.</li></ul><p>While the underlying disks are blazing fast NVMe drives, the architecture dictates that a write to a Ceph RBD volume is not complete until it is replicated over the network and acknowledged by multiple OSDs. This setup provides incredible resilience—I can pull the plug on a node and nothing stops—but it introduces unavoidable network latency for synchronous write operations. <strong>Keep this particular trade-off in mind; it plays a starring role in the investigation later.</strong></p><h2 id=the-symptom>The Symptom
<a class=heading-link href=#the-symptom><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>The issue was insidious because it was intermittent. Clicking &ldquo;Login&rdquo; would sometimes hang for 5-8 seconds, while other times it was instant. To an engineer, &ldquo;sometimes slow&rdquo; is the worst kind of bug because it defies easy reproduction.</p><p>The breakthrough came when I put aside the server-side Grafana dashboards and looked at the client side. By opening Chrome DevTools and monitoring the <strong>Network</strong> tab during a slow login attempt, I was able to capture the exact failing request.</p><p>I identified the culprit: the <code>/api/v3/core/applications/</code> endpoint. It wasn&rsquo;t a connection timeout or a DNS issue; the server was simply taking 5+ seconds to respond to this specific GET request.</p><p>Armed with this &ldquo;smoking gun,&rdquo; I copied the request as cURL (preserving the session cookies) and converted it into a Python benchmark script (<code>reproduce_latency.py</code>). This allowed me to reliably trigger the latency on demand, turning an intermittent &ldquo;heisenbug&rdquo; into a reproducible test case.</p><p>The results were validating and horrifying:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-text data-lang=text><span style=display:flex><span>Request 1: 2.1642s
</span></span><span style=display:flex><span>Request 2: 8.4321s
</span></span><span style=display:flex><span>Request 3: 5.1234s
</span></span><span style=display:flex><span>...
</span></span><span style=display:flex><span>Avg Latency: 4.8s
</span></span></code></pre></div>
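<p>For the curious, the whole script amounts to a timed replay of the captured request. A minimal sketch—the hostname and cookie value below are placeholders; the real script reused the session cookies copied from DevTools:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># Sketch of reproduce_latency.py: replay the slow endpoint with the
# captured session cookies and time each request.
import time
import requests

URL = "https://sso.example.com/api/v3/core/applications/"  # placeholder host
COOKIES = {"authentik_session": "REDACTED"}                # from DevTools

timings = []
for i in range(8):
    start = time.perf_counter()
    resp = requests.get(URL, cookies=COOKIES, timeout=30)
    resp.raise_for_status()
    timings.append(time.perf_counter() - start)
    print(f"Request {i + 1}: {timings[-1]:.4f}s")

print(f"Avg Latency: {sum(timings) / len(timings):.1f}s")</code></pre></div><h2 id=investigation--red-herrings>Investigation & Red Herrings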
<a class=heading-link href=#investigation--red-herrings><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><h3 id=attempt-1-the-connection-overhead-hypothesis>Attempt 1: The Connection Overhead Hypothesis
<a class=heading-link href=#attempt-1-the-connection-overhead-hypothesis><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Hypothesis</strong>: Authentik defaults to <code>CONN_MAX_AGE=0</code>, meaning it closes the database connection after every request. Since I enforce SSL for the database, I assumed the handshake overhead was killing performance.</p><p><strong>The Fix Attempt</strong>: I updated the Authentik configuration to enable persistent connections:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-yaml data-lang=yaml><span style=display:flex><span><span style=color:#7ee787>env</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>- <span style=color:#7ee787>name</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>AUTHENTIK_POSTGRESQL__CONN_MAX_AGE</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>value</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>&#34;600&#34;</span><span style=color:#6e7681>
</span></span></span></code></pre></div><p><strong>The Reality</strong>: The benchmark showed a slight improvement (~4.2s average), but the random 5-8s spikes remained. The 300ms connection setup was a factor, but not the root cause. As a side note, enabling this without configuring TCP Keepalives caused the Authentik worker to crash with <code>OperationalError('the connection is closed')</code> when firewalls silently dropped idle connections.</p><h3 id=attempt-2-cpu-starvation>Attempt 2: CPU Starvation
<a class=heading-link href=#attempt-2-cpu-starvation><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Hypothesis</strong>: The pods were CPU throttled during request processing.</p><p><strong>The Reality</strong>: <code>kubectl top pods</code> showed the server using only 29m (2.9% of a core). Even increasing the Gunicorn worker count from 2 to 4 did not improve the latency of individual requests, though it did help with concurrency.</p><h2 id=the-root-cause-a-perfect-storm>The Root Cause: A Perfect Storm
<a class=heading-link href=#the-root-cause-a-perfect-storm><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>I was stuck. The CPU was idle, network was fine, and individual database queries were fast (&lt;1ms). Then I looked at the traffic patterns:</p><ol><li><strong>Redis</strong>: Almost zero traffic.</li><li><strong>Postgres</strong>: High <code>WALSync</code> and <code>WALWrite</code> wait times.</li><li><strong>The Table</strong>: <code>django_postgres_cache_cacheentry</code> was getting hammered.</li></ol><h3 id=insight-the-breaking-change>Insight: The Breaking Change
<a class=heading-link href=#insight-the-breaking-change><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I checked the release notes for <strong>Authentik 2025.10</strong>:</p><blockquote><p><em>Breaking Change: Redis is no longer used for caching. All caching has been moved to the PostgreSQL database to simplify deployment.</em></p></blockquote><p>This architectural shift created a bottleneck specific to my storage backend:</p><ol><li><strong>The Change</strong>: Every API request triggers a cache write (session updates) to Postgres instead of Redis.</li><li><strong>The Default</strong>: Postgres defaults to <code>synchronous_commit = on</code>. A transaction is not considered &ldquo;committed&rdquo; until it is flushed to disk.</li><li><strong>The Storage</strong>: Ceph RBD replicates data across the network to multiple OSDs.</li></ol><p>Every time I loaded the dashboard, Authentik tried to update the cache. Postgres paused, verified the write was replicated to 3 other servers over the network (WAL Sync), and <em>then</em> responded.</p><h2 id=the-solution>The Solution
<a class=heading-link href=#the-solution><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>I couldn&rsquo;t move the database to local NVMe without losing the failover capabilities I built the cluster for. However, for a cache-heavy workload, I could compromise on strict durability.</p><p>I patched the Postgres configuration to disable synchronous commits:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-yaml data-lang=yaml><span style=display:flex><span><span style=color:#7ee787>spec</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>postgresql</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>parameters</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>synchronous_commit</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>&#34;off&#34;</span><span style=color:#6e7681> </span><span style=color:#8b949e;font-style:italic># The magic switch</span><span style=color:#6e7681>
</span></span></span></code></pre></div><p><strong>What this does</strong>: Postgres returns &ldquo;Success&rdquo; to the application as soon as the transaction is in memory. It flushes to disk in the background. In the event of a crash, I might lose the last ~500ms of data (mostly cache entries), which is an acceptable trade-off.</p><h2 id=verification>Verification
<a class=heading-link href=#verification><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>I re-ran the benchmark with <code>synchronous_commit = off</code>.</p><table><thead><tr><th>Metric</th><th>Before (<code>sync=on</code>)</th><th>After (<code>sync=off</code>)</th><th>Improvement</th></tr></thead><tbody><tr><td>Sequential x8 stream (Avg)</td><td>~4.8s</td><td><strong>0.40s</strong></td><td><strong>12x Faster</strong></td></tr><tr><td>Parallel x8 stream (Wall)</td><td>~10.5s</td><td><strong>2.45s</strong></td><td><strong>4x Faster</strong></td></tr></tbody></table><p>The latency vanished. The login became instant.</p><h2 id=key-insights>Key Insights
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Read Release Notes</strong>: The shift from Redis to Postgres for caching was a major architectural change that I missed during the upgrade.</li><li><strong>Storage Matters</strong>: Distributed storage (Ceph/Longhorn) handles linear writes well, but struggles with latency-sensitive, high-frequency sync operations like WAL updates.</li><li><strong>Tuning Postgres</strong>: For workloads where immediate durability is less critical than latency (like caching tables), <code>synchronous_commit = off</code> is a powerful tool.</li><li><strong>Observability</strong>: The &ldquo;Wife Test&rdquo; is a valid monitoring alert. If a user complains it&rsquo;s slow, investigate the P99 latency, not just the average.</li></ul><h3 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li><a href=https://docs.goauthentik.io/releases/2025.10/ class=external-link target=_blank rel=noopener>Authentik 2025.10 Release Notes</a></li><li><a href=https://www.postgresql.org/docs/current/wal-async-commit.html class=external-link target=_blank rel=noopener>PostgreSQL Documentation: Synchronous Commit</a></li></ul></div></article></section></div></main></body></html>

View File

@@ -0,0 +1,23 @@
<!doctype html><html lang=en><head><title>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso · Eric X. Liu's Personal Page</title><link rel=canonical href=https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/></head><body class="preload-transitions colorscheme-auto"><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-05-01T00:00:00Z>May 1, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
6-minute read</span></div></div></header><div class=post-content><p>Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.</p><p>Our overarching philosophy is simple: <strong>isolate and change only one variable at a time.</strong> While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your <strong>grind size</strong> is your most powerful lever.</p><p>Let&rsquo;s dive in!</p><hr><h3 id=part-1-the-foundation--dose-the-weight-of-dry-coffee><strong>Part 1: The Foundation — Dose (The Weight of Dry Coffee)</strong>
<a class=heading-link href=#part-1-the-foundation--dose-the-weight-of-dry-coffee><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Your dose is the bedrock of your espresso. It&rsquo;s the weight of your ground coffee, and it should be the first variable you set and then keep <strong>constant</strong> during the initial dialing-in process.</p><p><strong>Why Dose Matters:</strong></p><ul><li><strong>Basket Size is Key:</strong> Your portafilter basket dictates your ideal dose. Too little coffee (under-dosing) creates excessive &ldquo;headspace,&rdquo; leading to soupy extractions. Too much (over-dosing) causes the coffee puck to touch the shower screen, preventing even water flow and causing channeling.</li><li><strong>Extraction &ldquo;Work&rdquo;:</strong> A higher dose means more coffee mass, requiring more &ldquo;work&rdquo; (a finer grind, more water) to extract properly.</li><li><strong>Coffee Type:</strong><ul><li><strong>Light Roasts:</strong> Denser and harder to extract. Consider a <strong>slightly lower dose</strong>.</li><li><strong>Dark Roasts:</strong> More brittle and soluble. You can often use a <strong>slightly higher dose</strong>.</li></ul></li></ul><p><strong>Application for Your Breville Barista Pro (54mm Portafilter):</strong></p><ul><li><strong>Your Starting Point:</strong> Always begin with <strong>18 grams</strong>. Use a scale for accuracy!</li><li><strong>Adjusting for Roast:</strong> For light roasts, if you&rsquo;re struggling, drop to 17g. For dark roasts, you can try 19g.</li><li><strong>Golden Rule:</strong> Once you choose your starting dose (e.g., 18g), <strong>do not change it</strong> until you&rsquo;ve dialed in your grind size.</li></ul><hr><h3 id=part-2-defining-the-drink--brew-ratio-dose-vs-yield><strong>Part 2: Defining the Drink — Brew Ratio (Dose vs. Yield)</strong>
<a class=heading-link href=#part-2-defining-the-drink--brew-ratio-dose-vs-yield><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The brew ratio defines the relationship between your dry coffee dose and the weight of your liquid espresso yield. Always measure by <strong>weight (grams)</strong>, not volume (mL), as crema can be inconsistent.</p><p><strong>Understanding Ratios:</strong></p><ul><li><strong>Ristretto (1:1&ndash;1:1.5):</strong> E.g., 18g in → 18g to 27g out. Strong, textured, less extracted.</li><li><strong>Espresso (Normale) (1:1.5&ndash;1:2.5):</strong> E.g., 18g in → 27g to 45g out. The standard, balanced shot.</li><li><strong>Lungo (1:2.5+):</strong> E.g., 18g in → 45g+ out. Weaker, less textured, more extracted.</li></ul><p><strong>The Fundamental Trade-Off:</strong></p><ul><li><strong>Longer Ratio (more water):</strong> Higher extraction, but lower strength (more diluted).</li><li><strong>Shorter Ratio (less water):</strong> Lower extraction, but higher strength (more concentrated).</li></ul><p><strong>Application for Your Breville Barista Pro:</strong></p><ul><li><strong>Recommended Starting Ratio:</strong> A <strong>1:2 ratio</strong> is the perfect place to begin.</li><li><strong>Practical Numbers:</strong> With your 18g dose, your target yield is <strong>36 grams</strong> of liquid espresso.</li><li><strong>Execution:</strong> Place your cup on a scale and use the manual brew function to stop the shot precisely when the scale reads 36g.</li></ul>
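<p>Because dose, ratio, and yield are simple multiplication, it can help to see the targets laid out. A tiny illustrative script (mine, not from the guide; the ratio values are mid-range picks):</p><pre><code class=language-python># Target yield is just dose x ratio: an 18 g dose at 1:2 stops at 36 g.
def target_yield(dose_g, ratio):
    return dose_g * ratio

for name, ratio in [("ristretto", 1.25), ("espresso", 2.0), ("lungo", 2.75)]:
    print(f"{name}: 18 g in, {target_yield(18, ratio):.0f} g out")
</code></pre><hr><h3 id=part-3-the-diagnostic-tool--brew-time><strong>Part 3: The Diagnostic Tool — Brew Time</strong>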
<a class=heading-link href=#part-3-the-diagnostic-tool--brew-time><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Brew time is not something you set directly; it&rsquo;s the <strong>result</strong> of how much resistance your coffee puck provides against the machine&rsquo;s water pressure. Think of it as a <strong>diagnostic tool</strong>.</p><p><strong>The 25-30 Second Guideline:</strong></p><p>This is a benchmark. If your 1:2 ratio shot falls within this time, your grind size is likely in the correct range for a balanced extraction.</p><ul><li><strong>Too Fast (&lt;25s):</strong> Indicates under-extraction (often tastes sour).</li><li><strong>Too Slow (>30s):</strong> Indicates over-extraction (often tastes bitter).</li></ul><p><strong>Taste is King:</strong> Remember, if a shot tastes fantastic at 32 seconds, it&rsquo;s a great shot! The time simply becomes part of your successful recipe for that specific coffee.</p><p><strong>Application for Your Breville Barista Pro:</strong></p><ul><li><strong>Pre-infusion:</strong> The Barista Pro&rsquo;s low-pressure pre-infusion is <strong>part of your total brew time</strong>. Its purpose is to saturate the puck evenly to prevent channeling. Keep it consistent for every shot while dialing in.</li></ul><hr><h3 id=part-4-the-primary-control--grind-setting><strong>Part 4: The Primary Control — Grind Setting</strong>
<a class=heading-link href=#part-4-the-primary-control--grind-setting><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>This is where the magic (and sometimes frustration) happens. Grind size is your main tool for controlling the resistance of the coffee puck, which directly dictates your brew time.</p><p><strong>The Dual Impact of Grinding Finer:</strong></p><ol><li><strong>Increases surface area:</strong> Allows for more efficient flavor extraction.</li><li><strong>Increases resistance:</strong> Slows down water flow and increases contact time.</li></ol><p><strong>The Risk of Grinding Too Fine (Channeling):</strong></p><p>If the grind is too fine, the puck becomes so dense that high-pressure water can&rsquo;t flow evenly. Instead, it &ldquo;breaks&rdquo; the puck and punches an easy path (a channel) through a weak spot. This results in a disastrous shot that is simultaneously:</p><ul><li><strong>Under-extracted:</strong> Most of the coffee is bypassed.</li><li><strong>Over-extracted:</strong> The water that does flow blasts through the channel, extracting harsh, bitter compounds.</li><li><strong>The Taste:</strong> A channeled shot tastes hollow, weak, sour, <em>and</em> bitter all at once.</li></ul><p><strong>The Goal:</strong> You want to <strong>grind as fine as you possibly can <em>without</em> causing significant channeling</strong>. This is the sweet spot for maximizing surface area and resistance for high, even extraction.</p><p><strong>Grind Retention (Purging):</strong> Most grinders retain some old grounds. When you change your grind setting, always purge a few grams of coffee to ensure your dose is entirely at the new setting.</p><p><strong>Application for Your Breville Barista Pro:</strong></p><ul><li><strong>Grinder Mechanism:</strong> The &ldquo;Grind Amount&rdquo; dial controls the <strong>TIME</strong> the grinder runs, not the weight. When you adjust the fineness, you <strong>must</strong> re-adjust the grind time to ensure you are still getting your target 18g dose.</li><li><strong>Tackling Channeling:</strong> The Barista Pro is prone to channeling. To fight this, focus on excellent <strong>puck prep</strong>: use a WDT (Weiss Distribution Technique) tool to break up clumps and evenly distribute the grounds before tamping levelly.</li></ul><hr><h3 id=the-complete-dialing-in-workflow><strong>The Complete Dialing-In Workflow</strong>
<a class=heading-link href=#the-complete-dialing-in-workflow><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>This systematic process will get you to a delicious shot from your Breville Barista Pro efficiently:</p><ol><li><strong>Set Your Constants:</strong><ul><li><strong>Dose:</strong> <strong>18g</strong>.</li><li><strong>Ratio:</strong> <strong>1:2</strong> (meaning a <strong>Yield</strong> of <strong>36g</strong>).</li><li><strong>Pre-infusion:</strong> Use a consistent method (e.g., manual 8-second hold).</li></ul></li><li><strong>Make an Initial Grind:</strong><ul><li>Set the grinder to a starting point of <strong>15</strong>.</li><li>Adjust the grind <strong>time</strong> until the grinder dispenses exactly 18g.</li></ul></li><li><strong>Pull the First Shot:</strong><ul><li>Brew manually, stopping at <strong>36g</strong> of liquid in the cup. Note the <strong>total brew time</strong>.</li></ul></li><li><strong>Taste and Diagnose:</strong><ul><li><strong>Fast & Sour? (&lt;25s):</strong> Grind is too coarse.</li><li><strong>Slow & Bitter? (>32s):</strong> Grind is too fine.</li></ul></li><li><strong>Make ONE Adjustment - THE GRIND SIZE:</strong><ul><li>If fast/sour, adjust the grind <strong>finer</strong> (e.g., from 15 down to 13).</li><li>If slow/bitter, adjust the grind <strong>coarser</strong> (e.g., from 15 up to 17).</li></ul></li><li><strong>Re-adjust and Repeat:</strong><ul><li>After changing the grind setting, <strong>purge</strong> a small amount of coffee.</li><li>Re-weigh your next dose and <strong>adjust the grind time</strong> to get back to exactly 18g.</li><li>Pull another 36g shot. Repeat this process until your shot tastes balanced and the time falls roughly between <strong>25-32 seconds</strong>.</li></ul></li></ol><p>Happy brewing! With patience and this systematic approach, you&rsquo;ll be pulling consistently delicious espresso shots from your Breville Barista Pro in no time.</p>

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,21 @@
<!doctype html><html lang=en><head><title>Beyond Words: How RVQ Teaches LLMs to See and Hear · Eric X. Liu's Personal Page</title><link rel=canonical href=https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/></head><body class="preload-transitions colorscheme-auto"><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/>Beyond Words: How RVQ Teaches LLMs to See and Hear</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-08-07T00:00:00Z>August 7, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
6-minute read</span></div></div></header><div class=post-content><p>Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It&rsquo;s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?</p><p>The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is <strong>Residual Vector Quantization (RVQ)</strong>.</p><p>This article dives deep into RVQ, exploring how it turns raw data into meaningful semantic IDs and how these IDs, in turn, unlock multi-modal understanding in LLMs.</p><h4 id=what-is-residual-vector-quantization-the-art-of-smart-compression><strong>What is Residual Vector Quantization? The Art of Smart Compression</strong>
<a class=heading-link href=#what-is-residual-vector-quantization-the-art-of-smart-compression><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>At its core, Vector Quantization (VQ) is a compression technique. It maps a high-dimensional vector (like a data embedding) to the single closest vector in a predefined dictionary, called a <strong>codebook</strong>. You then only need to store the index of that chosen vector. The problem? To represent complex data accurately, you&rsquo;d need a codebook with an astronomical number of entries, which is computationally impossible.</p><p>This is where <strong>Residual</strong> Vector Quantization shines. Instead of one giant codebook, RVQ uses a series of smaller codebooks in stages.</p><ol><li><strong>Stage 1 (Coarse Quantization):</strong> The input vector is quantized by the first codebook. This finds the broadest, most general category for the data.</li><li><strong>Calculate the Residual:</strong> The system calculates the error, or &ldquo;residual,&rdquo; between the original vector and its quantized version from Stage 1. This residual vector represents the information that was lost in the first coarse approximation.</li><li><strong>Stage 2 (Refinement):</strong> This residual vector is then quantized by the <em>second</em> codebook. This stage doesn&rsquo;t re-evaluate the whole vector, but only focuses on correcting the error from the previous stage.</li><li><strong>Iterate:</strong> This process repeats for several stages, with each subsequent codebook quantizing the residual error from the previous one, adding a finer and finer layer of detail.</li></ol><p>The final compressed representation is simply the sequence of indices from each codebook. For example, an ID like <code>[8, 5, 4, 1]</code> is produced. The magic of this approach is that it creates a <strong>hierarchical ID</strong>. The first digit <code>[8]</code> might represent &ldquo;Sports,&rdquo; the next <code>[5]</code> refines it to &ldquo;Court Sports,&rdquo; <code>[4]</code> to &ldquo;Beach Volleyball,&rdquo; and the final <code>[1]</code> distinguishes a specific match. Videos with similar content will naturally share a longer prefix in their Semantic ID.</p>
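<p>A minimal NumPy sketch of the encode/decode loop described above (mine, not the post&rsquo;s; real codebooks are learned, whereas these are random):</p><pre><code class=language-python># Illustrative RVQ: each stage quantizes the residual left by the previous one.
import numpy as np

rng = np.random.default_rng(0)
codebooks = [rng.normal(size=(16, 64)) for _ in range(4)]  # 4 stages, 16 codes each

def rvq_encode(x, codebooks):
    ids, residual = [], x.copy()
    for cb in codebooks:
        idx = int(np.linalg.norm(cb - residual, axis=1).argmin())  # nearest code
        ids.append(idx)
        residual = residual - cb[idx]  # only the leftover error moves on
    return ids

def rvq_decode(ids, codebooks):
    return sum(cb[i] for cb, i in zip(codebooks, ids))  # sum per-stage codewords

x = rng.normal(size=64)
ids = rvq_encode(x, codebooks)
print(ids, np.linalg.norm(x - rvq_decode(ids, codebooks)))  # 4-stage code + error
</code></pre><p>With learned codebooks, later stages keep shrinking this reconstruction error, which is why early indices carry coarse semantics and later ones carry detail.</p><h4 id=learning-what-matters-the-trainable-vq-autoencoder><strong>Learning What Matters: The Trainable VQ-Autoencoder</strong>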
<a class=heading-link href=#learning-what-matters-the-trainable-vq-autoencoder><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>A key insight is that RVQ is not a fixed algorithm but a <strong>trainable neural network component</strong>. Its codebooks are not predefined; they are learned. This learning happens within a <strong>Vector-Quantized Autoencoder (VQ-AE)</strong> architecture.</p><ol><li><strong>Encoder:</strong> A powerful neural network (e.g., a Transformer or CNN) takes the raw data (like video frames and audio) and converts it into a continuous semantic embedding.</li><li><strong>RVQ Bottleneck:</strong> This embedding is fed into the RVQ module, which quantizes it into the sequence of discrete IDs.</li><li><strong>Decoder:</strong> The decoder takes these discrete IDs, looks up the corresponding codebook vectors, sums them up to get a reconstructed embedding, and attempts to rebuild the original video/audio.</li></ol><p>The entire system is trained end-to-end. The <strong>reconstruction loss</strong> (the difference between the original and reconstructed data) is used to update the parameters of the Encoder, the Decoder, and, most importantly, <strong>the codebook vectors within the RVQ module</strong>. Initially random, the codebook vectors are gradually pushed to become meaningful &ldquo;anchors&rdquo; for the core concepts present in the training data.</p>
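<p>One detail this leaves implicit is how gradients cross the discrete bottleneck at all. A common answer is the straight-through estimator, shown here as an assumption rather than something the post specifies:</p><pre><code class=language-python># Straight-through trick: the forward pass uses the quantized vector z_q,
# while the backward pass routes gradients to the encoder output z unchanged.
import torch

def straight_through(z, z_q):
    return z + (z_q - z).detach()
</code></pre><h4 id=from-implicit-to-explicit-controlling-semantics-with-contrastive-learning><strong>From Implicit to Explicit: Controlling Semantics with Contrastive Learning</strong>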
<a class=heading-link href=#from-implicit-to-explicit-controlling-semantics-with-contrastive-learning><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>A standard VQ-AE learns implicit semantics. It gets good at reconstruction, but we can&rsquo;t control <em>what</em> concepts it learns. To make the Semantic IDs truly meaningful and aligned with human language, we introduce <strong>contrastive learning</strong>.</p><p>The architecture is enhanced with a parallel text encoder (like BERT or CLIP&rsquo;s). The model is then trained with a joint loss function:</p><p><code>L_total = L_reconstruction + λ * L_contrastive</code></p><ul><li><strong>Reconstruction Loss</strong> ensures the RVQ codes contain enough information to rebuild the input.</li><li><strong>Contrastive Loss</strong> forces the media embedding (from the video/audio encoder) to be mathematically &ldquo;close&rdquo; to the text embedding of its description, and &ldquo;far&rdquo; from the embeddings of unrelated text descriptions.</li></ul><p>This dual goal forces the model to organize its embedding space according to the semantics of human language. The codebook vectors now learn to represent concepts that are not just useful for reconstruction, but are also tied to explicit textual descriptions.</p>
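<p>In code, the joint objective might look like the sketch below: an MSE reconstruction term plus a CLIP-style InfoNCE contrastive term. The temperature, weight, and shapes are my assumptions, not the post&rsquo;s.</p><pre><code class=language-python># L_total = L_reconstruction + lam * L_contrastive, for a batch of matched
# (media, text) pairs; matched pairs sit on the diagonal of the logit matrix.
import torch
import torch.nn.functional as F

def joint_loss(recon, target, media_emb, text_emb, lam=0.5, temp=0.07):
    rec = F.mse_loss(recon, target)
    m = F.normalize(media_emb, dim=-1)
    t = F.normalize(text_emb, dim=-1)
    logits = m @ t.T / temp
    labels = torch.arange(m.size(0))
    con = F.cross_entropy(logits, labels)  # pull matched pairs together
    return rec + lam * con
</code></pre><h4 id=integrating-with-llms-two-powerful-paths-to-multi-modality><strong>Integrating with LLMs: Two Powerful Paths to Multi-Modality</strong>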
<a class=heading-link href=#integrating-with-llms-two-powerful-paths-to-multi-modality><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Once we have a contrastively-trained VQ-AE, we can use its output to give LLMs the ability to see and hear. There are two primary strategies for this.</p><p><strong>Path 1: The Tokenizer Approach - Teaching the LLM a New Language</strong></p><p>This path treats the RVQ IDs as a new vocabulary. It&rsquo;s a two-stage process ideal for high-fidelity content generation.</p><ol><li><strong>Create a Neural Codec:</strong> The trained VQ-AE serves as a powerful &ldquo;codec.&rdquo; You can take any piece of media (e.g., a song) and use the codec to compress it into a sequence of discrete RVQ tokens (e.g., <code>[8, 5, 4, 1, 8, 5, 9, 2, ...]</code>).</li><li><strong>Train a Generative LLM:</strong> A new Transformer model is trained auto-regressively on a massive dataset of these media-derived tokens. Its sole purpose is to learn the patterns and predict the next token in a sequence.</li></ol><p><strong>Use Case:</strong> This is the architecture behind models like Meta&rsquo;s MusicGen. A user provides a text prompt, which conditions the Transformer to generate a new sequence of RVQ tokens. These tokens are then fed to the VQ-AE&rsquo;s decoder to synthesize the final audio waveform.</p><p><strong>Path 2: The Adapter Approach - Translating for a Language Expert</strong></p><p>This path is used to augment a powerful, pre-trained, text-only LLM without the astronomical cost of retraining it.</p><ol><li><strong>Freeze the LLM:</strong> A massive, pre-trained LLM (like LLaMA) is frozen. Its deep language understanding is preserved.</li><li><strong>Use the Pre-Quantized Embedding:</strong> Instead of using the discrete RVQ tokens, we take the rich, continuous embedding vector produced by our media encoder <em>just before</em> it enters the RVQ module.</li><li><strong>Train a Small Adapter:</strong> A small, lightweight projection layer (or &ldquo;adapter&rdquo;) is trained. Its only job is to translate the media embedding into a vector that has the same format and structure as the LLM&rsquo;s own word embeddings. It learns to map visual concepts to their corresponding &ldquo;word&rdquo; concepts in the LLM&rsquo;s latent space.</li></ol><p><strong>Use Case:</strong> This is the principle behind models like Google&rsquo;s Flamingo. To answer a question about an image, the image is passed through the media encoder and adapter. The resulting &ldquo;vision-as-a-word&rdquo; vector is inserted into the prompt sequence alongside the text tokens. 
The frozen LLM can now &ldquo;reason&rdquo; about the visual input because it has been translated into a format it already understands.</p>

17
posts/index.html Normal file
View File

@@ -0,0 +1,17 @@
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><link rel=canonical href=https://ericxliu.me/posts/><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"></head><body class="preload-transitions colorscheme-auto"><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/posts/>Posts</a></h1></header><ul><li><span class=date>January 21, 2026</span>
<a class=title href=/posts/vibe-coding-from-the-jeep/>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</a></li><li><span class=date>January 16, 2026</span>
<a class=title href=/posts/reverse-engineering-antigravity-ide/>How I Built a Blog Agent that Writes About Itself</a></li><li><span class=date>January 7, 2026</span>
<a class=title href=/posts/rooting-pixel-2-xl-for-reverse-engineering/>Why I Downgraded Magisk to Root My Pixel 2 XL</a></li><li><span class=date>January 2, 2026</span>
<a class=title href=/posts/debugging-authentik-performance/>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</a></li><li><span class=date>December 29, 2025</span>
<a class=title href=/posts/open-webui-openai-websearch/>How I Got Open WebUI Talking to OpenAI Web Search</a></li><li><span class=date>December 27, 2025</span>
<a class=title href=/posts/technical-deep-dive-llm-categorization/>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</a></li><li><span class=date>December 19, 2025</span>
<a class=title href=/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/>The Convergence of Fast Weights, Linear Attention, and State Space Models</a></li><li><span class=date>December 8, 2025</span>
<a class=title href=/posts/vattention/>vAttention</a></li><li><span class=date>November 15, 2025</span>
<a class=title href=/posts/jellyfin-sso-with-authentik/>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</a></li><li><span class=date>October 4, 2025</span>
<a class=title href=/posts/benchmarking-llms-on-jetson-orin-nano/>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</a></li></ul><ul class=pagination><li>1</li><li><a href=/posts/page/2/>2</a></li><li><a href=/posts/page/3/>3</a></li></ul></section></div></main></body></html>

92
posts/index.xml Normal file
View File

@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Posts on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/posts/</link><description>Recent content in Posts on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Thu, 22 Jan 2026 06:48:07 +0000</lastBuildDate><atom:link href="https://ericxliu.me/posts/index.xml" rel="self" type="application/rss+xml"/><item><title>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</title><link>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</link><pubDate>Wed, 21 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</guid><description>&lt;p&gt;&amp;ldquo;Vibe coding&amp;rdquo; has become my latest obsession. It&amp;rsquo;s that flow state where the tools disappear, and you&amp;rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I&amp;rsquo;ve been trying to answer a childhood dream.&lt;/p&gt;
&lt;p&gt;Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with &lt;em&gt;Baywatch&lt;/em&gt;, one of the first American TV shows I ever watched was &lt;em&gt;Knight Rider&lt;/em&gt;. I don&amp;rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.&lt;/p&gt;</description></item><item><title>How I Built a Blog Agent that Writes About Itself</title><link>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</link><pubDate>Fri, 16 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</guid><description>&lt;p&gt;I&amp;rsquo;ve been spending a lot of time &amp;ldquo;vibe coding&amp;rdquo; in the Antigravity IDE lately. It&amp;rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and &amp;ldquo;aha!&amp;rdquo; moments is locked away in an opaque, internal format.&lt;/p&gt;
&lt;p&gt;I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you&amp;rsquo;re reading right now).&lt;/p&gt;</description></item><item><title>Why I Downgraded Magisk to Root My Pixel 2 XL</title><link>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</link><pubDate>Wed, 07 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</guid><description>&lt;p&gt;For the past few weeks, I&amp;rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn&amp;rsquo;t proof.&lt;/p&gt;
&lt;p&gt;Static analysis has its limits. I found the &amp;ldquo;smoking gun&amp;rdquo; function—a native method responsible for encrypting the login payload—but understanding &lt;em&gt;how&lt;/em&gt; it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.&lt;/p&gt;</description></item><item><title>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</title><link>https://ericxliu.me/posts/debugging-authentik-performance/</link><pubDate>Fri, 02 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/debugging-authentik-performance/</guid><description>&lt;p&gt;In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running &amp;ldquo;production&amp;rdquo; at home, there is only one metric that truly matters: &lt;strong&gt;The Wife Acceptance Factor (WAF)&lt;/strong&gt;.&lt;/p&gt;
&lt;p&gt;My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was &amp;ldquo;slow sometimes.&amp;rdquo; She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.&lt;/p&gt;</description></item><item><title>How I Got Open WebUI Talking to OpenAI Web Search</title><link>https://ericxliu.me/posts/open-webui-openai-websearch/</link><pubDate>Mon, 29 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/open-webui-openai-websearch/</guid><description>&lt;p&gt;OpenAI promised native web search in GPT-5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue &lt;a href="https://github.com/BerriAI/litellm/issues/13042" class="external-link" target="_blank" rel="noopener"&gt;#13042&lt;/a&gt; tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT-5 traffic through the Responses API and mask every &lt;code&gt;web_search_call&lt;/code&gt; before the UI ever sees it.&lt;/p&gt;
&lt;p&gt;This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.&lt;/p&gt;</description></item><item><title>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</title><link>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</link><pubDate>Sat, 27 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</guid><description>&lt;p&gt;Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and &amp;ldquo;wait, was this dinner or &lt;em&gt;vacation&lt;/em&gt; dinner?&amp;rdquo; questions.&lt;/p&gt;
&lt;p&gt;For years, I relied on a rule-based system to categorize our credit card transactions. It worked&amp;hellip; mostly. But maintaining &lt;code&gt;if &amp;quot;UBER&amp;quot; in description and amount &amp;gt; 50&lt;/code&gt; style rules is a never-ending battle against the entropy of merchant names and changing habits.&lt;/p&gt;</description></item><item><title>The Convergence of Fast Weights, Linear Attention, and State Space Models</title><link>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</link><pubDate>Fri, 19 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</guid><description>&lt;p&gt;Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer&amp;rsquo;s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (&amp;ldquo;Fast Weights&amp;rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).&lt;/p&gt;
&lt;p&gt;This article explores the mathematical equivalence between Hinton&amp;rsquo;s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.&lt;/p&gt;</description></item><item><title>vAttention</title><link>https://ericxliu.me/posts/vattention/</link><pubDate>Mon, 08 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vattention/</guid><description>&lt;p&gt;Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While &lt;strong&gt;PagedAttention&lt;/strong&gt; (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU&amp;rsquo;s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.&lt;/p&gt;
&lt;h4 id="the-status-quo-pagedattention-and-software-tables"&gt;
The Status Quo: PagedAttention and Software Tables
&lt;a class="heading-link" href="#the-status-quo-pagedattention-and-software-tables"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h4&gt;
&lt;p&gt;Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &amp;ldquo;page table&amp;rdquo; (the Block Table) [1].&lt;/p&gt;</description></item><item><title>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</title><link>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</link><pubDate>Sat, 15 Nov 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</guid><description>&lt;p&gt;I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren&amp;rsquo;t immediately obvious.&lt;/p&gt;
&lt;h2 id="the-setup"&gt;
The Setup
&lt;a class="heading-link" href="#the-setup"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h2&gt;
&lt;p&gt;The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.&lt;/p&gt;</description></item><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description>&lt;h2 id="introduction"&gt;
Introduction
&lt;a class="heading-link" href="#introduction"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h2&gt;
&lt;p&gt;NVIDIA&amp;rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there&amp;rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.&lt;/p&gt;
&lt;p&gt;After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device&amp;rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn&amp;rsquo;t computation—it&amp;rsquo;s memory bandwidth. This isn&amp;rsquo;t just a quirk of one device; it&amp;rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.&lt;/p&gt;</description></item><item><title>Flashing Jetson Orin Nano in Virtualized Environments</title><link>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</link><pubDate>Thu, 02 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</guid><description>&lt;h1 id="flashing-jetson-orin-nano-in-virtualized-environments"&gt;
Flashing Jetson Orin Nano in Virtualized Environments
&lt;a class="heading-link" href="#flashing-jetson-orin-nano-in-virtualized-environments"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h1&gt;
&lt;h2 id="introduction"&gt;
Introduction
&lt;a class="heading-link" href="#introduction"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h2&gt;
&lt;p&gt;Flashing NVIDIA Jetson devices remotely presents unique challenges when the host machine is virtualized. This article documents the technical challenges, failures, and eventual success of flashing a Jetson Orin Nano Super developer kit using NVIDIA SDK Manager in various virtualized environments, specifically focusing on QEMU/KVM virtual machines and LXC containers on Proxmox VE.&lt;/p&gt;</description></item><item><title>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</title><link>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</link><pubDate>Sun, 28 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</guid><description>&lt;h3 id="overview"&gt;
Overview
&lt;a class="heading-link" href="#overview"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;When using WireGuard together with MWAN3 on OpenWrt, the tunnel can fail to establish or flap when the peer&amp;rsquo;s IP is routed into the tunnel itself. This is a classic routing bootstrap problem: WireGuard wants to route 0.0.0.0/0 into the tunnel, but the UDP packets to the peer&amp;rsquo;s public endpoint also get captured, so they never reach the Internet to bring the tunnel up.&lt;/p&gt;</description></item><item><title>UniFi VLAN Migration to Zone-Based Architecture</title><link>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</link><pubDate>Mon, 22 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</guid><description>&lt;p&gt;Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.&lt;/p&gt;
&lt;p&gt;This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.&lt;/p&gt;</description></item><item><title>Quantization in LLMs</title><link>https://ericxliu.me/posts/quantization-in-llms/</link><pubDate>Tue, 19 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/quantization-in-llms/</guid><description>&lt;p&gt;The burgeoning scale of Large Language Models (LLMs) has necessitated a paradigm shift in their deployment, moving beyond full-precision floating-point arithmetic towards lower-precision representations. Quantization, the process of mapping a wide range of continuous values to a smaller, discrete set, has emerged as a critical technique to reduce model size, accelerate inference, and lower energy consumption. This article provides a technical overview of quantization theories, their application in modern LLMs, and highlights the ongoing innovations in this domain.&lt;/p&gt;</description></item><item><title>Breville Barista Pro Maintenance</title><link>https://ericxliu.me/posts/breville-barista-pro-maintenance/</link><pubDate>Sat, 16 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/breville-barista-pro-maintenance/</guid><description>&lt;p&gt;Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.&lt;/p&gt;
&lt;h4 id="understanding-the-two-primary-maintenance-cycles"&gt;
&lt;strong&gt;Understanding the Two Primary Maintenance Cycles&lt;/strong&gt;
&lt;a class="heading-link" href="#understanding-the-two-primary-maintenance-cycles"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h4&gt;
&lt;p&gt;The Breville Barista Pro has two distinct, automated maintenance procedures: the &lt;strong&gt;Cleaning (Flush) Cycle&lt;/strong&gt; and the &lt;strong&gt;Descale Cycle&lt;/strong&gt;. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.&lt;/p&gt;</description></item><item><title>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</title><link>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</link><pubDate>Sat, 09 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</guid><description>&lt;p&gt;I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;nvidia-smi failed to communicate with the NVIDIA driver&lt;/li&gt;
&lt;li&gt;modprobe nvidia → “Key was rejected by service”&lt;/li&gt;
&lt;/ul&gt;
&lt;p&gt;That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.&lt;/p&gt;</description></item><item><title>Beyond Words: How RVQ Teaches LLMs to See and Hear</title><link>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</link><pubDate>Thu, 07 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</guid><description>&lt;p&gt;Large Language Models (LLMs) are masters of text, but the world is not made of text alone. Its a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?&lt;/p&gt;
&lt;p&gt;The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is &lt;strong&gt;Residual Vector Quantization (RVQ)&lt;/strong&gt;.&lt;/p&gt;</description></item><item><title>Supabase Deep Dive: It's Not Magic, It's Just Postgres</title><link>https://ericxliu.me/posts/supabase-deep-dive/</link><pubDate>Sun, 03 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/supabase-deep-dive/</guid><description>&lt;p&gt;In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what&amp;rsquo;s really going on.&lt;/p&gt;
&lt;p&gt;Supabase enters this space with a radically different philosophy: &lt;strong&gt;transparency&lt;/strong&gt;. It provides the convenience of a BaaS, but it&amp;rsquo;s built on the world&amp;rsquo;s most trusted relational database: PostgreSQL. The &amp;ldquo;magic&amp;rdquo; isn&amp;rsquo;t a proprietary black box; it&amp;rsquo;s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.&lt;/p&gt;</description></item><item><title>A Deep Dive into PPO for Language Models</title><link>https://ericxliu.me/posts/ppo-for-language-models/</link><pubDate>Sat, 02 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/ppo-for-language-models/</guid><description>&lt;p&gt;Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don&amp;rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).&lt;/p&gt;
&lt;p&gt;You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
&lt;img src="https://ericxliu.me/images/ppo-for-language-models/7713bd3ecf27442e939b9190fa08165d.png" alt="S3 File"&gt;&lt;/p&gt;</description></item><item><title>Mixture-of-Experts (MoE) Models Challenges &amp; Solutions in Practice</title><link>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</link><pubDate>Wed, 02 Jul 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</guid><description>&lt;p&gt;Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called &amp;ldquo;experts&amp;rdquo;) to specialize in different types of inputs. A &amp;ldquo;gating network&amp;rdquo; or &amp;ldquo;router&amp;rdquo; learns to dispatch each input (or &amp;ldquo;token&amp;rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.&lt;/p&gt;
&lt;h3 id="1-challenge-non-differentiability-of-routing-functions"&gt;
1. Challenge: Non-Differentiability of Routing Functions
&lt;a class="heading-link" href="#1-challenge-non-differentiability-of-routing-functions"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;&lt;strong&gt;The Problem:&lt;/strong&gt;
Many routing mechanisms, especially &amp;ldquo;Top-K routing,&amp;rdquo; involve a discrete, hard selection process. A common function is &lt;code&gt;KeepTopK(v, k)&lt;/code&gt;, which selects the top &lt;code&gt;k&lt;/code&gt; scoring elements from a vector &lt;code&gt;v&lt;/code&gt; and sets others to $-\infty$ or $0$.&lt;/p&gt;</description></item><item><title>An Architectural Deep Dive of T5</title><link>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</link><pubDate>Sun, 01 Jun 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</guid><description>&lt;p&gt;In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the &amp;ldquo;decoder-only&amp;rdquo; model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.&lt;/p&gt;
&lt;p&gt;But to truly understand the field, we must look at the pivotal models that explored different paths. Google&amp;rsquo;s T5, or &lt;strong&gt;Text-to-Text Transfer Transformer&lt;/strong&gt;, stands out as one of the most influential. It didn&amp;rsquo;t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.&lt;/p&gt;</description></item><item><title>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</title><link>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</link><pubDate>Thu, 01 May 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</guid><description>&lt;p&gt;Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.&lt;/p&gt;
&lt;p&gt;Our overarching philosophy is simple: &lt;strong&gt;isolate and change only one variable at a time.&lt;/strong&gt; While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your &lt;strong&gt;grind size&lt;/strong&gt; is your most powerful lever.&lt;/p&gt;</description></item><item><title>Transformer's Core Mechanics</title><link>https://ericxliu.me/posts/transformer-s-core-mechanics/</link><pubDate>Tue, 01 Apr 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/transformer-s-core-mechanics/</guid><description>&lt;p&gt;The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of &amp;ldquo;channels&amp;rdquo; to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.&lt;/p&gt;
&lt;h3 id="1-the-channel-a-foundational-view-of-d_model"&gt;
1. The &amp;ldquo;Channel&amp;rdquo;: A Foundational View of &lt;code&gt;d_model&lt;/code&gt;
&lt;a class="heading-link" href="#1-the-channel-a-foundational-view-of-d_model"&gt;
&lt;i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"&gt;&lt;/i&gt;
&lt;span class="sr-only"&gt;Link to heading&lt;/span&gt;
&lt;/a&gt;
&lt;/h3&gt;
&lt;p&gt;In deep learning, a &amp;ldquo;channel&amp;rdquo; can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model&amp;rsquo;s primary embedding dimension, commonly referred to as &lt;code&gt;d_model&lt;/code&gt;.&lt;/p&gt;</description></item><item><title>Some useful files</title><link>https://ericxliu.me/posts/useful/</link><pubDate>Mon, 26 Oct 2020 04:14:43 +0000</pubDate><guid>https://ericxliu.me/posts/useful/</guid><description>&lt;ul&gt;
&lt;li&gt;&lt;a href="https://ericxliu.me/rootCA.crt" &gt;rootCA.pem&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;</description></item></channel></rss>


@@ -0,0 +1,74 @@
<!doctype html><html lang=en><head><title>Setting Up Jellyfin SSO with Authentik: Surviving the Beta · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren&rsquo;t immediately obvious.
The Setup
Link to heading
The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Setting Up Jellyfin SSO with Authentik: Surviving the Beta"><meta name=twitter:description content="I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that arent immediately obvious.
The Setup Link to heading The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent."><meta property="og:url" content="https://ericxliu.me/posts/jellyfin-sso-with-authentik/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Setting Up Jellyfin SSO with Authentik: Surviving the Beta"><meta property="og:description" content="I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that arent immediately obvious.
The Setup Link to heading The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-11-15T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-28T21:21:42+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/jellyfin-sso-with-authentik/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Setting Up Jellyfin SSO with Authentik: Surviving the Beta","genre":"Blog","wordcount":"516","url":"https:\/\/ericxliu.me\/posts\/jellyfin-sso-with-authentik\/","datePublished":"2025-11-15T00:00:00\u002b00:00","dateModified":"2025-12-28T21:21:42\u002b00:00","description":"\u003cp\u003eI recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. 
Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren\u0026rsquo;t immediately obvious.\u003c\/p\u003e\n\u003ch2 id=\u0022the-setup\u0022\u003e\n The Setup\n \u003ca class=\u0022heading-link\u0022 href=\u0022#the-setup\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h2\u003e\n\u003cp\u003eThe configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/jellyfin-sso-with-authentik/>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-11-15T00:00:00Z>November 15, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
3-minute read</span></div></div></header><div class=post-content><p>I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren&rsquo;t immediately obvious.</p><h2 id=the-setup>The Setup
<a class=heading-link href=#the-setup><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.</p><h3 id=1-authentik-terraform>1. Authentik (Terraform)
<a class=heading-link href=#1-authentik-terraform><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Let Authentik manage the secrets. Don&rsquo;t hardcode them.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-hcl data-lang=hcl><span style=display:flex><span><span style=color:#ff7b72>resource</span> <span style=color:#a5d6ff>&#34;authentik_provider_oauth2&#34; &#34;jellyfin&#34;</span> {
</span></span><span style=display:flex><span> name <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#a5d6ff>&#34;Jellyfin&#34;</span>
</span></span><span style=display:flex><span> client_id <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#a5d6ff>&#34;jellyfin-ericxliu-me&#34;</span><span style=color:#8b949e;font-style:italic>
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic> # client_secret omitted -&gt; auto-generated
</span></span></span><span style=display:flex><span> property_mappings <span style=color:#ff7b72;font-weight:700>=</span> [
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>openid</span>.<span style=color:#ff7b72>id</span>,
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>profile</span>.<span style=color:#ff7b72>id</span>,
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>email</span>.<span style=color:#ff7b72>id</span>,
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>groups</span>.<span style=color:#ff7b72>id</span>
</span></span><span style=display:flex><span> ]<span style=color:#8b949e;font-style:italic>
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic> # ...
</span></span></span><span style=display:flex><span>}
</span></span></code></pre></div><h3 id=2-jellyfin-plugin-bashcurl>2. Jellyfin Plugin (Bash/Curl)
<a class=heading-link href=#2-jellyfin-plugin-bashcurl><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># ... (retrieve secret from terraform) ...</span>
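# A minimal sketch, assuming the Terraform output suggested in the block above:
#   SECRET=$(terraform output -raw jellyfin_client_secret)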
</span></span><span style=display:flex><span>curl -X POST <span style=color:#a5d6ff>&#34;https://jellyfin.ericxliu.me/SSO/OID/Add/authentik&#34;</span> ... -d <span style=color:#a5d6ff>&#39;{
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;OidClientId&#34;: &#34;jellyfin-ericxliu-me&#34;,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;OidSecret&#34;: &#34;&#39;</span><span style=color:#a5d6ff>&#34;</span><span style=color:#a5d6ff>${</span><span style=color:#79c0ff>SECRET</span><span style=color:#a5d6ff>}</span><span style=color:#a5d6ff>&#34;</span><span style=color:#a5d6ff>&#39;&#34;,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;OidScopes&#34;: [&#34;openid&#34;, &#34;profile&#34;, &#34;email&#34;, &#34;groups&#34;],
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;SchemeOverride&#34;: &#34;https&#34;,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;RoleClaim&#34;: &#34;groups&#34;
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> }&#39;</span>
</span></span></code></pre></div><h2 id=obscure-errors--fixes>Obscure Errors & Fixes
<a class=heading-link href=#obscure-errors--fixes><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>Because the plugin is still maturing, it doesn&rsquo;t always handle configuration errors gracefully. Here are the two main &ldquo;cryptic&rdquo; failures I encountered.</p><h3 id=1-the-value-cannot-be-null-crash>1. The &ldquo;Value cannot be null&rdquo; Crash
<a class=heading-link href=#1-the-value-cannot-be-null-crash><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Symptom</strong>:
You attempt to start the SSO flow and get a generic 500 error. The Jellyfin logs show a C# exception:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-fallback data-lang=fallback><span style=display:flex><span>System.ArgumentNullException: Value cannot be null. (Parameter &#39;source&#39;)
</span></span><span style=display:flex><span> at System.Linq.Enumerable.Prepend[TSource](IEnumerable`1 source, TSource element)
</span></span><span style=display:flex><span> at Jellyfin.Plugin.SSO.Api.SSOController.OidChallenge(...)
</span></span></code></pre></div><p><strong>The Reality</strong>:
This looks like a deep internal failure, but it&rsquo;s actually a simple configuration miss. The plugin code attempts to prepend &ldquo;openid profile&rdquo; to your configured scopes without first checking that the scopes array exists.
<strong>The Fix</strong>:
You <strong>must</strong> explicitly provide <code>"OidScopes"</code> in your JSON configuration. It cannot be null or omitted.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#a5d6ff>&#34;OidScopes&#34;</span><span style=color:#f85149>:</span> [<span style=color:#a5d6ff>&#34;openid&#34;</span>, <span style=color:#a5d6ff>&#34;profile&#34;</span>, <span style=color:#a5d6ff>&#34;email&#34;</span>, <span style=color:#a5d6ff>&#34;groups&#34;</span>]
</span></span></code></pre></div><h3 id=2-the-httphttps-mismatch-redirect-loop>2. The HTTP/HTTPS Mismatch (Redirect Loop)
<a class=heading-link href=#2-the-httphttps-mismatch-redirect-loop><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Symptom</strong>:
Authentik rejects the authorization request with &ldquo;Redirect URI mismatch&rdquo;, or the browser enters a redirect loop.
<strong>The Reality</strong>:
Jellyfin often sits behind a reverse proxy (Ingress/Traefik) terminating TLS. Use your browser&rsquo;s developer tools to inspect the network requests. You will likely see the <code>redirect_uri</code> parameter encoded as <code>http://jellyfin...</code> instead of <code>https://</code>.
<strong>The Fix</strong>:
Do not rely on header forwarding magic. Force the scheme in the plugin configuration:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#a5d6ff>&#34;SchemeOverride&#34;</span><span style=color:#f85149>:</span> <span style=color:#a5d6ff>&#34;https&#34;</span>
</span></span></code></pre></div><h3 id=3-case-sensitivity-in-json>3. Case Sensitivity in JSON
<a class=heading-link href=#3-case-sensitivity-in-json><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Symptom</strong>: Configuration seems to be ignored or fields remain empty after a POST.
<strong>The Reality</strong>: The plugin&rsquo;s API controller keys are case-sensitive in some versions/contexts.
<strong>The Fix</strong>: Stick to PascalCase for the keys (<code>OidEndpoint</code>, <code>AdminRoles</code>) as seen in the C# DTOs, rather than camelCase (<code>oidEndpoint</code>), unless the documentation for your specific version explicitly states otherwise. When in doubt, checking the source code (<code>SSOController.cs</code>) is often faster than trusting the README; a minimal request sketch along these lines appears below.</p><h2 id=summary>Summary
<a class=heading-link href=#summary><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>When debugging Jellyfin SSO, don&rsquo;t trust the UI to tell you what&rsquo;s wrong.</p><ol><li><strong>Check the logs</strong> (<code>kubectl logs</code>) for C# stack traces.</li><li><strong>Sanitize your JSON</strong> inputs (arrays can&rsquo;t be null).</li><li><strong>Inspect the URL parameters</strong> in your browser to see what Redirect URI is actually being generated.</li></ol><h3 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>Jellyfin SSO Plugin Repository: <code>https://github.com/9p4/jellyfin-plugin-sso</code></li><li>Authentik Documentation: <code>https://goauthentik.io/docs/providers/oauth2/</code></li><li>Jellyfin API Documentation: <code>https://api.jellyfin.org/</code></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>


@@ -0,0 +1,47 @@
<!doctype html><html lang=en><head><title>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called &ldquo;experts&rdquo;) to specialize in different types of inputs. A &ldquo;gating network&rdquo; or &ldquo;router&rdquo; learns to dispatch each input (or &ldquo;token&rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.
1. Challenge: Non-Differentiability of Routing Functions
Link to heading
The Problem:
Many routing mechanisms, especially &ldquo;Top-K routing,&rdquo; involve a discrete, hard selection process. A common function is KeepTopK(v, k), which selects the top k scoring elements from a vector v and sets others to $-\infty$ or $0$."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice"><meta name=twitter:description content="Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called “experts”) to specialize in different types of inputs. A “gating network” or “router” learns to dispatch each input (or “token”) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.
1. Challenge: Non-Differentiability of Routing Functions Link to heading The Problem: Many routing mechanisms, especially “Top-K routing,” involve a discrete, hard selection process. A common function is KeepTopK(v, k), which selects the top k scoring elements from a vector v and sets others to $-\infty$ or $0$."><meta property="og:url" content="https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice"><meta property="og:description" content="Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called “experts”) to specialize in different types of inputs. A “gating network” or “router” learns to dispatch each input (or “token”) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.
1. Challenge: Non-Differentiability of Routing Functions Link to heading The Problem: Many routing mechanisms, especially “Top-K routing,” involve a discrete, hard selection process. A common function is KeepTopK(v, k), which selects the top k scoring elements from a vector v and sets others to $-\infty$ or $0$."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-07-02T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-03T06:02:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Mixture-of-Experts (MoE) Models Challenges \u0026 Solutions in Practice","genre":"Blog","wordcount":"1381","url":"https:\/\/ericxliu.me\/posts\/mixture-of-experts-moe-models-challenges-solutions-in-practice\/","datePublished":"2025-07-02T00:00:00\u002b00:00","dateModified":"2025-08-03T06:02:48\u002b00:00","description":"\u003cp\u003eMixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called \u0026ldquo;experts\u0026rdquo;) to specialize in different types of inputs. A \u0026ldquo;gating network\u0026rdquo; or \u0026ldquo;router\u0026rdquo; learns to dispatch each input (or \u0026ldquo;token\u0026rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.\u003c\/p\u003e\n\u003ch3 id=\u00221-challenge-non-differentiability-of-routing-functions\u0022\u003e\n 1. 
Challenge: Non-Differentiability of Routing Functions\n \u003ca class=\u0022heading-link\u0022 href=\u0022#1-challenge-non-differentiability-of-routing-functions\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h3\u003e\n\u003cp\u003e\u003cstrong\u003eThe Problem:\u003c\/strong\u003e\nMany routing mechanisms, especially \u0026ldquo;Top-K routing,\u0026rdquo; involve a discrete, hard selection process. A common function is \u003ccode\u003eKeepTopK(v, k)\u003c\/code\u003e, which selects the top \u003ccode\u003ek\u003c\/code\u003e scoring elements from a vector \u003ccode\u003ev\u003c\/code\u003e and sets others to $-\\infty$ or $0$.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-07-02T00:00:00Z>July 2, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
7-minute read</span></div></div></header><div class=post-content><p>Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called &ldquo;experts&rdquo;) to specialize in different types of inputs. A &ldquo;gating network&rdquo; or &ldquo;router&rdquo; learns to dispatch each input (or &ldquo;token&rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.</p><h3 id=1-challenge-non-differentiability-of-routing-functions>1. Challenge: Non-Differentiability of Routing Functions
<a class=heading-link href=#1-challenge-non-differentiability-of-routing-functions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Problem:</strong>
Many routing mechanisms, especially &ldquo;Top-K routing,&rdquo; involve a discrete, hard selection process. A common function is <code>KeepTopK(v, k)</code>, which selects the top <code>k</code> scoring elements from a vector <code>v</code> and sets others to $-\infty$ or $0$.</p>$$
KeepTopK(v, k)_i = \begin{cases} v_i & \text{if } v_i \text{ is in the top } k \text{ elements of } v \\ -\infty & \text{otherwise.} \end{cases}
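\\[4pt] \text{Illustrative example (values assumed): } v = (1.2,\, -0.3,\, 0.7,\, 0.1),\ k = 2 \;\Rightarrow\; KeepTopK(v, 2) = (1.2,\, -\infty,\, 0.7,\, -\infty)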
$$<p>This function is <strong>not differentiable</strong>. Its gradient is zero almost everywhere and undefined at the threshold points, making it impossible to directly train the gating network&rsquo;s parameters (e.g., $W_g$) using standard gradient descent.</p><p><strong>Solutions (Stochastic Approximations):</strong>
To enable end-to-end training, non-differentiable routing decisions must be approximated with differentiable or stochastic methods.</p><ul><li><p><strong>Stochastic Scoring (e.g., Shazeer et al. 2017):</strong>
The expert score $H(x)_i = (x \cdot W_g)_i + \text{StandardNormal}() \cdot \text{Softplus}((x \cdot W_{noise})_i)$ introduces Gaussian noise. This makes the scores themselves stochastic, which can be leveraged with other methods.</p></li><li><p><strong>Gumbel-Softmax Trick (or Concrete Distribution):</strong>
This method allows for differentiable sampling from categorical distributions. Instead of directly picking the top-k, Gumbel noise is added to the scores, and a Softmax (with a temperature parameter) is applied. This provides a continuous, differentiable approximation of a discrete choice, allowing gradients to flow back.</p></li><li><p><strong>REINFORCE (Score Function Estimator):</strong>
This is a policy gradient method from reinforcement learning. The routing decision is treated as an action, and the gating network&rsquo;s parameters are updated based on the &ldquo;reward&rdquo; (e.g., the model&rsquo;s performance). Gradients are estimated by sampling routing choices and weighting them by their outcomes.</p></li><li><p><strong>Straight-Through Estimator (STE):</strong>
A simpler approximation where, during the backward pass, gradients are treated as if the non-differentiable operation was an identity function or a simple smooth function.</p></li><li><p><strong>Softmax after TopK (e.g., Mixtral, DBRX, DeepSeek v3):</strong>
Instead of <code>Softmax(KeepTopK(...))</code>, some models apply a Softmax <em>only to the scores of the selected TopK experts</em>, and then assign $0$ to the rest. This provides differentiable weights for the selected experts while still enforcing sparsity, as sketched below.</p></li></ul>
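<p>As a concrete illustration of that last variant, here is a minimal PyTorch sketch of &ldquo;softmax after TopK&rdquo; routing. The function and tensor names are illustrative, not taken from any particular codebase:</p><div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import torch
import torch.nn.functional as F

def top_k_softmax_router(logits: torch.Tensor, k: int):
    # logits: [num_tokens, num_experts] raw router scores
    topk_vals, topk_idx = logits.topk(k, dim=-1)   # hard selection of the K best experts
    weights = F.softmax(topk_vals, dim=-1)         # normalize only over the selected scores
    gates = torch.zeros_like(logits)               # unselected experts get weight 0
    gates.scatter_(-1, topk_idx, weights)          # gradients flow to the selected logits
    return gates, topk_idx
</code></pre></div><p>Gradients reach the gating parameters through the selected experts&rsquo; weights, while the hard TopK selection itself remains non-differentiable.</p><h3 id=2-challenge-uneven-expert-utilization-balancing-loss>2. Challenge: Uneven Expert Utilization (Balancing Loss)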
<a class=heading-link href=#2-challenge-uneven-expert-utilization-balancing-loss><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Problem:</strong>
Left unchecked, the gating network might learn to heavily favor a few experts, leaving others underutilized. This leads to:</p><ul><li><strong>System Inefficiency:</strong> Overloaded experts become bottlenecks, while underutilized experts waste computational resources.</li><li><strong>Suboptimal Learning:</strong> Experts might not specialize effectively if they don&rsquo;t receive diverse data.</li></ul><p><strong>Solution: Heuristic Balancing Losses (e.g., from Switch Transformer, Fedus et al. 2022)</strong>
An auxiliary loss is added to the total model loss during training to encourage more even expert usage.</p>$$ \text{loss}_{\text{auxiliary}} = \alpha \cdot N \cdot \sum_{i=1}^{N} f_i \cdot P_i $$<p>Where:</p><ul><li>$\alpha$: A hyperparameter controlling the strength of the auxiliary loss.</li><li>$N$: Total number of experts.</li><li>$f_i$: The <strong>fraction of tokens <em>actually dispatched</em> to expert $i$</strong> in the current batch $B$, which contains $T$ tokens.
$$ f_i = \frac{1}{T} \sum_{x \in B} \mathbf{1}\{\text{argmax } p(x) = i\} $$
($p(x)$ here refers to the output of the gating network, which could be $s_{i,t}$ in the DeepSeek/classic router. The $\text{argmax}$ means it counts hard assignments to expert $i$.)</li><li>$P_i$: The <strong>fraction of the router <em>probability mass</em> allocated to expert $i$</strong> in the current batch $B$.
$$ P_i = \frac{1}{T} \sum_{x \in B} p_i(x) $$
($p_i(x)$ is the learned probability (or soft score) from the gating network for token $x$ and expert $i$.)</li></ul><p><strong>How it works:</strong>
Since $\sum_i f_i = \sum_i P_i = 1$, the sum $\sum_i f_i \cdot P_i$ is minimized when routing is uniform, i.e., when each $f_i$ and $P_i$ equals $1/N$. If an expert $i$ is overused (high $f_i$ and $P_i$), its term contributes significantly to the loss. Moreover, the derivative with respect to $p_i(x)$ is proportional to $f_i$, so &ldquo;more frequent use = stronger downweighting&rdquo;: the gating network is penalized most for sending additional traffic to an already busy expert (a code sketch follows the list below).</p><p><strong>Relationship to Gating Network:</strong></p><ul><li><strong>$p_i(x)$ (or $s_{i,t}$):</strong> This is the output of the <strong>learned gating network</strong> (e.g., from a linear layer followed by Softmax). The gating network&rsquo;s parameters are updated via gradient descent, influenced by this auxiliary loss.</li><li><strong>$P_i$:</strong> This is <em>calculated</em> from the outputs of the learned gating network for the current batch. It&rsquo;s not a pre-defined value.</li></ul>
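<p>The following is a minimal PyTorch sketch of this auxiliary loss (names are illustrative). Note that $f_i$ is computed with a hard <code>argmax</code>, so gradients flow only through the $P_i$ term, matching the formula above:</p><div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import torch
import torch.nn.functional as F

def switch_balancing_loss(router_logits: torch.Tensor, alpha: float = 0.01):
    # router_logits: [T, N] scores for T tokens and N experts
    probs = F.softmax(router_logits, dim=-1)       # p_i(x), the soft router probabilities
    T, N = probs.shape
    hard_assign = probs.argmax(dim=-1)             # top-1 dispatch decision per token
    f = torch.bincount(hard_assign, minlength=N).float() / T   # fraction of tokens per expert
    P = probs.mean(dim=0)                          # mean router probability per expert
    return alpha * N * torch.sum(f * P)            # differentiable only through P
</code></pre></div><p><strong>Limitation (&ldquo;Second Best&rdquo; Scenario):</strong>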
Even with this loss, an expert can remain imbalanced if it&rsquo;s consistently the &ldquo;second best&rdquo; option (high $P_i$) but never the <em>absolute top choice</em> that gets counted in $f_i$ (especially if $K=1$). This is because $f_i$ strictly counts hard assignments based on <code>argmax</code>. This limitation highlights why &ldquo;soft&rdquo; routing or &ldquo;softmax after TopK&rdquo; approaches can be more effective for truly even distribution.</p><h3 id=3-challenge-overfitting-during-fine-tuning>3. Challenge: Overfitting during Fine-tuning
<a class=heading-link href=#3-challenge-overfitting-during-fine-tuning><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Problem:</strong>
Sparse MoE models, despite activating only a few experts per token, possess a very large total number of parameters. When fine-tuning these models on <strong>smaller datasets</strong>, they are highly prone to <strong>overfitting</strong>. The model&rsquo;s vast capacity allows it to memorize the limited fine-tuning data, leading to poor generalization on unseen validation data. This is evident when training loss continues to decrease but validation loss stagnates or increases.</p><p><strong>Solutions:</strong></p><ul><li><p><strong>Zoph et al.&rsquo;s Solution: Fine-tune the Non-MoE MLPs:</strong></p><ul><li>This strategy involves freezing a portion of the MoE model&rsquo;s parameters during fine-tuning, specifically the large expert weights.</li><li>Instead, only the &ldquo;non-MoE&rdquo; parameters (e.g., attention layers, adapter layers, or the gating network itself) are updated.</li><li>This reduces the effective number of trainable parameters during fine-tuning, thereby mitigating the risk of overfitting on small datasets. It assumes the experts are already well pre-trained for general tasks (see the sketch after this list).</li></ul></li><li><p><strong>DeepSeek&rsquo;s Solution: Use Lots of Data (1.4M SFT):</strong></p><ul><li>This approach tackles the problem by providing the model with a very large and diverse dataset for Supervised Fine-Tuning (SFT).</li><li>With abundant data (e.g., 1.4 million examples covering a wide range of tasks and languages), the model&rsquo;s large capacity can be used for specialized learning rather than memorization. The diversity and volume of data prevent individual experts from overfitting to specific examples.</li></ul></li></ul>
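<p>In practice, the freezing approach amounts to toggling <code>requires_grad</code> by parameter name. A minimal PyTorch sketch, assuming the hypothetical convention that expert FFN weights carry &ldquo;experts&rdquo; in their parameter names:</p><div class=highlight><pre tabindex=0><code class=language-python data-lang=python>def freeze_expert_weights(model):
    # Freeze the large expert weights; train attention, gating, and other dense layers.
    # The &#34;experts&#34; substring is an assumed naming convention, not a universal one.
    for name, param in model.named_parameters():
        param.requires_grad = &#34;experts&#34; not in name
</code></pre></div><p><strong>Conclusion:</strong>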
MoE models offer significant advantages in terms of model capacity and computational efficiency, but their unique sparse activation pattern introduces challenges in training and fine-tuning. Overcoming non-differentiability in routing and ensuring balanced expert utilization are crucial for effective pre-training. During fine-tuning, managing the model&rsquo;s vast parameter count to prevent overfitting on smaller datasets requires either strategic parameter freezing or access to very large and diverse fine-tuning data.
The <strong>Top-K routing</strong> mechanism is a core component in many modern Mixture-of-Experts (MoE) models. It involves selecting a fixed number (<code>K</code>) of experts for each input based on relevance scores.</p><hr><p><strong>Traditional Top-K (Deterministic Selection):</strong></p><ul><li><strong>How it works:</strong><ol><li>Calculate relevance scores (<code>s_{i,t}</code>) for each expert <code>i</code> and input <code>t</code>.</li><li>Identify the <code>K</code> experts with the highest scores.</li><li>Experts <em>within</em> the Top-K are assigned their scores (<code>g_{i,t} = s_{i,t}</code>).</li><li>Experts <em>outside</em> the Top-K are assigned a score of <code>0</code> (<code>g_{i,t} = 0</code>).</li><li>The output is a weighted sum of the selected experts&rsquo; outputs.</li></ol></li><li><strong>Pros:</strong> Predictable, deterministic, selects the &ldquo;best&rdquo; experts based on current scores.</li><li><strong>Cons:</strong> Can lead to expert imbalance, where a few popular experts are always chosen, starving others of training.</li></ul><p><strong>Alternative: Sampling from Softmax (Probabilistic Selection):</strong></p><ul><li><strong>How it works:</strong><ol><li>Calculate relevance scores (<code>s_{i,t}</code>), which are converted to probabilities via a softmax.</li><li><strong>Randomly sample</strong> <code>K</code> unique expert indices from the distribution defined by these probabilities.</li><li>Selected experts contribute; unselected experts do not.</li></ol></li><li><strong>Why it&rsquo;s suggested:</strong><ul><li><strong>Load Balancing:</strong> Prevents expert collapse by ensuring all experts get a chance to be selected, even those with slightly lower scores. This promotes more even training across the entire expert pool.</li><li><strong>Diversity & Exploration:</strong> Introduces randomness, potentially leading to better generalization and robustness by exploring different expert combinations.</li></ul></li><li><strong>Pros:</strong> Better load balancing, prevents expert starvation, encourages exploration.</li><li><strong>Cons:</strong> Stochastic (non-deterministic routing), can make debugging harder, might not pick the absolute &ldquo;best&rdquo; expert in a single instance (but better for long-term training).</li></ul><p><strong>Key Takeaway:</strong> While deterministic Top-K is simpler and directly picks the &ldquo;highest-scoring&rdquo; experts, sampling from the softmax offers a more robust training dynamic by ensuring that all experts receive training data, thereby preventing some experts from becoming unused (&ldquo;dead experts&rdquo;), as sketched below.</p>
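<p>A minimal PyTorch sketch of the sampling alternative (illustrative names, not from a specific model; <code>torch.multinomial</code> with <code>replacement=False</code> draws <code>K</code> distinct experts per token):</p><div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import torch
import torch.nn.functional as F

def sample_k_experts(logits: torch.Tensor, k: int):
    # logits: [num_tokens, num_experts] raw router scores
    probs = F.softmax(logits, dim=-1)
    # Draw K distinct expert indices per token, proportional to router probability.
    return torch.multinomial(probs, num_samples=k, replacement=False)
</code></pre></div><hr></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer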
src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>


@@ -0,0 +1,89 @@
<!doctype html><html lang=en><head><title>How I Got Open WebUI Talking to OpenAI Web Search · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="OpenAI promised native web search in GPT5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue #13042 tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT5 traffic through the Responses API and mask every web_search_call before the UI ever sees it.
This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="How I Got Open WebUI Talking to OpenAI Web Search"><meta name=twitter:description content="OpenAI promised native web search in GPT5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue #13042 tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT5 traffic through the Responses API and mask every web_search_call before the UI ever sees it.
This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself."><meta property="og:url" content="https://ericxliu.me/posts/open-webui-openai-websearch/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="How I Got Open WebUI Talking to OpenAI Web Search"><meta property="og:description" content="OpenAI promised native web search in GPT5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue #13042 tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT5 traffic through the Responses API and mask every web_search_call before the UI ever sees it.
This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-29T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-29T07:15:58+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/open-webui-openai-websearch/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"How I Got Open WebUI Talking to OpenAI Web Search","genre":"Blog","wordcount":"1087","url":"https:\/\/ericxliu.me\/posts\/open-webui-openai-websearch\/","datePublished":"2025-12-29T00:00:00\u002b00:00","dateModified":"2025-12-29T07:15:58\u002b00:00","description":"\u003cp\u003eOpenAI promised native web search in GPT5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue \u003ca href=\u0022https:\/\/github.com\/BerriAI\/litellm\/issues\/13042\u0022 class=\u0022external-link\u0022 target=\u0022_blank\u0022 rel=\u0022noopener\u0022\u003e#13042\u003c\/a\u003e tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT5 traffic through the Responses API and mask every \u003ccode\u003eweb_search_call\u003c\/code\u003e before the UI ever sees it.\u003c\/p\u003e\n\u003cp\u003eThis post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/open-webui-openai-websearch/>How I Got Open WebUI Talking to OpenAI Web Search</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-12-29T00:00:00Z>December 29, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
6-minute read</span></div></div></header><div class=post-content><p>OpenAI promised native web search in GPT5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue <a href=https://github.com/BerriAI/litellm/issues/13042 class=external-link target=_blank rel=noopener>#13042</a> tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT5 traffic through the Responses API and mask every <code>web_search_call</code> before the UI ever sees it.</p><p>This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.</p><h2 id=why-open-webui-broke>Why Open WebUI Broke
<a class=heading-link href=#why-open-webui-broke><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ol><li><strong>Wrong API surface.</strong> <code>/v1/chat/completions</code> still rejects <code>type: "web_search"</code> with <code>Invalid value: 'web_search'. Supported values are: 'function' and 'custom'.</code></li><li><strong>LiteLLM tooling gap.</strong> The OpenAI TypedDicts in <code>litellm/types/llms/openai.py</code> only allow <code>Literal["function"]</code>. Even if the backend call succeeded, streaming would crash when it saw a new tool type.</li><li><strong>Open WebUI assumptions.</strong> The UI eagerly parses every tool delta, so when LiteLLM streamed the raw <code>web_search_call</code> chunk, the UI tried to execute it, failed to parse the arguments, and aborted the chat.</li></ol><p>Fixing all three required touching both the proxy configuration and the LiteLLM transformation path.</p><h2 id=step-1--route-gpt5-through-the-responses-api>Step 1: Route GPT5 Through the Responses API
<a class=heading-link href=#step-1--route-gpt5-through-the-responses-api><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>LiteLLM&rsquo;s Responses bridge activates whenever the backend model name starts with <code>openai/responses/</code>. I added a dedicated alias, <code>gpt-5.2-search</code>, that hardcodes the Responses API plus web search metadata. Existing models (reasoning, embeddings, TTS) stay untouched.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-yaml data-lang=yaml><span style=display:flex><span><span style=color:#8b949e;font-style:italic># proxy-config.yaml (sanitized)</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#7ee787>model_list</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>- <span style=color:#7ee787>model_name</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>gpt-5.2-search</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>litellm_params</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>model</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>openai/responses/openai/gpt-5.2</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>api_key</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>&lt;OPENAI_API_KEY&gt;</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>reasoning_effort</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>high</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>merge_reasoning_content_in_choices</span>:<span style=color:#6e7681> </span><span style=color:#79c0ff>true</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>tools</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>- <span style=color:#7ee787>type</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>web_search</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>user_location</span>:<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>type</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>approximate</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>country</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>US</span><span style=color:#6e7681>
</span></span></span></code></pre></div><p>Any client (Open WebUI included) can now request <code>model: "gpt-5.2-search"</code> over the standard <code>/v1/chat/completions</code> endpoint, and LiteLLM handles the Responses API hop transparently.</p><h2 id=step-2--mask-web_search_call-chunks-inside-litellm>Step 2: Mask <code>web_search_call</code> Chunks Inside LiteLLM
<a class=heading-link href=#step-2--mask-web_search_call-chunks-inside-litellm><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>Even with the right API, LiteLLM still needs to stream deltas Open WebUI can digest. My <a href=https://ericxliu.me/hotfix.py class=external-link target=_blank rel=noopener>hotfix.py</a> script copies the LiteLLM source into <code>/tmp/patch/litellm</code>, then rewrites two files. This script runs as part of the Helm release&rsquo;s init hook so I can inject fixes directly into the container filesystem at pod start. That saves me from rebuilding and pushing new images every time LiteLLM upstream changes (or refuses a patch), which is critical while waiting for issue #13042 to land. I&rsquo;ll try to upstream the fix, but this is admittedly hacky, so timelines are uncertain.</p><ol><li><strong><code>openai.py</code> TypedDicts</strong>: extend the tool chunk definitions to accept <code>Literal["web_search"]</code>.</li><li><strong><code>litellm_responses_transformation/transformation.py</code></strong>: intercept every streaming item and short-circuit anything with <code>type == "web_search_call"</code>, returning an empty assistant delta instead of a tool call.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Excerpt from hotfix.py</span>
</span></span><span style=display:flex><span>tool_call_chunk_original <span style=color:#ff7b72;font-weight:700>=</span> (
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#39;class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call</span><span style=color:#79c0ff>\n</span><span style=color:#a5d6ff>&#39;</span>
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#39; id: Optional[str]</span><span style=color:#79c0ff>\n</span><span style=color:#a5d6ff>&#39;</span>
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#39; type: Literal[&#34;function&#34;]&#39;</span>
</span></span><span style=display:flex><span>)
</span></span><span style=display:flex><span>tool_call_chunk_patch <span style=color:#ff7b72;font-weight:700>=</span> tool_call_chunk_original<span style=color:#ff7b72;font-weight:700>.</span>replace(
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#39;Literal[&#34;function&#34;]&#39;</span>, <span style=color:#a5d6ff>&#39;Literal[&#34;function&#34;, &#34;web_search&#34;]&#39;</span>
</span></span><span style=display:flex><span>)
</span></span><span style=display:flex><span><span style=color:#ff7b72;font-weight:700>...</span>
</span></span><span style=display:flex><span><span style=color:#ff7b72>if</span> tool_call_chunk_original <span style=color:#ff7b72;font-weight:700>in</span> content:
</span></span><span style=display:flex><span> content <span style=color:#ff7b72;font-weight:700>=</span> content<span style=color:#ff7b72;font-weight:700>.</span>replace(tool_call_chunk_original, tool_call_chunk_patch, <span style=color:#a5d6ff>1</span>)
</span></span></code></pre></div><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span>added_block <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#a5d6ff>&#34;&#34;&#34; elif output_item.get(&#34;type&#34;) == &#34;web_search_call&#34;:
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> # Mask the call: Open WebUI should never see tool metadata
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> action_payload = output_item.get(&#34;action&#34;)
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> verbose_logger.debug(
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;Chat provider: masking web_search_call (added) call_id=</span><span style=color:#a5d6ff>%s</span><span style=color:#a5d6ff> action=</span><span style=color:#a5d6ff>%s</span><span style=color:#a5d6ff>&#34;,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> output_item.get(&#34;call_id&#34;),
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> action_payload,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> )
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> return ModelResponseStream(
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> choices=[
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> StreamingChoices(
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> index=0,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> delta=Delta(content=&#34;&#34;),
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> finish_reason=None,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> )
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> ]
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> )
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff>&#34;&#34;&#34;</span>
</span></span></code></pre></div><p>These patches ensure LiteLLM never emits a <code>tool_calls</code> delta for <code>web_search</code>. Open WebUI only receives assistant text chunks, so it happily renders the model response and the inline citations the Responses API already provides.</p><h2 id=step-3--prove-it-with-curl-and-open-webui>Step 3: Prove It with cURL (and Open WebUI)
<a class=heading-link href=#step-3--prove-it-with-curl-and-open-webui><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>I keep a simple smoke test (<code>litellm_smoke_test.sh</code>) that hits the public ingress with and without streaming. The secrets shown here are placeholders, but the structure is the same.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-weight:700;font-style:italic>#!/usr/bin/env bash
</span></span></span><span style=display:flex><span>set -euo pipefail
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>echo <span style=color:#a5d6ff>&#34;Testing non-streaming...&#34;</span>
</span></span><span style=display:flex><span>curl <span style=color:#a5d6ff>&#34;https://api.ericxliu.me/v1/chat/completions&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>&#34;Authorization: Bearer &lt;LITELLM_MASTER_KEY&gt;&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>&#34;Content-Type: application/json&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -d <span style=color:#a5d6ff>&#39;{
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;model&#34;: &#34;gpt-5.2-search&#34;,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;messages&#34;: [{&#34;role&#34;: &#34;user&#34;, &#34;content&#34;: &#34;Find the sunset time in Tokyo today.&#34;}]
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> }&#39;</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span>echo -e <span style=color:#a5d6ff>&#34;\n\nTesting streaming...&#34;</span>
</span></span><span style=display:flex><span>curl <span style=color:#a5d6ff>&#34;https://api.ericxliu.me/v1/chat/completions&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>&#34;Authorization: Bearer &lt;LITELLM_MASTER_KEY&gt;&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>&#34;Content-Type: application/json&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -d <span style=color:#a5d6ff>&#39;{
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;model&#34;: &#34;gpt-5.2-search&#34;,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;stream&#34;: true,
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> &#34;messages&#34;: [{&#34;role&#34;: &#34;user&#34;, &#34;content&#34;: &#34;What is the weather in NYC right now?&#34;}]
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> }&#39;</span>
</span></span></code></pre></div><p>Each request now returns grounded answers with citations (<code>url_citation</code> annotations) via Open WebUI, and the SSE feed never stalls because the UI isn&rsquo;t asked to interpret tool calls.</p><h2 id=lessons--pitfalls>Lessons & Pitfalls
<a class=heading-link href=#lessons--pitfalls><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>The Responses API is non-negotiable (and syntax-sensitive).</strong> <code>/v1/chat/completions</code> still rejects <code>web_search</code>. Always test against <code>/v1/responses</code> directly before wiring LiteLLM into the loop. Furthermore, the syntax for <code>reasoning</code> is different: while Chat Completions uses the top-level <code>reasoning_effort</code> parameter, the Responses API requires a nested object: <code>"reasoning": {"effort": "medium"}</code>.</li><li><strong>The Native Model Trap.</strong> Models like <code>gpt-5-search-api</code> exist and support web search via standard Chat Completions, but they are often less flexible—for instance, rejecting <code>reasoning_effort</code> entirely. Routing a standard model through LiteLLM&rsquo;s Responses bridge offers more control over formatting and fallbacks.</li><li><strong>Magic strings control routing.</strong> LiteLLM has hardcoded logic (deep in <code>main.py</code>) that only triggers the Responses-to-Chat bridge if the backend model name starts with <code>openai/responses/</code>. Without that specific prefix, LiteLLM bypasses its internal transformation layer entirely, leading to cryptic 404s or &ldquo;model not found&rdquo; errors.</li><li><strong>Synthesized Sovereignty: The Call ID Crisis.</strong> Open WebUI is a &ldquo;well-behaved&rdquo; OpenAI client, yet it often omits the <code>id</code> field in <code>tool_calls</code> when sending assistant messages back to the server. LiteLLM&rsquo;s Responses bridge initially exploded with a <code>KeyError: 'id'</code> because it assumed an ID would always be present. The fix: synthesizing predictable IDs like <code>auto_tool_call_N</code> on the fly to satisfy the server-side schema.</li><li><strong>The Argument Delta Void.</strong> In streaming mode, the Responses API sometimes skips sending <code>response.function_call_arguments.delta</code> entirely if the query is simple. If the proxy only waits for deltas, the client receives an empty <code>{}</code> for tool arguments. The solution is to fall back and synthesize the <code>arguments</code> string from the <code>action</code> payload (e.g., <code>output_item['action']['query']</code>) when deltas are missing (see the sketch after this list).</li><li><strong>Streaming State Machines are Fragile.</strong> Open WebUI is highly sensitive to the exact state of a tool call. If it sees a <code>web_search_call</code> with <code>status: "in_progress"</code>, its internal parser chokes, assuming it&rsquo;s an uncompleted &ldquo;function&rdquo; call. These intermediate state chunks must be intercepted and handled before they reach the UI.</li><li><strong>Defensive Masking is the Final Boss.</strong> To stop Open WebUI from entering an infinite client-side loop (thinking it needs to execute a tool it doesn&rsquo;t have), LiteLLM must &ldquo;mask&rdquo; the <code>web_search_call</code> chunks. By emitting empty content deltas instead of tool chunks, we hide the server-side search mechanics from the UI, allowing it to stay focused on the final answer.</li></ul>
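<p>To make the last two fixes concrete, here is a simplified, standalone sketch of the synthesis logic. It is illustrative only: the real hotfix patches LiteLLM&rsquo;s transformation module, and the field names simply follow the Responses API items shown earlier:</p><div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import json

def synthesize_tool_call(output_item: dict, index: int) -&gt; dict:
    # Open WebUI may omit tool-call IDs, so mint a predictable one.
    call_id = output_item.get(&#34;call_id&#34;) or f&#34;auto_tool_call_{index}&#34;
    # If no argument deltas arrived, rebuild arguments from the action payload.
    action = output_item.get(&#34;action&#34;) or {}
    arguments = json.dumps({&#34;query&#34;: action.get(&#34;query&#34;, &#34;&#34;)})
    return {&#34;id&#34;: call_id, &#34;type&#34;: &#34;function&#34;, &#34;arguments&#34;: arguments}
</code></pre></div><p>With those guardrails in place, GPT5&rsquo;s native web search works end-to-end inside Open WebUI, complete with citations, without waiting for LiteLLM upstream fixes.</p><h2 id=references>References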
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><a href=https://docs.litellm.ai/docs/proxy/openai_responses class=external-link target=_blank rel=noopener>LiteLLM Documentation - OpenAI Responses API Bridge</a></li><li><a href=https://platform.openai.com/docs/api-reference/responses class=external-link target=_blank rel=noopener>OpenAI Documentation - Responses API</a></li><li><a href=https://github.com/BerriAI/litellm/issues/13042 class=external-link target=_blank rel=noopener>LiteLLM GitHub Issue #13042</a></li><li><a href=https://docs.openwebui.com/ class=external-link target=_blank rel=noopener>Open WebUI Documentation</a></li><li><a href=https://ericxliu.me/hotfix.py class=external-link target=_blank rel=noopener>The hotfix.py Script</a></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

File diff suppressed because one or more lines are too long

1
posts/page/1/index.html Normal file

@@ -0,0 +1 @@
<!doctype html><html lang=en><head><title>https://ericxliu.me/posts/</title><link rel=canonical href=https://ericxliu.me/posts/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/posts/"></head></html>

17
posts/page/2/index.html Normal file

@@ -0,0 +1,17 @@
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/posts/>Posts</a></h1></header><ul><li><span class=date>October 2, 2025</span>
<a class=title href=/posts/flashing-jetson-orin-nano-in-virtualized-environments/>Flashing Jetson Orin Nano in Virtualized Environments</a></li><li><span class=date>September 28, 2025</span>
<a class=title href=/posts/openwrt-mwan3-wireguard-endpoint-exclusion/>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</a></li><li><span class=date>September 22, 2025</span>
<a class=title href=/posts/unifi-vlan-migration-to-zone-based-architecture/>UniFi VLAN Migration to Zone-Based Architecture</a></li><li><span class=date>August 19, 2025</span>
<a class=title href=/posts/quantization-in-llms/>Quantization in LLMs</a></li><li><span class=date>August 16, 2025</span>
<a class=title href=/posts/breville-barista-pro-maintenance/>Breville Barista Pro Maintenance</a></li><li><span class=date>August 9, 2025</span>
<a class=title href=/posts/secure-boot-dkms-and-mok-on-proxmox-debian/>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</a></li><li><span class=date>August 7, 2025</span>
<a class=title href=/posts/how-rvq-teaches-llms-to-see-and-hear/>Beyond Words: How RVQ Teaches LLMs to See and Hear</a></li><li><span class=date>August 3, 2025</span>
<a class=title href=/posts/supabase-deep-dive/>Supabase Deep Dive: It's Not Magic, It's Just Postgres</a></li><li><span class=date>August 2, 2025</span>
<a class=title href=/posts/ppo-for-language-models/>A Deep Dive into PPO for Language Models</a></li><li><span class=date>July 2, 2025</span>
<a class=title href=/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/>Mixture-of-Experts (MoE) Models: Challenges & Solutions in Practice</a></li></ul><ul class=pagination><li><a href=/posts/>&#171;</a></li><li class=hidden><a href=/posts/>&#8249;</a></li><li><a href=/posts/>1</a></li><li>2</li><li><a href=/posts/page/3/>3</a></li><li class=hidden><a href=/posts/page/3/>&#8250;</a></li><li><a href=/posts/page/3/>&#187;</a></li></ul></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

11
posts/page/3/index.html Normal file

@@ -0,0 +1,11 @@
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/posts/>Posts</a></h1></header><ul><li><span class=date>June 1, 2025</span>
<a class=title href=/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/>An Architectural Deep Dive of T5</a></li><li><span class=date>May 1, 2025</span>
<a class=title href=/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</a></li><li><span class=date>April 1, 2025</span>
<a class=title href=/posts/transformer-s-core-mechanics/>Transformer's Core Mechanics</a></li><li><span class=date>October 26, 2020</span>
<a class=title href=/posts/useful/>Some useful files</a></li></ul><ul class=pagination><li><a href=/posts/>&#171;</a></li><li class=hidden><a href=/posts/page/2/>&#8249;</a></li><li><a href=/posts/>1</a></li><li><a href=/posts/page/2/>2</a></li><li>3</li></ul></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

View File

@@ -0,0 +1,28 @@
<!doctype html><html lang=en><head><title>A Deep Dive into PPO for Language Models · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don&rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).
You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
"><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="A Deep Dive into PPO for Language Models"><meta name=twitter:description content="Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They dont inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).
You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows."><meta property="og:url" content="https://ericxliu.me/posts/ppo-for-language-models/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="A Deep Dive into PPO for Language Models"><meta property="og:description" content="Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They dont inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).
You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-08-02T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/ppo-for-language-models/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"A Deep Dive into PPO for Language Models","genre":"Blog","wordcount":"1393","url":"https:\/\/ericxliu.me\/posts\/ppo-for-language-models\/","datePublished":"2025-08-02T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003cp\u003eLarge Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don\u0026rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).\u003c\/p\u003e\n\u003cp\u003eYou may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.\n\u003cimg src=\u0022\/images\/ppo-for-language-models\/7713bd3ecf27442e939b9190fa08165d.png\u0022 alt=\u0022S3 File\u0022\u003e\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/ppo-for-language-models/>A Deep Dive into PPO for Language Models</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-08-02T00:00:00Z>August 2, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
7-minute read</span></div></div></header><div class=post-content><p>Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don&rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).</p><p>You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
<img src=/images/ppo-for-language-models/7713bd3ecf27442e939b9190fa08165d.png alt="S3 File"></p><p>This post will decode that diagram, piece by piece. We&rsquo;ll explore the &ldquo;why&rdquo; behind each component, moving from high-level concepts to the deep technical reasoning that makes this process work.</p><h3 id=translating-rl-to-a-conversation>Translating RL to a Conversation
<a class=heading-link href=#translating-rl-to-a-conversation><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The first step is to understand how the traditional language of reinforcement learning maps to the world of text generation.</p><ul><li><strong>State (<code>s_t</code>)</strong>: In a chat setting, the &ldquo;state&rdquo; is the context of the conversation so far. It&rsquo;s the initial prompt (<code>x</code>) plus all the text the model has generated up to the current moment (<code>y₁, ..., y_{t-1}</code>).</li><li><strong>Action (<code>a_t</code>)</strong>: The &ldquo;action&rdquo; is the model&rsquo;s decision at each step. For an LLM, this means generating the very next token (<code>y_t</code>). A full response is a sequence of these actions.</li><li><strong>Reward (<code>r</code>)</strong>: The &ldquo;reward&rdquo; is a numeric score that tells the model how good its full response (<code>y</code>) was. This score comes from a separate <strong>Reward Model</strong>, which has been trained on a large dataset of human preference comparisons (e.g., humans rating which of two responses is better). This reward is often only awarded at the end of the entire generated sequence.</li></ul><p>Let&rsquo;s make this concrete. If a user provides the prompt <strong>(x)</strong>: <em>&ldquo;The best thing about AI is&rdquo;</em>, and the model generates the response <strong>(y)</strong>: <em>&ldquo;its potential to solve problems.&rdquo;</em>, here is how it&rsquo;s broken down for training:</p><ul><li><strong>State 1</strong>: &ldquo;The best thing about AI is&rdquo;<ul><li><strong>Action 1</strong>: &ldquo;its&rdquo;</li></ul></li><li><strong>State 2</strong>: &ldquo;The best thing about AI is its&rdquo;<ul><li><strong>Action 2</strong>: " potential"</li></ul></li><li><strong>State 3</strong>: &ldquo;The best thing about AI is its potential&rdquo;<ul><li><strong>Action 3</strong>: " to"</li></ul></li><li>&mldr;and so on for every generated token.</li></ul><p>This breakdown transforms a single prompt-response pair into a rich trajectory of state-action pairs, which becomes the raw data for our learning algorithm.</p>
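<p>To make that bookkeeping concrete, here is a minimal sketch of the expansion (splitting on whitespace for illustration; a real pipeline operates on the model&rsquo;s token IDs):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python># Sketch: expand one prompt/response pair into per-token (state, action) records.
# Whitespace tokenization stands in for a real tokenizer.
prompt = "The best thing about AI is"
response = "its potential to solve problems."

trajectory = []
state = prompt
for action in response.split():
    trajectory.append({"state": state, "action": action})
    state = state + " " + action  # each action extends the state for the next step

for step in trajectory:
    print(step)
</code></pre></div>
<h3 id=the-cast-of-models-an-actor-critic-ensemble>The Cast of Models: An Actor-Critic Ensemble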
<a class=heading-link href=#the-cast-of-models-an-actor-critic-ensemble><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The PPO process doesn&rsquo;t rely on a single model but on an ensemble where each member has a distinct role.</p><ol><li><strong>The Actor (Policy LM)</strong>: This is the star of the show—the LLM we are actively fine-tuning. Its role is to take a state (the current text) and decide on an action (the next token). We refer to its decision-making process as its &ldquo;policy&rdquo; (<code>π</code>).</li><li><strong>The Critic (Value Model)</strong>: This is the Actor&rsquo;s coach. The Critic doesn&rsquo;t generate text. Instead, it observes a state and estimates the <em>potential future reward</em> the Actor is likely to receive from that point onward. This estimate is called the &ldquo;value&rdquo; (<code>V(s_t)</code>). The Critic&rsquo;s feedback helps the Actor understand whether it&rsquo;s in a promising or a dead-end situation, which is a much more immediate learning signal than waiting for the final reward.</li><li><strong>The Reward Model</strong>: This is the ultimate judge. As mentioned, it&rsquo;s a separate model trained on human preference data that provides the final score for a complete generation. Its judgment is treated as the ground truth for training both the Actor and the Critic.</li></ol><h3 id=the-challenge-of-credit-assignment-generalized-advantage-estimation-gae>The Challenge of Credit Assignment: Generalized Advantage Estimation (GAE)
<a class=heading-link href=#the-challenge-of-credit-assignment-generalized-advantage-estimation-gae><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>A key problem in RL is assigning credit. If a 20-token response gets a high reward, was it because of the first token, the last one, or all of them? The Critic helps solve this. By comparing the reward at each step with the Critic&rsquo;s value estimate, we can calculate the <strong>Advantage (<code>Â</code>)</strong>.</p><p>A simple advantage calculation might be: <code>Advantage = reward + Value_of_next_state - Value_of_current_state</code>.</p><p>However, this can be noisy. PPO uses a more sophisticated technique called <strong>Generalized Advantage Estimation (GAE)</strong>. The formula looks complex, but the idea is intuitive:</p><p><code>Â(s_t, a_t) = Σ(γλ)^l * δ_{t+l}</code>
where <code>δ_t = r_t + γV(s_{t+1}) - V(s_t)</code></p><ul><li><strong>γ (gamma)</strong> is a discount factor (e.g., 0.99), which values immediate rewards slightly more than distant ones.</li><li><strong>λ (lambda)</strong> is a smoothing parameter that balances the trade-off between bias and variance. It creates a weighted average of advantages over multiple future time steps.</li></ul><p>In essence, GAE provides a more stable and accurate estimate of how much better a specific action was compared to the policy&rsquo;s average behavior in that state.</p>
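<p>The recursion translates directly into code. A minimal sketch (assuming per-token <code>rewards</code>, typically zero everywhere except the final token, and critic <code>values</code> with one extra bootstrap entry):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python># Sketch: Generalized Advantage Estimation over one response.
# rewards[t]: per-token reward (often 0 until the last token).
# values[t]:  critic estimate V(s_t); values[T] is the bootstrap value past the end.
def compute_gae(rewards, values, gamma=0.99, lam=0.95):
    advantages = [0.0] * len(rewards)
    gae = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * values[t + 1] - values[t]  # TD error δ_t
        gae = delta + gamma * lam * gae                          # Σ (γλ)^l δ_{t+l}
        advantages[t] = gae
    return advantages

# Example: sparse reward of 1.0 on the last token of a 4-token response.
print(compute_gae([0.0, 0.0, 0.0, 1.0], [0.1, 0.2, 0.3, 0.5, 0.0]))
</code></pre></div>
<h3 id=the-heart-of-ppo-the-quest-for-stable-updates>The Heart of PPO: The Quest for Stable Updates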
<a class=heading-link href=#the-heart-of-ppo-the-quest-for-stable-updates><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Now we arrive at the core innovation of PPO. We want to update our Actor model to take actions with higher advantages. The naive way to do this is to re-weight our training objective by an <strong>importance sampling ratio</strong>: <code>(π_new / π_old)</code>. This corrects for the fact that the data we are learning from was generated by a slightly older version of our policy.</p><p>However, this ratio is incredibly dangerous. If the new policy becomes very different from the old one, the ratio can explode, leading to massive, unstable gradient updates that destroy the model.</p><p>PPO solves this with its signature <strong>Clipped Surrogate Objective</strong>. The PPO loss function is:</p><p><code>L_CLIP(θ) = Ê_t [ min( r_t(θ)Â_t, clip(r_t(θ), 1 - ε, 1 + ε)Â_t ) ]</code></p><p>Let&rsquo;s translate this from math to English:</p><ul><li><code>r_t(θ)</code> is the probability ratio <code>π_new(a_t|s_t) / π_old(a_t|s_t)</code>.</li><li>The goal is to increase the objective by an amount proportional to the advantage <code>Â_t</code>.</li><li><strong>The <code>clip</code> function is the crucial safeguard.</strong> It forbids the probability ratio from moving outside a small window (e.g., <code>[0.8, 1.2]</code>).</li></ul><p>This means the algorithm says: &ldquo;Let&rsquo;s update our policy to favor this good action. But if the required update would change the policy too drastically from the old one, we&rsquo;ll &lsquo;clip&rsquo; the update to a more modest size.&rdquo; This creates a &ldquo;trust region,&rdquo; ensuring stable, incremental improvements.</p>
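<p>Despite the notation, the objective is only a few lines of code. A sketch in PyTorch (per-token log-probabilities and the GAE advantages are assumed as inputs):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import torch

# Sketch: PPO clipped surrogate loss over a mini-batch of token-level steps.
# new_logprobs / old_logprobs: log π(a_t|s_t) under the current and rollout policies.
def ppo_clip_loss(new_logprobs, old_logprobs, advantages, eps=0.2):
    ratio = torch.exp(new_logprobs - old_logprobs)           # r_t(θ)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - eps, 1.0 + eps) * advantages
    return -torch.min(unclipped, clipped).mean()             # negate: optimizers minimize
</code></pre></div>
<h3 id=avoiding-amnesia-the-pretraining-loss>Avoiding Amnesia: The Pretraining Loss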
<a class=heading-link href=#avoiding-amnesia-the-pretraining-loss><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>There&rsquo;s one final problem. If we only optimize for the PPO loss, the model might learn to &ldquo;hack&rdquo; the reward model by generating repetitive or nonsensical text that gets a high score. In doing so, it could suffer from <strong>catastrophic forgetting</strong>, losing its fundamental grasp of grammar and facts.</p><p>To prevent this, we introduce a second loss term. As seen in the diagram, we mix in data from the original <strong>Pretraining Data</strong> (or the dataset used for Supervised Fine-Tuning). We calculate a standard next-token prediction loss (<code>LM Loss</code>) on this high-quality data.</p><p>The final loss for the Actor is a combination of both objectives:</p><p><strong>Total Loss = Loss_PPO + <code>λ_ptx</code> * Loss_LM</strong></p><p>This brilliantly balances two goals:</p><ol><li>The <code>Loss_PPO</code> pushes the model towards behaviors that align with human preferences.</li><li>The <code>Loss_LM</code> acts as a regularizer, pulling the model back towards its core language capabilities and preventing it from drifting into gibberish.</li></ol>
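<p>In the update step this mixing is a single line. A sketch (the weight shown is illustrative; in practice it is a tuned hyperparameter):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python># Sketch: the combined Actor objective.
def actor_loss(ppo_loss, lm_loss, lambda_ptx=0.1):  # 0.1 is an illustrative value
    return ppo_loss + lambda_ptx * lm_loss
</code></pre></div>
<h3 id=the-full-training-loop>The Full Training Loop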
<a class=heading-link href=#the-full-training-loop><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Now, we can assemble the entire process into a clear, iterative loop:</p><ol><li><strong>Collect</strong>: The current Actor policy <code>π_k</code> generates responses to a batch of prompts. These experiences—<code>(state, action, probability, reward, value)</code>—are stored in an <strong>Experience Buffer</strong>.</li><li><strong>Calculate</strong>: Once the buffer is full, we use the collected data to compute the advantage estimates <code>Â_t</code> for every single token-generation step.</li><li><strong>Optimize</strong>: For a few epochs, we repeatedly sample mini-batches from the buffer and update the Actor and Critic models. The Actor is updated using the combined <code>PPO-clip Loss</code> and <code>LM Loss</code>. The Critic is updated to improve its value predictions.</li><li><strong>Flush and Repeat</strong>: After the optimization phase, the entire experience buffer is discarded. The data is now &ldquo;stale&rdquo; because our policy has changed. The newly updated policy <code>π_{k+1}</code> becomes the new Actor, and we return to step 1 to collect fresh data.</li></ol><p>This cycle of collection and optimization allows the language model to gradually and safely steer its behavior towards human-defined goals, creating the helpful and aligned AI assistants we interact with today.</p><hr><p><strong>References:</strong></p><ol><li>Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). <em>Proximal Policy Optimization Algorithms</em>. arXiv preprint arXiv:1707.06347.</li><li>Schulman, J., Moritz, P., Levine, S., Jordan, M., & Abbeel, P. (2015). <em>High-Dimensional Continuous Control Using Generalized Advantage Estimation</em>. arXiv preprint arXiv:1506.02438.</li><li>Ouyang, L., et al. (2022). <em>Training language models to follow instructions with human feedback</em>. Advances in Neural Information Processing Systems 35.</li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,27 @@
<!doctype html><html lang=en><head><title>How I Built a Blog Agent that Writes About Itself · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="I&rsquo;ve been spending a lot of time &ldquo;vibe coding&rdquo; in the Antigravity IDE lately. It&rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and &ldquo;aha!&rdquo; moments is locked away in an opaque, internal format.
I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you&rsquo;re reading right now)."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="How I Built a Blog Agent that Writes About Itself"><meta name=twitter:description content="Ive been spending a lot of time “vibe coding” in the Antigravity IDE lately. Its an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and “aha!” moments is locked away in an opaque, internal format.
I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one youre reading right now)."><meta property="og:url" content="https://ericxliu.me/posts/reverse-engineering-antigravity-ide/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="How I Built a Blog Agent that Writes About Itself"><meta property="og:description" content="Ive been spending a lot of time “vibe coding” in the Antigravity IDE lately. Its an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and “aha!” moments is locked away in an opaque, internal format.
I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one youre reading right now)."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-16T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-22T01:49:53+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/reverse-engineering-antigravity-ide/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"How I Built a Blog Agent that Writes About Itself","genre":"Blog","wordcount":"779","url":"https:\/\/ericxliu.me\/posts\/reverse-engineering-antigravity-ide\/","datePublished":"2026-01-16T00:00:00\u002b00:00","dateModified":"2026-01-22T01:49:53\u002b00:00","description":"\u003cp\u003eI\u0026rsquo;ve been spending a lot of time \u0026ldquo;vibe coding\u0026rdquo; in the Antigravity IDE lately. It\u0026rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and \u0026ldquo;aha!\u0026rdquo; moments is locked away in an opaque, internal format.\u003c\/p\u003e\n\u003cp\u003eI wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you\u0026rsquo;re reading right now).\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/reverse-engineering-antigravity-ide/>How I Built a Blog Agent that Writes About Itself</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2026-01-16T00:00:00Z>January 16, 2026
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
4-minute read</span></div></div></header><div class=post-content><p>I&rsquo;ve been spending a lot of time &ldquo;vibe coding&rdquo; in the Antigravity IDE lately. It&rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and &ldquo;aha!&rdquo; moments is locked away in an opaque, internal format.</p><p>I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you&rsquo;re reading right now).</p><p>But getting the data out turned into a much deeper rabbit hole than I expected.</p><h2 id=the-challenge-check-the-database>The Challenge: Check the Database?
<a class=heading-link href=#the-challenge-check-the-database><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>My first instinct was simple: It&rsquo;s an Electron app, so there&rsquo;s probably a SQLite database.</p><p>I found it easily enough at <code>~/Library/Application Support/Antigravity/User/globalStorage/state.vscdb</code>. But when I opened it up, I hit a wall. The data wasn&rsquo;t plain text; it was stored in the <code>ItemTable</code> under keys like <code>antigravityUnifiedStateSync.trajectorySummaries</code> as Base64-encoded strings.</p><p>Decoding them revealed raw Protobuf wire formats, not JSON.</p>
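<p>Pulling one of those blobs out takes only a few lines (a sketch; the path and key are the ones mentioned above, on macOS):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import base64
import os
import sqlite3

# Sketch: read one Base64-encoded Protobuf blob out of Antigravity's state DB.
db_path = os.path.expanduser(
    "~/Library/Application Support/Antigravity/User/globalStorage/state.vscdb")
conn = sqlite3.connect(db_path)
row = conn.execute(
    "SELECT value FROM ItemTable WHERE key = ?",
    ("antigravityUnifiedStateSync.trajectorySummaries",),
).fetchone()
raw = base64.b64decode(row[0])  # raw Protobuf wire bytes, not JSON
print(raw[:64])
</code></pre></div>
<h3 id=the-wire-walking-dead-end>The &ldquo;Wire-Walking&rdquo; Dead End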
<a class=heading-link href=#the-wire-walking-dead-end><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I spent a few hours writing a Python script to &ldquo;wire-walk&rdquo; the Protobuf data without a schema. I managed to extract some human-readable strings, but it was a mess:</p><ol><li><strong>Missing Context</strong>: I got fragments of text, but the user prompts and cohesive flow were gone.</li><li><strong>Encryption</strong>: The actual conversation files (ending in <code>.pb</code>) in <code>~/.gemini/antigravity/conversations/</code> were encrypted.</li></ol><p>It turns out Antigravity uses Electron&rsquo;s <code>safeStorage</code> API, which interfaces directly with the macOS Keychain. Without the app&rsquo;s private key (which is hardware-bound), that data is effectively random noise. I even tried using Frida to hook <code>safeStorage.decryptString()</code>, but macOS SIP (System Integrity Protection) and code signing shut that down immediately.</p><p>I was stuck. I couldn&rsquo;t decrypt the local files, and I couldn&rsquo;t parse the database effectively.</p>
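<p>For reference, the wire-walk itself is simple in principle: read a varint tag, branch on the wire type, and keep anything that decodes as UTF-8. A sketch of the approach (not the exact script I used):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python># Sketch: walk Protobuf wire format without a schema, harvesting strings.
def read_varint(buf, i):
    result, shift = 0, 0
    while True:
        b = buf[i]
        i += 1
        result |= (b &amp; 0x7F) &lt;&lt; shift
        if not b &amp; 0x80:
            return result, i
        shift += 7

def wire_walk(buf, depth=0):
    i = 0
    while i &lt; len(buf):
        tag, i = read_varint(buf, i)
        field, wtype = tag &gt;&gt; 3, tag &amp; 7
        if wtype == 0:            # varint
            _, i = read_varint(buf, i)
        elif wtype == 1:          # 64-bit
            i += 8
        elif wtype == 5:          # 32-bit
            i += 4
        elif wtype == 2:          # length-delimited: string, bytes, or sub-message
            n, i = read_varint(buf, i)
            chunk, i = buf[i:i + n], i + n
            try:
                print("  " * depth, field, chunk.decode("utf-8"))
            except UnicodeDecodeError:
                try:
                    wire_walk(chunk, depth + 1)  # maybe a nested message
                except IndexError:
                    pass                         # just opaque bytes
        else:
            break                 # unknown/legacy wire type: give up
</code></pre></div>
<h2 id=the-breakthrough-living-off-the-land>The Breakthrough: Living Off the Land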
<a class=heading-link href=#the-breakthrough-living-off-the-land><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>When you can&rsquo;t break the front door, look for the side entrance. I realized I wasn&rsquo;t the only one trying to read this state—the official extensions had to do it too.</p><p>I started poking around the source code of the <code>vscode-antigravity-cockpit</code> extension, specifically a file named <code>local_auth_importer.ts</code>. That&rsquo;s where I found the golden ticket.</p><p>The extension <em>doesn&rsquo;t</em> decrypt the local files. Instead, it reads a specific key from the SQLite database: <code>jetskiStateSync.agentManagerInitState</code>.</p><p>When I decoded field #6 of this Protobuf structure, I found an <code>OAuthTokenInfo</code> message. It contained the user&rsquo;s active <code>accessToken</code> and <code>refreshToken</code>.</p><h3 id=shifting-strategy-dont-crack-it-join-it>Shifting Strategy: Don&rsquo;t Crack it, Join it
<a class=heading-link href=#shifting-strategy-dont-crack-it-join-it><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>This changed everything. I didn&rsquo;t need to reverse-engineer the local storage encryption; I just needed to impersonate the IDE.</p><p>By &ldquo;piggybacking&rdquo; on this existing auth mechanism, I could extract a valid OAuth token directly from the local state. But I still needed the endpoints.</p><p>Instead of guessing, I opened the <strong>Developer Tools</strong> inside Antigravity itself (it is Electron, after all). By enabling the Chrome network tracing tools and triggering an export manually, I caught the request in the act.</p><p>I saw the exact call to <code>exa.language_server_pb.LanguageServerService/ConvertTrajectoryToMarkdown</code>.</p><p>It was perfect. By sending a gRPC-over-HTTP request to this endpoint using the stolen token, the server—which <em>does</em> have access to the unencrypted history—returned a perfectly formatted Markdown document of my entire coding session.</p>
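<p>The replay boils down to one authenticated POST. A sketch (only the service path comes from the captured request; the host and the field names here are hypothetical stand-ins):</p>
<div class=highlight><pre tabindex=0><code class=language-python data-lang=python>import requests

# Sketch: replay the IDE's gRPC-over-HTTP call with the extracted token.
BASE_URL = "https://server.example.com"  # placeholder for the real backend host
ACCESS_TOKEN = "REDACTED"                # lifted from jetskiStateSync.agentManagerInitState
TRAJECTORY_ID = "REDACTED"               # identifier of one coding session

resp = requests.post(
    f"{BASE_URL}/exa.language_server_pb.LanguageServerService/ConvertTrajectoryToMarkdown",
    headers={
        "Authorization": f"Bearer {ACCESS_TOKEN}",
        "Content-Type": "application/json",
    },
    json={"trajectoryId": TRAJECTORY_ID},  # hypothetical request field
    timeout=30,
)
resp.raise_for_status()
print(resp.json().get("markdown", ""))     # hypothetical response field
</code></pre></div>
<h2 id=the-architecture-the-blog-agent>The Architecture: The Blog-Agent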
<a class=heading-link href=#the-architecture-the-blog-agent><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>Once I had the data extraction solved, building the rest of the &ldquo;blog-agent&rdquo; was straightforward. I built a <strong>Node.js</strong> stack to automate the pipeline:</p><ul><li><strong>Backend</strong>: An <strong>Express</strong> server handles the routing, session imports, and post generation.</li><li><strong>Frontend</strong>: A clean <strong>EJS</strong> interface to list sessions, view summaries, and &ldquo;publish&rdquo; them to the filesystem.</li><li><strong>Storage</strong>: A local SQLite database (<code>data/sessions.sqlite</code>) acts as a cache. (I learned my lesson: always cache your LLM inputs).</li><li><strong>The Brain</strong>: I use the <strong>OpenAI SDK</strong> (pointing to a LiteLLM proxy) to interface with <code>gemini-3-flash</code>. I wrote a map-reduce style prompt that first extracts technical decisions from the raw conversation log, then synthesizes them into a narrative.</li><li><strong>Persistence</strong>: The final posts are saved with YAML front matter into a <code>generated_posts/</code> directory.</li></ul><h2 id=key-insights>Key Insights
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Don&rsquo;t Fight the OS</strong>: Trying to break macOS Keychain/SIP encryption is a losing battle for a weekend project.</li><li><strong>Follow the Tokens</strong>: Applications often store auth tokens in less-secure places (like plain SQLite or weaker encryption) than the user content itself.</li><li><strong>Extensions are Open Books</strong>: If an app has extensions, their source code is often the best documentation for the internal API.</li></ul><p>In a satisfyingly recursive loop, <strong>this very article was generated by the blog-agent itself</strong>, analyzing the &ldquo;vibe coding&rdquo; session where I built it.</p><h2 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><code>server.js</code>: The Express server and API implementation.</li><li><code>services/antigravity.js</code>: The client for the Antigravity gRPC-over-HTTP API.</li><li><a href=https://github.com/jlcodes99/vscode-antigravity-cockpit class=external-link target=_blank rel=noopener>vscode-antigravity-cockpit</a>: The extension that leaked the auth logic.</li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

View File

@@ -0,0 +1,38 @@
<!doctype html><html lang=en><head><title>Why I Downgraded Magisk to Root My Pixel 2 XL · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="For the past few weeks, I&rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn&rsquo;t proof.
Static analysis has its limits. I found the &ldquo;smoking gun&rdquo; function—a native method responsible for encrypting the login payload—but understanding how it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Why I Downgraded Magisk to Root My Pixel 2 XL"><meta name=twitter:description content="For the past few weeks, Ive been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isnt proof.
Static analysis has its limits. I found the “smoking gun” function—a native method responsible for encrypting the login payload—but understanding how it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache."><meta property="og:url" content="https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Why I Downgraded Magisk to Root My Pixel 2 XL"><meta property="og:description" content="For the past few weeks, Ive been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isnt proof.
Static analysis has its limits. I found the “smoking gun” function—a native method responsible for encrypting the login payload—but understanding how it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-07T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-08T06:02:38+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Why I Downgraded Magisk to Root My Pixel 2 XL","genre":"Blog","wordcount":"775","url":"https:\/\/ericxliu.me\/posts\/rooting-pixel-2-xl-for-reverse-engineering\/","datePublished":"2026-01-07T00:00:00\u002b00:00","dateModified":"2026-01-08T06:02:38\u002b00:00","description":"\u003cp\u003eFor the past few weeks, I\u0026rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn\u0026rsquo;t proof.\u003c\/p\u003e\n\u003cp\u003eStatic analysis has its limits. I found the \u0026ldquo;smoking gun\u0026rdquo; function—a native method responsible for encrypting the login payload—but understanding \u003cem\u003ehow\u003c\/em\u003e it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/>Why I Downgraded Magisk to Root My Pixel 2 XL</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2026-01-07T00:00:00Z>January 7, 2026
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
4-minute read</span></div></div></header><div class=post-content><p>For the past few weeks, I&rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the HCI snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn&rsquo;t proof.</p><p>Static analysis has its limits. I found the &ldquo;smoking gun&rdquo; function—a native method responsible for encrypting the login payload—but understanding <em>how</em> it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.</p><p>I needed to move from <strong>static analysis</strong> to <strong>dynamic analysis</strong>. I needed to hook the function at runtime, inspect the memory, and see the data before it gets encrypted. To do that, I needed a rooted Android device.</p><p>The only candidate in my drawer? An 8-year-old <strong>Google Pixel 2 XL (&ldquo;taimen&rdquo;)</strong> that hadn&rsquo;t been turned on since 2017.</p><h2 id=the-objective>The Objective
<a class=heading-link href=#the-objective><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>Bring this relic back to life, update it to the final official firmware, and gain <code>su</code> access to install Frida and tcpdump. It sounds simple, but 2026 tools don&rsquo;t always play nice with 2017 hardware.</p><h2 id=phase-1-the-i-forgot-my-password-hurdle>Phase 1: The &ldquo;I Forgot My Password&rdquo; Hurdle
<a class=heading-link href=#phase-1-the-i-forgot-my-password-hurdle><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>The first problem was mundane: I didn&rsquo;t remember the PIN. My only way in was a physical <strong>Hard Reset</strong>, which relies on a specific sequence of hardware button inputs:</p><ol><li><strong>Fastboot Mode</strong>: Hold <code>Power</code> + <code>Vol Down</code> until the familiar bootloader screen appears.</li><li><strong>Recovery Mode</strong>: Use volume keys to select &ldquo;Recovery Mode&rdquo;.</li><li><strong>The &ldquo;No Command&rdquo; Trick</strong>: The phone reboots to a broken Android logo. To get the actual menu, you have to hold <code>Power</code> and tap <code>Vol Up</code> <em>once</em>.</li><li><strong>Wipe</strong>: Select <code>Wipe data/factory reset</code>.</li></ol><p><strong>The Catch</strong>: This triggers <strong>Factory Reset Protection (FRP)</strong>. Upon boot, the device required authentication with the Google Account previously synced to the hardware. Since I verified my identity using the original credentials, I could proceed; otherwise, bypassing this security feature would have been a significant roadblock.</p><h2 id=phase-2-the-update-trap>Phase 2: The Update Trap
<a class=heading-link href=#phase-2-the-update-trap><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>Once in, I checked the version: <code>Android 10 (QP1A.190711.020)</code>. This was ancient. The Pixel 2 XL officially supports Android 11, and I wanted the latest possible base for compatibility with modern tools.</p><p>I tried the easy route: <strong>Settings > System Update</strong>.
<strong>The Result</strong>: Failure. The phone refused to pull the final OTA (<code>RP1A.201005.004.A1</code>), likely because Google&rsquo;s update servers no longer serve OTAs for this EOL device.</p><h3 id=the-fix-manual-flashing>The Fix: Manual Flashing
<a class=heading-link href=#the-fix-manual-flashing><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I had to bypass the OTA system entirely. I downloaded the <a href=https://developers.google.com/android/images class=external-link target=_blank rel=noopener>final Factory Image</a> from Google.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Don&#39;t rely on OTA. Flash the whole valid state.</span>
</span></span><span style=display:flex><span>fastboot -w update image-taimen-rp1a.201005.004.a1.zip
</span></span></code></pre></div><p><em>Note: I used the <code>-w</code> flag here since I had just wiped the device anyway. This gave me a pristine, stock Android 11 environment to break.</em></p><h2 id=phase-3-the-magisk-time-travel>Phase 3: The Magisk &ldquo;Time Travel&rdquo;
<a class=heading-link href=#phase-3-the-magisk-time-travel><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>This is where &ldquo;modern tools meet old hardware&rdquo; caused the most pain.</p><p><strong>The Hypothesis</strong>: Rooting a Pixel is standard procedure.</p><ol><li>Extract <code>boot.img</code> from the factory zip.</li><li>Patch it with the latest <strong>Magisk</strong> app.</li><li>Flash it back.</li></ol><p><strong>The Reality</strong>: Bootloop.
I used <strong>Magisk v30.6</strong> (the latest as of writing). The patch process &ldquo;succeeded,&rdquo; but flashing the resulting image caused the phone to immediately crash back to the bootloader with a &ldquo;Cannot find valid operating system&rdquo; error.</p><h3 id=debugging-the-bootloop>Debugging the Bootloop
<a class=heading-link href=#debugging-the-bootloop><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I suspected a regression in how modern Magisk handles the antiquated boot partition structure of the Pixel 2 (A/B partitions, but pre-GKI).</p><p>I decided to perform some &ldquo;software archaeology&rdquo; and use a version of Magisk that was contemporary with the device&rsquo;s lifespan. I grabbed <strong>Magisk v25.0</strong> (released around 2022).</p><ol><li><strong>Repatch</strong>: I patched the <em>exact same</em> stock <code>boot.img</code> using the v25.0 app.</li><li><strong>Reflash</strong>:</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Flash to both slots to be safe</span>
</span></span><span style=display:flex><span>fastboot flash boot_a magisk_patched_25000.img
</span></span><span style=display:flex><span>fastboot flash boot_b magisk_patched_25000.img
</span></span></code></pre></div><p><strong>The Result</strong>: Success. The phone booted, and the Magisk app confirmed <code>Installed: 25.0</code>.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span> adb shell <span style=color:#a5d6ff>&#34;su -c id&#34;</span>
</span></span><span style=display:flex><span><span style=color:#79c0ff>uid</span><span style=color:#ff7b72;font-weight:700>=</span>0<span style=color:#ff7b72;font-weight:700>(</span>root<span style=color:#ff7b72;font-weight:700>)</span> <span style=color:#79c0ff>gid</span><span style=color:#ff7b72;font-weight:700>=</span>0<span style=color:#ff7b72;font-weight:700>(</span>root<span style=color:#ff7b72;font-weight:700>)</span> <span style=color:#79c0ff>groups</span><span style=color:#ff7b72;font-weight:700>=</span>0<span style=color:#ff7b72;font-weight:700>(</span>root<span style=color:#ff7b72;font-weight:700>)</span> <span style=color:#79c0ff>context</span><span style=color:#ff7b72;font-weight:700>=</span>u:r:magisk:s0
</span></span></code></pre></div><h2 id=key-insights>Key Insights
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Don&rsquo;t Trust OTAs on EOL Devices</strong>: If you&rsquo;re reviving old hardware, the OTA mechanism is likely broken or unreliable. Go straight to the factory images.</li><li><strong>Version Matching Matters</strong>: Tools like Magisk evolve. Using a 2026 root method on a 2017 kernel is a recipe for instability. Sometimes, downgrading your tools is the only way forward.</li><li><strong>A/B Partitions</strong>: Always flash your patched boot image to <em>both</em> slots (<code>boot_a</code> and <code>boot_b</code>) to avoid active slot mismatches causing boot failures.</li></ul><p>With root access secured, the path is now clear to install Frida and finally intercept those elusive EcoFlow authentication packets.</p><h2 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ol><li><a href=https://developers.google.com/android/images class=external-link target=_blank rel=noopener>Google Pixel Factory Images</a></li><li><a href=https://topjohnwu.github.io/Magisk/install.html class=external-link target=_blank rel=noopener>Magisk Installation Guide</a></li><li><a href=https://github.com/topjohnwu/Magisk/releases class=external-link target=_blank rel=noopener>Magisk GitHub Releases</a></li><li><a href=https://xdaforums.com/t/guide-unlock-flash-root-for-the-pixel-2-xl-taimen.3702418/ class=external-link target=_blank rel=noopener>XDA Guide: Unlock/Flash/Root Pixel 2 XL</a></li></ol></div></article>
<article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-08-09T00:00:00Z>August 9, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
3-minute read</span></div></div></header><div class=post-content><p>I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:</p><ul><li>nvidia-smi failed to communicate with the NVIDIA driver</li><li>modprobe nvidia → “Key was rejected by service”</li></ul><p>That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.</p><h3 id=environment>Environment
<a class=heading-link href=#environment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>Proxmox VM (QEMU/KVM) 8.4.9</li><li>Debian 12 (bookworm), kernel 6.1</li><li>GPU: NVIDIA Tesla V100 (GV100GL)</li><li>NVIDIA driver installed via Debian packages (nvidia-driver, nvidia-kernel-dkms)</li></ul><h3 id=root-cause>Root Cause
<a class=heading-link href=#root-cause><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>Secure Boot enabled (verified with <code>mokutil --sb-state</code>)</li><li>NVIDIA DKMS modules were built, but the signing key was not trusted by the UEFI shim/firmware</li><li>VM booted via the fallback “UEFI QEMU HARDDISK” path (not shim), so MOK requests didn&rsquo;t run; no MOK screen</li></ul><h3 id=strategy>Strategy
<a class=heading-link href=#strategy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Keep Secure Boot on; get modules trusted. That requires:</p><ol><li>Ensure the VM boots via shim (so MOK can work)</li><li>Make sure DKMS signs modules with a MOK key/cert</li><li>Enroll that MOK into the firmware via shim&rsquo;s MokManager</li></ol><h3 id=step-1--boot-via-shim-and-persist-efi-variables>Step 1 — Boot via shim and persist EFI variables
<a class=heading-link href=#step-1--boot-via-shim-and-persist-efi-variables><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>In Proxmox (VM stopped):</p><ul><li>BIOS: OVMF (UEFI)</li><li>Add EFI Disk (stores OVMF VARS; required for MOK)</li><li>Machine: q35</li><li>Enable Secure Boot (option shows only with OVMF + EFI Disk)</li></ul><p>Inside Debian:</p><ul><li>Ensure ESP is mounted at <code>/boot/efi</code></li><li>Install signed boot stack:<div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo apt install shim-signed grub-efi-amd64-signed efibootmgr mokutil
</span></span><span style=display:flex><span>sudo grub-install --target<span style=color:#ff7b72;font-weight:700>=</span>x86_64-efi --efi-directory<span style=color:#ff7b72;font-weight:700>=</span>/boot/efi --bootloader-id<span style=color:#ff7b72;font-weight:700>=</span>debian
</span></span><span style=display:flex><span>sudo update-grub
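</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Sanity checks: Secure Boot should report enabled, and shim should exist on the ESP</span>
</span></span><span style=display:flex><span>mokutil --sb-state
</span></span><span style=display:flex><span>ls /boot/efi/EFI/debian/shimx64.efi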
</span></span></code></pre></div></li><li>Create/verify a boot entry that points to shim:<div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo efibootmgr -c -d /dev/sda -p <span style=color:#a5d6ff>15</span> -L <span style=color:#a5d6ff>&#34;debian&#34;</span> -l <span style=color:#a5d6ff>&#39;\EFI\debian\shimx64.efi&#39;</span>
</span></span><span style=display:flex><span>sudo efibootmgr -o 0002,0001,0000 <span style=color:#8b949e;font-style:italic># make shim (0002) first</span>
</span></span><span style=display:flex><span>sudo efibootmgr -n <span style=color:#a5d6ff>0002</span> <span style=color:#8b949e;font-style:italic># BootNext shim for the next reboot</span>
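</span></span><span style=display:flex><span>sudo efibootmgr -v <span style=color:#8b949e;font-style:italic># verify the resulting order and file paths</span>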
</span></span></code></pre></div></li></ul><p>Tip: If NVRAM resets or the firmware takes the fallback boot path, copy shim into the fallback location:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo mkdir -p /boot/efi/EFI/BOOT
</span></span><span style=display:flex><span>sudo cp /boot/efi/EFI/debian/shimx64.efi /boot/efi/EFI/BOOT/BOOTX64.EFI
</span></span><span style=display:flex><span>sudo cp /boot/efi/EFI/debian/<span style=color:#ff7b72;font-weight:700>{</span>mmx64.efi,grubx64.efi<span style=color:#ff7b72;font-weight:700>}</span> /boot/efi/EFI/BOOT/
</span></span></code></pre></div><h3 id=step-2--make-dkms-sign-nvidia-modules-with-a-mok>Step 2 — Make DKMS sign NVIDIA modules with a MOK
<a class=heading-link href=#step-2--make-dkms-sign-nvidia-modules-with-a-mok><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Debian already generated a DKMS key at <code>/var/lib/dkms/mok.key</code>. Create an X.509 cert in DER format:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo openssl req -new -x509 <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -key /var/lib/dkms/mok.key <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -out /var/lib/dkms/mok.der <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -outform DER <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -subj <span style=color:#a5d6ff>&#34;/CN=DKMS MOK/&#34;</span> <span style=color:#79c0ff>\
</span></span></span><span style=display:flex><span> -days <span style=color:#a5d6ff>36500</span>
</span></span></code></pre></div><p>Enable DKMS signing:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo sed -i <span style=color:#a5d6ff>&#39;s|^mok_signing_key=.*|mok_signing_key=/var/lib/dkms/mok.key|&#39;</span> /etc/dkms/framework.conf
</span></span><span style=display:flex><span>sudo sed -i <span style=color:#a5d6ff>&#39;s|^mok_certificate=.*|mok_certificate=/var/lib/dkms/mok.der|&#39;</span> /etc/dkms/framework.conf
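</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Verify both settings took effect:</span>
</span></span><span style=display:flex><span>grep <span style=color:#a5d6ff>&#34;^mok_&#34;</span> /etc/dkms/framework.conf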
</span></span></code></pre></div><p>Rebuild/install modules (signs them now):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo dkms build nvidia/<span style=color:#ff7b72>$(</span>modinfo -F version nvidia<span style=color:#ff7b72>)</span> -k <span style=color:#ff7b72>$(</span>uname -r<span style=color:#ff7b72>)</span> --force
</span></span><span style=display:flex><span>sudo dkms install nvidia/<span style=color:#ff7b72>$(</span>modinfo -F version nvidia<span style=color:#ff7b72>)</span> -k <span style=color:#ff7b72>$(</span>uname -r<span style=color:#ff7b72>)</span> --force
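</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Spot-check the result; the signer should match the cert CN (&#34;DKMS MOK&#34;):</span>
</span></span><span style=display:flex><span>modinfo -F signer nvidia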
</span></span></code></pre></div><h3 id=step-3--enroll-the-mok-via-shim-mokmanager>Step 3 — Enroll the MOK via shim (MokManager)
<a class=heading-link href=#step-3--enroll-the-mok-via-shim-mokmanager><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Queue the cert and set a longer prompt timeout:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo mokutil --revoke-import
</span></span><span style=display:flex><span>sudo mokutil --import /var/lib/dkms/mok.der
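</span></span><span style=display:flex><span>sudo mokutil --list-new <span style=color:#8b949e;font-style:italic># confirm the cert is queued for enrollment</span>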
</span></span><span style=display:flex><span>sudo mokutil --timeout <span style=color:#a5d6ff>30</span>
</span></span><span style=display:flex><span>sudo efibootmgr -n <span style=color:#a5d6ff>0002</span> <span style=color:#8b949e;font-style:italic># ensure next boot goes through shim</span>
</span></span></code></pre></div><p>Reboot to the VM console (not SSH). In the blue MOK UI:</p><ul><li>Enroll MOK → Continue → Yes → enter password → reboot</li></ul><p>If arrow keys don&rsquo;t work in Proxmox noVNC:</p><ul><li>Use SPICE (virt-viewer), or</li><li>From the Proxmox host, send keys:<ul><li><code>qm sendkey &lt;VMID> down</code>, <code>qm sendkey &lt;VMID> ret</code>, <code>qm sendkey &lt;VMID> esc</code></li></ul></li></ul><h3 id=verification>Verification
<a class=heading-link href=#verification><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo mokutil --test-key /var/lib/dkms/mok.der <span style=color:#8b949e;font-style:italic># “already enrolled”</span>
</span></span><span style=display:flex><span>sudo modprobe nvidia
</span></span><span style=display:flex><span>nvidia-smi
</span></span><span style=display:flex><span>kubectl -n gpu-operator get pods -o wide
</span></span></code></pre></div><p>Once the module loads, GPU Operator pods on that node leave Init and become Ready.</p><h3 id=key-insights>Key Insights
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>“Key was rejected by service” during <code>modprobe nvidia</code> means Secure Boot rejected an untrusted module.</li><li>Without shim in the boot path (or without a persistent EFI vars disk), <code>mokutil --import</code> wont surface a MOK screen.</li><li>DKMS will not sign modules unless configured; set <code>mok_signing_key</code> and <code>mok_certificate</code> in <code>/etc/dkms/framework.conf</code>.</li><li>If you cannot or dont want to use MOK, the pragmatic dev choice is to disable Secure Boot in OVMF. For production, prefer shim+MOK.</li></ul><h3 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li>Proxmox Secure Boot setup (shim + MOK, EFI vars, DKMS): <a href=https://pve.proxmox.com/wiki/Secure_Boot_Setup#Setup_instructions_for_shim_+_MOK_variant class=external-link target=_blank rel=noopener>Proxmox docs</a></li></ul></div></article>
<article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/supabase-deep-dive/>Supabase Deep Dive: It's Not Magic, It's Just Postgres</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-08-03T00:00:00Z>August 3, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
8-minute read</span></div></div></header><div class=post-content><p>In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what&rsquo;s really going on.</p><p>Supabase enters this space with a radically different philosophy: <strong>transparency</strong>. It provides the convenience of a BaaS, but it&rsquo;s built on the world&rsquo;s most trusted relational database: PostgreSQL. The &ldquo;magic&rdquo; isn&rsquo;t a proprietary black box; it&rsquo;s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.</p><p>This deep dive will deconstruct that suite. We will move beyond the basics to explore the architectural patterns, security models, and development workflows that allow you to build robust, scalable applications. We will cover:</p><ul><li><strong>The Supabase Blueprint:</strong> A procedural guide to designing your application.</li><li><strong>The Pillars of Supabase:</strong> A detailed look at Auth, Storage, Functions, and Realtime.</li><li><strong>Transactional Realtime:</strong> How Supabase guarantees data consistency in a live environment.</li><li><strong>Best Practices:</strong> The practical knowledge you need before writing a single line of code.</li></ul><h3 id=the-guiding-philosophy-your-database-as-the-source-of-truth>The Guiding Philosophy: Your Database as the Source of Truth
<a class=heading-link href=#the-guiding-philosophy-your-database-as-the-source-of-truth><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The most critical shift when adopting Supabase is to see your database as more than just a data store. It is your <strong>single source of truth</strong>. This means your database schema is responsible for:</p><ul><li><strong>Structure:</strong> The tables and columns that define your data.</li><li><strong>Relationships:</strong> The foreign keys that link tables together.</li><li><strong>Integrity:</strong> The constraints (<code>NOT NULL</code>, <code>UNIQUE</code>) that ensure your data is always valid.</li><li><strong>Security:</strong> The access control rules that define who can do what.</li></ul><p>By leveraging PostgreSQL&rsquo;s native power, you get <strong>full ACID compliance</strong> (Atomicity, Consistency, Isolation, Durability) out of the box. You don&rsquo;t need to worry about application-level code to prevent orphan records or inconsistent states; the database guarantees it for you.</p><h3 id=the-supabase-design-blueprint-a-procedural-guide>The Supabase Design Blueprint: A Procedural Guide
<a class=heading-link href=#the-supabase-design-blueprint-a-procedural-guide><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>To build a scalable application, follow a structured design process that moves from abstract ideas to concrete implementation.</p><h4 id=phase-1-conceptual-modeling-the-blueprint>Phase 1: Conceptual Modeling (The Blueprint)
<a class=heading-link href=#phase-1-conceptual-modeling-the-blueprint><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Before touching the Supabase dashboard, map out your application on paper.</p><ol><li><strong>Identify the &ldquo;Nouns&rdquo;:</strong> These are your core data objects, which will become your database tables. For a project management app, they are <code>projects</code>, <code>tasks</code>, <code>users</code>, <code>comments</code>.</li><li><strong>Define the &ldquo;Verbs&rdquo;:</strong> These are the user actions. &ldquo;A user <em>creates</em> a task.&rdquo; &ldquo;A user <em>assigns</em> a task to another user.&rdquo; These actions will inform your security policies and APIs.</li><li><strong>Map Relationships:</strong> How do the nouns connect? A <code>task</code> belongs to one <code>project</code>. A <code>user</code> can have many <code>tasks</code>. A <code>project</code> can have many <code>users</code> (a many-to-many relationship, requiring a <code>project_users</code> join table).</li></ol><h4 id=phase-2-the-foundation-schema--migrations>Phase 2: The Foundation (Schema & Migrations)
<a class=heading-link href=#phase-2-the-foundation-schema--migrations><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Translate your model into SQL. For any serious project, use the <strong>Supabase CLI</strong> to manage this process.</p><ol><li><strong>Develop Locally:</strong> Run a full Supabase stack on your machine with <code>supabase start</code>.</li><li><strong>Create Migration Files:</strong> Write your <code>CREATE TABLE</code> statements in SQL files. Define columns, data types, and foreign key <code>REFERENCES</code> to enforce your relationships.</li><li><strong>Version Control:</strong> Commit these migration files to Git. Your database schema is now version-controlled alongside your application code.</li><li><strong>Deploy:</strong> Use <code>supabase db push</code> to apply your migrations to your live production database. This workflow is safe, repeatable, and professional.</li></ol><h4 id=phase-3-the-security-layer-row-level-security>Phase 3: The Security Layer (Row Level Security)
<a class=heading-link href=#phase-3-the-security-layer-row-level-security><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>This is not an optional step. RLS is the cornerstone of Supabase security.</p><ol><li><strong>Deny by Default:</strong> For any table holding user data, immediately enable RLS. This blocks all access until you explicitly grant it.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#ff7b72>ALTER</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span>ENABLE<span style=color:#6e7681> </span><span style=color:#ff7b72>ROW</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LEVEL</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SECURITY</span>;<span style=color:#6e7681>
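</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- (CLI workflow sketch: this statement would live in a migration file,
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- e.g. created via &#34;supabase migration new enable_rls_tasks&#34; and applied with &#34;supabase db push&#34;)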
</span></span></span></code></pre></div><ol start=2><li><strong>Write &ldquo;Allow&rdquo; Policies:</strong> Create policies based on your user stories. Policies are SQL rules that the database enforces on every single query.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- Users can see tasks in projects they are a member of.
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>&#34;Allow read access to tasks in user&#39;s projects&#34;</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>USING</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>EXISTS</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681> </span><span style=color:#a5d6ff>1</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FROM</span><span style=color:#6e7681> </span>project_users<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>WHERE</span><span style=color:#6e7681> </span>project_users.project_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.project_id<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>AND</span><span style=color:#6e7681> </span>project_users.user_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>)<span style=color:#6e7681>
</span></span></span><span style=display:flex><span>);<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- Users can only insert tasks for themselves.
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>&#34;Allow users to create their own tasks&#34;</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>INSERT</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>WITH</span><span style=color:#6e7681> </span><span style=color:#ff7b72>CHECK</span><span style=color:#6e7681> </span>(<span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.assignee_id<span style=color:#6e7681> </span>);<span style=color:#6e7681>
</span></span></span></code></pre></div><p>The <code>auth.uid()</code> function is a special Supabase utility that securely returns the ID of the logged-in user making the request.</p><h4 id=phase-4-the-apis-data-access>Phase 4: The APIs (Data Access)
<a class=heading-link href=#phase-4-the-apis-data-access><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>With your data structured and secured, you can now build the access points.</p><ul><li><strong>For Simple CRUD:</strong> Use Supabase&rsquo;s auto-generated API. It&rsquo;s convenient, respects all your RLS policies, and is perfect for simple reads and writes on a single table.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.from(<span style=color:#a5d6ff>&#39;tasks&#39;</span>).select(<span style=color:#a5d6ff>&#39;*&#39;</span>);
</span></span></code></pre></div><ul><li><strong>For Complex Logic:</strong> Use PostgreSQL Functions (RPC). Encapsulate complex <code>JOIN</code>s or multi-step transactions into a single, callable function. This reduces network chattiness and keeps your business logic secure on the server.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- A function to get a task and its project name in one call
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>OR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>REPLACE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FUNCTION</span><span style=color:#6e7681> </span>get_task_with_project(task_id_input<span style=color:#6e7681> </span>int)<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>RETURNS</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>(task_title<span style=color:#6e7681> </span>text,<span style=color:#6e7681> </span>project_name<span style=color:#6e7681> </span>text)<span style=color:#6e7681> </span><span style=color:#ff7b72>AS</span><span style=color:#6e7681> </span><span style=color:#f85149>$$</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>BEGIN</span><span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>RETURN</span><span style=color:#6e7681> </span>QUERY<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681> </span>tasks.title,<span style=color:#6e7681> </span>projects.name<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>FROM</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>JOIN</span><span style=color:#6e7681> </span>projects<span style=color:#6e7681> </span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks.project_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>projects.id<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>WHERE</span><span style=color:#6e7681> </span>tasks.id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>task_id_input;<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>END</span>;<span style=color:#6e7681>
</span></span></span><span style=display:flex><span><span style=color:#f85149>$$</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LANGUAGE</span><span style=color:#6e7681> </span>plpgsql;<span style=color:#6e7681>
</span></span></span></code></pre></div><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Called simply from the frontend
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.rpc(<span style=color:#a5d6ff>&#39;get_task_with_project&#39;</span>, { task_id_input<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>123</span> });
</span></span></code></pre></div><h3 id=a-tour-of-the-core-services>A Tour of the Core Services
<a class=heading-link href=#a-tour-of-the-core-services><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Beyond the database, Supabase provides a suite of essential tools.</p><h4 id=authentication>Authentication
<a class=heading-link href=#authentication><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>A complete user management system that integrates directly with your database. When a user signs up, a corresponding entry is created in the managed <code>auth.users</code> table, which you can then reference in your own tables.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Sign up a new user and handle social logins with ease
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signUp({ email, password });
</span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signInWithOAuth({ provider<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>&#39;github&#39;</span> });
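</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Returning users sign in directly (supabase-js v2 API):</span>
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// const { data, error } = await supabase.auth.signInWithPassword({ email, password });</span>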
</span></span></code></pre></div><h4 id=storage>Storage
<a class=heading-link href=#storage><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>A simple, S3-compatible object store for managing files like user avatars or documents. It&rsquo;s integrated with Postgres and RLS, allowing you to write fine-grained access policies on files and folders (buckets).</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Upload a user avatar to a public &#39;avatars&#39; bucket
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.storage
</span></span><span style=display:flex><span> .from(<span style=color:#a5d6ff>&#39;avatars&#39;</span>)
</span></span><span style=display:flex><span> .upload(<span style=color:#a5d6ff>`public/</span><span style=color:#a5d6ff>${</span>userId<span style=color:#a5d6ff>}</span><span style=color:#a5d6ff>.png`</span>, file);
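</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Read it back for display; public buckets expose a stable URL (assumed v2 API):</span>
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// const { data } = supabase.storage.from(&#39;avatars&#39;).getPublicUrl(`public/${userId}.png`);</span>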
</span></span></code></pre></div><h4 id=edge-functions-vs-database-functions>Edge Functions vs. Database Functions
<a class=heading-link href=#edge-functions-vs-database-functions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>It&rsquo;s critical to know when to use which.</p><ul><li><strong>Database Functions (SQL):</strong> For data-intensive logic <em>inside</em> your database.</li><li><strong>Edge Functions (TypeScript/Deno):</strong> For connecting to the outside world. Use them to call third-party APIs (like Stripe for payments) or run computations that are not well-suited for SQL. This is where you use your secret <code>service_role</code> key, as the function runs in a trusted server environment.</li></ul><h3 id=the-realtime-engine-a-pubsub-system-for-postgres>The Realtime Engine: A Pub/Sub System for Postgres
<a class=heading-link href=#the-realtime-engine-a-pubsub-system-for-postgres><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Supabase&rsquo;s Realtime engine is a powerful feature for building live, interactive experiences.</p><h4 id=how-it-works-logical-replication>How it Works: Logical Replication
<a class=heading-link href=#how-it-works-logical-replication><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>It&rsquo;s not magic; it leverages a core PostgreSQL feature.</p><ol><li>When you enable Realtime on a table, Supabase creates a <strong>Publication</strong> for it.</li><li>The Realtime server subscribes to this publication via a <strong>Logical Replication Slot</strong>.</li><li>When a transaction is <strong>successfully committed</strong> to your database, the change is written to Postgres&rsquo;s Write-Ahead Log (WAL).</li><li>The WAL change is then sent to the Realtime server through the replication slot.</li><li>The server converts this database event into a JSON payload and broadcasts it over a WebSocket to all subscribed clients.</li></ol><h4 id=transactional-integrity>Transactional Integrity
<a class=heading-link href=#transactional-integrity><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The most important guarantee of this system is its relationship with database transactions. An event is <strong>only broadcast <em>after</em> a transaction is fully and successfully committed.</strong> If a transaction is rolled back due to an error, the replication slot receives nothing, and no Realtime event is ever sent. This means you can trust that every Realtime message you receive corresponds to data that is permanently and consistently stored in your database.</p><h4 id=use-cases-and-limitations>Use Cases and Limitations
<a class=heading-link href=#use-cases-and-limitations><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><ul><li><strong>Use For:</strong> Small, JSON-based messages like chat messages, live notifications, activity feeds, and presence indicators (&ldquo;who&rsquo;s online&rdquo;). Use the <code>broadcast</code> feature for ephemeral data like cursor positions that you don&rsquo;t need to save.</li><li><strong>Do NOT Use For:</strong> Large, continuous data streams. It is <strong>not</strong> a replacement for WebRTC for video/audio calls. The system is designed for small, infrequent payloads.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#ff7b72>const</span> channel <span style=color:#ff7b72;font-weight:700>=</span> supabase.channel(<span style=color:#a5d6ff>&#39;public:messages&#39;</span>);
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Subscribe to new rows in the &#39;messages&#39; table
</span></span></span><span style=display:flex><span>channel
</span></span><span style=display:flex><span> .on(
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#39;postgres_changes&#39;</span>,
</span></span><span style=display:flex><span> { event<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>&#39;INSERT&#39;</span>, schema<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>&#39;public&#39;</span>, table<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>&#39;messages&#39;</span> },
</span></span><span style=display:flex><span> (payload) =&gt; {
</span></span><span style=display:flex><span> console.log(<span style=color:#a5d6ff>&#39;New message received!&#39;</span>, payload.<span style=color:#ff7b72>new</span>);
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic>// Update your UI here
</span></span></span><span style=display:flex><span> }
</span></span><span style=display:flex><span> )
</span></span><span style=display:flex><span> .subscribe();
</span></span></code></pre></div><h3 id=final-words-of-advice>Final Words of Advice
<a class=heading-link href=#final-words-of-advice><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Frontend Freedom:</strong> Supabase is frontend-agnostic, but meta-frameworks like <strong>Next.js</strong> and <strong>SvelteKit</strong> offer a &ldquo;golden path&rdquo; with Auth Helpers that simplify server-side rendering and data fetching.</li><li><strong>Embrace the CLI:</strong> Use the Supabase CLI for a professional, safe, and repeatable development workflow. Don&rsquo;t manage your production schema by clicking in the UI.</li><li><strong>Know Your Keys:</strong> Use the public <code>anon</code> key in the browser. Guard the secret <code>service_role</code> key and only use it in secure server environments like Edge Functions.</li><li><strong>Indexes Matter:</strong> For fast queries on large tables, <code>CREATE INDEX</code> on frequently queried columns. Performance is not automatic.</li></ul><p>By understanding these principles, you can leverage Supabase not as a simple BaaS, but as a powerful, transparent, and scalable platform for building next-generation applications on the solid foundation of PostgreSQL.</p></div></article></section></div></main></body></html>

View File

@@ -0,0 +1,33 @@
<!doctype html><html lang=en><head><title>An Architectural Deep Dive of T5 · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"></head><body><main class=wrapper><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/>An Architectural Deep Dive of T5</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-06-01T00:00:00Z>June 1, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
6-minute read</span></div></div></header><div class=post-content><p>In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the &ldquo;decoder-only&rdquo; model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.</p><p>But to truly understand the field, we must look at the pivotal models that explored different paths. Google&rsquo;s T5, or <strong>Text-to-Text Transfer Transformer</strong>, stands out as one of the most influential. It didn&rsquo;t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.</p><h3 id=the-core-philosophy-everything-is-a-text-to-text-problem>The Core Philosophy: Everything is a Text-to-Text Problem
<a class=heading-link href=#the-core-philosophy-everything-is-a-text-to-text-problem><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The genius of T5 lies in its unifying framework. Instead of building different models or fine-tuning procedures for various NLP tasks, T5 reframes every task as a text-to-text problem. The model takes a string as input and generates a string as output, regardless of the underlying objective.</p><p>This is accomplished by adding a <strong>task prefix</strong> to the input. These prefixes are not conversational prompts like a GPT &ldquo;system prompt&rdquo;; they are learned triggers that the model is explicitly fine-tuned to recognize.</p><table><thead><tr><th style=text-align:left>Task</th><th style=text-align:left>T5 Input</th><th style=text-align:left>Expected T5 Output</th></tr></thead><tbody><tr><td style=text-align:left>Translation</td><td style=text-align:left><code>translate English to German: The cat is cute.</code></td><td style=text-align:left><code>Die Katze ist süß.</code></td></tr><tr><td style=text-align:left>Summarization</td><td style=text-align:left><code>summarize: [A long news article...]</code></td><td style=text-align:left><code>[A concise summary.]</code></td></tr><tr><td style=text-align:left>Classification</td><td style=text-align:left><code>cola sentence: The boys is walking.</code></td><td style=text-align:left><code>unacceptable</code></td></tr><tr><td style=text-align:left>Similarity</td><td style=text-align:left><code>stsb sentence1: The car is red. sentence2: The auto is crimson.</code></td><td style=text-align:left><code>4.8</code></td></tr></tbody></table><p>This elegant approach turns even classification into a generation task, where the model learns to generate the text of the correct label.</p>
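<p>As a concrete illustration, here is a minimal sketch of driving one of these learned prefixes with the Hugging Face <code>transformers</code> library (the public <code>t5-small</code> checkpoint is assumed here):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># Task prefixes in practice: the prefix selects the task, the model generates text.
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained(&#34;t5-small&#34;)
model = T5ForConditionalGeneration.from_pretrained(&#34;t5-small&#34;)

inputs = tokenizer(&#34;translate English to German: The cat is cute.&#34;, return_tensors=&#34;pt&#34;)
output_ids = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))  # e.g. &#34;Die Katze ist süß.&#34;
</code></pre></div><h3 id=the-engine-a-two-window-encoder-decoder-architecture>The Engine: A Two-Window Encoder-Decoder Architecture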
<a class=heading-link href=#the-engine-a-two-window-encoder-decoder-architecture><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>To execute this text-to-text mission, T5 uses the original Transformer&rsquo;s <strong>encoder-decoder architecture</strong>. This is the most significant point of divergence from modern decoder-only LLMs. The inference process works in two distinct stages:</p><h4 id=stage-1-the-encoder-the-understanding-window>Stage 1: The Encoder (The &ldquo;Understanding&rdquo; Window)
<a class=heading-link href=#stage-1-the-encoder-the-understanding-window><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>When T5 receives an input like <code>summarize: [article text]</code>, the entire string is fed into the <strong>encoder</strong>.</p><ul><li><strong>Bidirectional Context:</strong> The encoder processes the input bidirectionally. Every token can see every other token in the input text simultaneously. This allows the model to build a deep, holistic understanding of the entire prompt and its context.</li><li><strong>Static Representation:</strong> The encoder&rsquo;s final output is not text. It&rsquo;s a set of numerical representations (hidden states) that encapsulates the meaning and intent of the input. This representation is generated once and remains static for the entire generation process.</li></ul><h4 id=stage-2-the-decoder-the-writing-window>Stage 2: The Decoder (The &ldquo;Writing&rdquo; Window)
<a class=heading-link href=#stage-2-the-decoder-the-writing-window><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The decoder is responsible for generating the output string token by token.</p><ul><li><strong>Autoregressive Generation:</strong> It begins with a <code>start-of-sequence</code> token and generates the output one token at a time.</li><li><strong>Cross-Attention:</strong> At each step, the decoder does two things: it looks at the text it has generated so far (its own &ldquo;decoder context&rdquo;), and crucially, it uses a mechanism called <strong>cross-attention</strong> to look back at the static representation created by the encoder. This allows the decoder&rsquo;s generation to be guided by the encoder&rsquo;s complete understanding of the prompt.</li><li><strong>Growing Context:</strong> The decoder&rsquo;s context window grows with each token it generates until it produces an <code>end-of-sequence</code> token, signaling that the task is complete.</li></ul><p>This two-window system is a powerful design, especially for tasks that require a full understanding of a source document before generating a new one (like translation or summarization).</p>
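<p>The two stages are visible in code. Below is a minimal greedy-decoding sketch with <code>transformers</code> (again assuming <code>t5-small</code>): the encoder states are computed once and stay fixed, while the decoder&rsquo;s own context grows token by token:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># Stage 1 runs once; Stage 2 loops, cross-attending to the static encoder states.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained(&#34;t5-small&#34;)
model = T5ForConditionalGeneration.from_pretrained(&#34;t5-small&#34;)

inputs = tokenizer(&#34;summarize: [article text]&#34;, return_tensors=&#34;pt&#34;)
encoder_outputs = model.get_encoder()(input_ids=inputs.input_ids)  # static representation

decoder_input_ids = torch.tensor([[model.config.decoder_start_token_id]])
for _ in range(32):
    out = model(encoder_outputs=encoder_outputs, decoder_input_ids=decoder_input_ids)
    next_id = out.logits[:, -1].argmax(dim=-1, keepdim=True)  # greedy pick
    decoder_input_ids = torch.cat([decoder_input_ids, next_id], dim=-1)  # growing context
    if next_id.item() == model.config.eos_token_id:  # end-of-sequence token
        break

print(tokenizer.decode(decoder_input_ids[0], skip_special_tokens=True))
</code></pre></div><h3 id=architectural-divergence-t5-vs-the-modern-llm-playbook>Architectural Divergence: T5 vs. The Modern LLM Playbook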
<a class=heading-link href=#architectural-divergence-t5-vs-the-modern-llm-playbook><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Beyond its core architecture, T5 made several specific design choices that contrast with today&rsquo;s standards.</p><h4 id=1-positional-embeddings-relative-rpe-vs-rotary-rope>1. Positional Embeddings: Relative (RPE) vs. Rotary (RoPE)
<a class=heading-link href=#1-positional-embeddings-relative-rpe-vs-rotary-rope><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>How a model knows the order of words is critical.</p><ul><li><strong>T5&rsquo;s Approach (RPE):</strong> T5 uses a form of <strong>Relative Positional Embedding</strong>. Instead of adding a position signal to the word embeddings, it adds a learned bias directly to the attention scores based on the relative distance between tokens. It&rsquo;s a clever way to encode position that is independent of sequence length.</li><li><strong>The Modern Standard (RoPE):</strong> Most modern LLMs (LLaMA, PaLM, Mistral) use <strong>Rotary Positional Embeddings</strong>. As detailed in the CS336 slides, RoPE works by mathematically <em>rotating</em> the Query and Key vectors based on their absolute position. This method has proven exceptionally effective for long sequences and is considered the current state-of-the-art.</li></ul>
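<p>In equation form, T5&rsquo;s relative bias is simply an additive, learned scalar on the attention logits, indexed by a bucketed distance between positions (a sketch of the mechanism; the exact bucketing scheme is omitted):</p><p>$$\mathrm{score}(i, j) = q_i^{\top} k_j + b_{\mathrm{bucket}(i - j)}$$</p><h4 id=2-the-feed-forward-network-an-extreme-experiment>2. The Feed-Forward Network: An Extreme Experiment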
<a class=heading-link href=#2-the-feed-forward-network-an-extreme-experiment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The Feed-Forward Network (FFN) inside each Transformer block typically has an inner dimension 4 times the model&rsquo;s hidden dimension (<code>d_model</code>). The original T5 11B model took a radical departure from this rule.</p><ul><li><strong>T5 11B&rsquo;s Choice:</strong> It used a small hidden dimension (<code>d_model = 1024</code>) but an astoundingly large FFN dimension (<code>d_ff = 65,536</code>), a <strong>64-times multiplier</strong>. The rationale was that modern accelerators (like Google&rsquo;s TPUs) are highly efficient at large, dense matrix multiplications.</li><li><strong>The Modern Standard:</strong> This experiment was not widely adopted. Later models, including T5&rsquo;s own successor <strong>T5 v1.1</strong>, reverted to the standard 4x multiplier (or ~2.66x when using GLU activations) for a better balance of parameters and performance.</li></ul>
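<p>A quick back-of-the-envelope count shows the scale of that choice (assuming the standard two-matrix ReLU FFN and ignoring everything else):</p><p>$$\frac{d_{ff}}{d_{model}} = \frac{65536}{1024} = 64, \qquad P_{\mathrm{FFN}} \approx 2 \, d_{model} \, d_{ff} = 2 \times 1024 \times 65536 \approx 1.34 \times 10^{8}$$</p><p>At roughly 134M weights per FFN block, these matrices account for most of the 11B parameter budget.</p><h4 id=3-denoising-span-corruption-vs-iterative-diffusion>3. Denoising: Span Corruption vs. Iterative Diffusion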
<a class=heading-link href=#3-denoising-span-corruption-vs-iterative-diffusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>While T5&rsquo;s pre-training is called &ldquo;denoising,&rdquo; it&rsquo;s conceptually different from the denoising in modern diffusion models.</p><ul><li><strong>T5&rsquo;s Denoising:</strong> This is <strong>span corruption</strong>. The model is shown a sentence with chunks of text masked out and learns to predict exactly what was removed in a single step. It&rsquo;s a fill-in-the-blanks task to learn rich language representations.</li><li><strong>Diffusion Denoising:</strong> This is a multi-step generative process. A clean text is gradually corrupted with noise, and the model learns to reverse this process step-by-step, allowing it to generate high-fidelity text from pure noise.</li></ul>
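<p>The canonical example from the T5 paper makes span corruption concrete: contiguous spans are replaced by sentinel tokens, and the target reconstructs only what was dropped:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># Span corruption (example from the T5 paper): sentinels mark the dropped spans.
original       = &#34;Thank you for inviting me to your party last week.&#34;
encoder_input  = &#34;Thank you &lt;extra_id_0&gt; me to your party &lt;extra_id_1&gt; week.&#34;
decoder_target = &#34;&lt;extra_id_0&gt; for inviting &lt;extra_id_1&gt; last &lt;extra_id_2&gt;&#34;
</code></pre></div><h3 id=where-t5-was-ahead-of-its-time>Where T5 Was Ahead of its Time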
<a class=heading-link href=#where-t5-was-ahead-of-its-time><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Despite its differences, the &ldquo;T5 v1.1&rdquo; variant pioneered several techniques that are now standard practice in the most advanced LLMs:</p><ul><li><strong>RMSNorm:</strong> It was one of the first major models to adopt Root Mean Square Normalization instead of LayerNorm, a choice now used by LLaMA, Mistral, and others for its efficiency and stability.</li><li><strong>Pre-Normalization:</strong> T5 applies the normalization layer <em>before</em> the attention and FFN blocks, a critical technique for enabling stable training of very deep networks.</li><li><strong>No Bias Terms:</strong> T5 v1.1 removed the bias parameters from its normalization and FFN layers, a small but important optimization for memory and stability that modern models follow.</li><li><strong>Gated Activations (GeGLU):</strong> While the original T5 used ReLU, T5 v1.1 adopted a Gated Linear Unit (GeGLU), presaging the move to GLU-family activations (like SwiGLU) that is now ubiquitous.</li></ul>
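<p>Two of these ideas fit in a few lines of PyTorch. A minimal sketch, bias-free as in T5 v1.1 (module names and shapes here are illustrative, not the reference implementation):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># RMSNorm and a GeGLU feed-forward block, both without bias terms.
import torch
import torch.nn as nn
import torch.nn.functional as F

class RMSNorm(nn.Module):
    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(dim))  # scale only, no bias
        self.eps = eps

    def forward(self, x):
        # Normalize by the root mean square instead of mean and variance.
        return self.weight * x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)

class GeGLUFFN(nn.Module):
    def __init__(self, d_model, d_ff):
        super().__init__()
        self.wi = nn.Linear(d_model, d_ff, bias=False)  # value path
        self.wg = nn.Linear(d_model, d_ff, bias=False)  # gate path
        self.wo = nn.Linear(d_ff, d_model, bias=False)

    def forward(self, x):
        return self.wo(F.gelu(self.wi(x)) * self.wg(x))
</code></pre></div><h3 id=conclusion-the-lasting-legacy>Conclusion: The Lasting Legacy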
<a class=heading-link href=#conclusion-the-lasting-legacy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>T5 represents a different evolutionary branch in the Transformer family tree. While the field has largely converged on the decoder-only architecture for its scalability in general-purpose models, T5&rsquo;s design remains a masterclass in purpose-built engineering.</p><p>Its text-to-text framework was revolutionary, its encoder-decoder structure is still a go-to for tasks like translation, and its refined T5 v1.1 architecture laid the groundwork for many of the stability and efficiency tricks we see in today&rsquo;s state-of-the-art models. T5 is more than just a model; it&rsquo;s a crucial case study in the architectural trade-offs that continue to shape the future of artificial intelligence.</p></div></article></section></div></main></body></html>

View File

@@ -0,0 +1,76 @@
<!doctype html><html lang=en><head><title>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"></head><body><main class=wrapper><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/technical-deep-dive-llm-categorization/>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-12-27T00:00:00Z>December 27, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
7-minute read</span></div></div></header><div class=post-content><p>Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and &ldquo;wait, was this dinner or <em>vacation</em> dinner?&rdquo; questions.</p><p>For years, I relied on a rule-based system to categorize our credit card transactions. It worked&mldr; mostly. But maintaining <code>if "UBER" in description and amount > 50</code> style rules is a never-ending battle against the entropy of merchant names and changing habits.</p><p>Recently, I decided to modernize this stack using Large Language Models (LLMs). This post details the technical journey from using an off-the-shelf commercial model to distilling that knowledge into a small, efficient local model (<code>google/t5gemma-2-270m</code>) that runs on my own hardware while maintaining high accuracy.</p><h2 id=phase-1-the-proof-of-concept-with-commercial-llms>Phase 1: The Proof of Concept with Commercial LLMs
<a class=heading-link href=#phase-1-the-proof-of-concept-with-commercial-llms><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>My first step was to replace the spaghetti code of regex rules with a prompt. I used <strong>Gemini-3-Flash</strong> (via <code>litellm</code>) as my categorization engine.</p><p>The core challenge was context. A transaction like <code>MCDONALDS</code> could be:</p><ul><li><strong>Dining</strong>: A quick lunch during work.</li><li><strong>Travel-Dining</strong>: A meal while on a road trip.</li></ul><p>To solve this, I integrated my <strong>private Google Calendar</strong> (via <code>.ics</code> export). The prompt doesn&rsquo;t just see the transaction; it sees <em>where I was</em> and <em>what I was doing</em> on that day.</p><h3 id=the-god-prompt>The &ldquo;God Prompt&rdquo;
<a class=heading-link href=#the-god-prompt><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The system prompt was designed to return strict JSON, adhering to a schema of Categories (e.g., <code>Dining</code>, <code>Travel</code>, <code>Bills</code>) and Sub-Categories (e.g., <code>Travel</code> -> <code>Accommodation</code>).</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span>{
</span></span><span style=display:flex><span> <span style=color:#7ee787>&#34;Category&#34;</span>: <span style=color:#a5d6ff>&#34;Travel&#34;</span>,
</span></span><span style=display:flex><span> <span style=color:#7ee787>&#34;Travel Category&#34;</span>: <span style=color:#a5d6ff>&#34;Dining&#34;</span>,
</span></span><span style=display:flex><span> <span style=color:#7ee787>&#34;Reasoning&#34;</span>: <span style=color:#a5d6ff>&#34;User is on &#39;Trip: 34TH ARCH CANYON 2025&#39;, distinguishing this from regular dining.&#34;</span>
</span></span><span style=display:flex><span>}
</span></span></code></pre></div><p>This worked well. The &ldquo;Reasoning&rdquo; field even gave me explanations for why it flagged something as <code>Entertainment</code> vs <code>Shopping</code>. But relying on an external API for every single transaction felt like overkill for a personal project, and I wanted to own the stack.</p>
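<p>For reference, the call itself is small. A hedged sketch of the Phase 1 categorizer (the function names, prompt wording, and <code>litellm</code> model id below are illustrative assumptions, not my exact code):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># Hypothetical sketch of the Phase 1 flow; names and model id are assumptions.
import json
from litellm import completion

SYSTEM_PROMPT = &#34;Categorize the transaction. Return strict JSON with keys: Category, Travel Category, Reasoning.&#34;

def categorize(transaction: str, calendar_events: list[str]) -&gt; dict:
    # Inject private calendar context so the model knows where I was that day.
    context = &#34;\n&#34;.join(calendar_events)
    resp = completion(
        model=&#34;gemini/gemini-3-flash&#34;,  # assumed litellm route
        messages=[
            {&#34;role&#34;: &#34;system&#34;, &#34;content&#34;: SYSTEM_PROMPT},
            {&#34;role&#34;: &#34;user&#34;, &#34;content&#34;: f&#34;Calendar:\n{context}\n\nTransaction: {transaction}&#34;},
        ],
    )
    return json.loads(resp.choices[0].message.content)
</code></pre></div><h2 id=phase-2-distilling-knowledge>Phase 2: Distilling Knowledge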
<a class=heading-link href=#phase-2-distilling-knowledge><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>I wanted to train a smaller model to mimic Gemini&rsquo;s performance. But I didn&rsquo;t want to manually label thousands of transactions.</p><h3 id=consistency-filtering>Consistency Filtering
<a class=heading-link href=#consistency-filtering><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I had a massive CSV of historical transactions (years of data). However, that data was &ldquo;noisy&rdquo;—some manual labels were outdated or inconsistent.</p><p>I built a <strong>Distillation Pipeline</strong> (<code>distill_reasoning.py</code>) that uses the Teacher Model (Gemini) to re-label the historical data. But here&rsquo;s the twist: I only added a data point to my training set if the <strong>Teacher&rsquo;s prediction matched the Historical Ground Truth</strong>.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Pseudo-code for consistency filtering</span>
</span></span><span style=display:flex><span>teacher_pred <span style=color:#ff7b72;font-weight:700>=</span> gemini<span style=color:#ff7b72;font-weight:700>.</span>categorize(transaction)
</span></span><span style=display:flex><span>historical_label <span style=color:#ff7b72;font-weight:700>=</span> row[<span style=color:#a5d6ff>&#39;Category&#39;</span>]
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span><span style=color:#ff7b72>if</span> teacher_pred<span style=color:#ff7b72;font-weight:700>.</span>category <span style=color:#ff7b72;font-weight:700>==</span> historical_label:
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic># High confidence sample!</span>
</span></span><span style=display:flex><span> training_data<span style=color:#ff7b72;font-weight:700>.</span>append({
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#34;input&#34;</span>: format_transaction(transaction),
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>&#34;output&#34;</span>: teacher_pred<span style=color:#ff7b72;font-weight:700>.</span>to_json()
</span></span><span style=display:flex><span> })
</span></span><span style=display:flex><span><span style=color:#ff7b72>else</span>:
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic># Discard: Either history is wrong OR teacher hallucinated.</span>
</span></span><span style=display:flex><span> log_fail(transaction)
</span></span></code></pre></div><p>This filtered out the noise, leaving me with ~2,000 high-quality, &ldquo;verified&rdquo; examples where both the human (me, years ago) and the AI agreed.</p><h2 id=phase-3-training-the-little-guy>Phase 3: Training the Little Guy
<a class=heading-link href=#phase-3-training-the-little-guy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>For the local model, I chose <strong>google/t5gemma-2-270m</strong>. This is a Seq2Seq model, which fits the &ldquo;Text-to-JSON&rdquo; task perfectly, and it&rsquo;s tiny (270M parameters), meaning it can run on almost anything.</p><h3 id=the-stack>The Stack
<a class=heading-link href=#the-stack><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Library</strong>: <code>transformers</code>, <code>peft</code>, <code>bitsandbytes</code></li><li><strong>Technique</strong>: <strong>LoRA</strong> (Low-Rank Adaptation). I targeted all linear layers (<code>q_proj</code>, <code>k_proj</code>, <code>v_proj</code>, etc.) with <code>r=16</code>.</li><li><strong>Optimization</strong>: <code>AdamW</code> with linear decay.</li></ul>
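<p>Wired together, the setup looks roughly like this (a sketch: <code>lora_alpha</code> and the exact target-module list are assumptions, and I assume the checkpoint loads as a standard Seq2Seq model):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># LoRA adapter setup with peft; alpha and the module list are assumed values.
from transformers import AutoModelForSeq2SeqLM
from peft import LoraConfig, TaskType, get_peft_model

base = AutoModelForSeq2SeqLM.from_pretrained(&#34;google/t5gemma-2-270m&#34;)
config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    r=16,                # rank from the post
    lora_alpha=32,       # assumed
    lora_dropout=0.1,    # see Pitfall #3 below
    target_modules=[&#34;q_proj&#34;, &#34;k_proj&#34;, &#34;v_proj&#34;, &#34;o_proj&#34;],
)
model = get_peft_model(base, config)
model.print_trainable_parameters()  # only a small fraction of 270M trains
</code></pre></div><h3 id=pitfall-1-the-loss-is-0-initial-panic>Pitfall #1: The &ldquo;Loss is 0&rdquo; Initial Panic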
<a class=heading-link href=#pitfall-1-the-loss-is-0-initial-panic><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>My first training run showed a loss of exactly <code>0.000</code> essentially immediately. In deep learning, if it looks too good to be true, it&rsquo;s a bug.
It turned out to be a syntax error in the arguments I passed to the <code>Trainer</code> (or rather, my custom loop). Once fixed, the loss looked &ldquo;healthy&rdquo;—starting high and decaying noisily.</p><h3 id=pitfall-2-stability-vs-noise>Pitfall #2: Stability vs. Noise
<a class=heading-link href=#pitfall-2-stability-vs-noise><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The loss curve was initially extremely erratic. The batch size on my GPU was limited (Physical Batch Size = 4).
<strong>The Fix</strong>: I implemented <strong>Gradient Accumulation</strong> (accumulating over 8 steps; a sketch of the pattern follows the plot below) to simulate a batch size of 32. This smoothed out the gradient noise significantly.
<img src=/images/technical-deep-dive-llm-categorization/eedb3be8259a4a70aa7029b78a029364.png alt="Training loss curve"></p>
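<p>The accumulation pattern itself is only a few lines (a sketch assuming an existing <code>model</code>, <code>optimizer</code>, and <code>loader</code>):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python># Physical batch 4, accumulated over 8 steps -&gt; effective batch size 32.
import torch

ACCUM_STEPS = 8
optimizer.zero_grad()
for step, batch in enumerate(loader):
    loss = model(**batch).loss / ACCUM_STEPS  # scale so the accumulated grad averages
    loss.backward()
    if (step + 1) % ACCUM_STEPS == 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # cap grad norm (Pitfall #3)
        optimizer.step()
        optimizer.zero_grad()
</code></pre></div><h3 id=pitfall-3-overfitting>Pitfall #3: Overfitting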
<a class=heading-link href=#pitfall-3-overfitting><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>With a small dataset (~2k samples), overfitting is a real risk. I employed a multi-layered defense strategy:</p><ol><li><strong>Data Quality First</strong>: The &ldquo;Consistency Filtering&rdquo; phase was the most critical step. By discarding ambiguous samples where the teacher model disagreed with history, I prevented the model from memorizing noise.</li><li><strong>Model Regularization</strong>:<ul><li><strong>LoRA Dropout</strong>: I set <code>lora_dropout=0.1</code>, randomly dropping 10% of the trainable adapter connections during training to force robust feature learning.</li><li><strong>Gradient Clipping</strong>: We capped the gradient norm at <code>1.0</code>. This prevents the &ldquo;exploding gradient&rdquo; problem and keeps weight updates stable.</li><li><strong>AdamW</strong>: Using the AdamW optimizer adds decoupled weight decay, implicitly penalizing overly complex weights.</li></ul></li></ol><p>I also set up a rigorous evaluation loop (10% validation split, eval every 50 steps) to monitor the <code>Train Loss</code> vs <code>Eval Loss</code> in real-time. The final curves showed them tracking downwards together, confirming generalization.</p><h2 id=phase-4-results-and-the-travel-edge-case>Phase 4: Results and The &ldquo;Travel&rdquo; Edge Case
<a class=heading-link href=#phase-4-results-and-the-travel-edge-case><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>The distilled model is surprisingly capable. It learned the JSON schema very well. Although I included a regex fallback in the inference script as a safety net, the model generates valid JSON the vast majority of the time.</p><h3 id=head-to-head-local-model-vs-gemini-flash>Head-to-Head: Local Model vs Gemini-Flash
<a class=heading-link href=#head-to-head-local-model-vs-gemini-flash><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>I ran a blind evaluation on 20 random unseen transactions.</p><ul><li><strong>Gemini-3-Flash Accuracy</strong>: 90% (18/20)</li><li><strong>Local T5-Gemma-2 Accuracy</strong>: 85% (17/20)</li></ul><p>The gap is surprisingly small. In fact, the local model sometimes outperformed the API because it was fine-tuned on <em>my</em> specific data distribution.</p><p><strong>Win for Local Model:</strong></p><blockquote><p><strong>Transaction</strong>: <code>XX RANCH #1702</code>
<strong>Local Prediction</strong>: <code>Groceries</code> (Correct)
<strong>API Prediction</strong>: <code>Gas</code> (Incorrect)
<strong>Local Reasoning</strong>: &ldquo;XX RANCH refers to a well-known supermarket chain.&rdquo;
<strong>API Reasoning</strong>: &ldquo;XX RANCH is a known convenience store and gas station chain.&rdquo;
<strong>Analysis</strong>: The local model &ldquo;knows&rdquo; (from training data) that XX Ranch is an Asian grocery store I frequent, whereas the general-purpose API assumed it was a gas station based on the name pattern.</p></blockquote><p><strong>Win for API (World Knowledge):</strong></p><blockquote><p><strong>Transaction</strong>: <code>LOVE'S #0792</code>
<strong>Local Prediction</strong>: <code>Dining</code> (Hallucination)
<strong>API Prediction</strong>: <code>Travel-Gas</code> (Correct)
<strong>Local Reasoning</strong>: &ldquo;Love&rsquo;s is a well-known restaurant chain, which falls under the Dining category.&rdquo;
<strong>API Reasoning</strong>: &ldquo;Love&rsquo;s is a well-known gas station chain, and the transaction occurred during a trip to Moab, categorizing it as travel-related fuel.&rdquo;
<strong>Analysis</strong>: The API knows &ldquo;Love&rsquo;s&rdquo; is a major gas station chain. The small local model lacks this world knowledge and hallucinates it as a restaurant, highlighting the pure &ldquo;Knowledge Gap&rdquo; between a 270M and a 70B+ model. Additionally, Gemini Flash has <strong>Google Search grounding</strong> enabled, allowing it to verify real-world entities in real-time—a capability our isolated local model intrinsically lacks.</p></blockquote><h3 id=surprise-win-json-stability>Surprise Win: JSON Stability
<a class=heading-link href=#surprise-win-json-stability><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>One pleasant surprise was the <strong>format adherence</strong>. I initially feared I&rsquo;d need constrained generation tools like <code>outlines</code> or a simplified schema for a 270M parameter model. However, the distilled T5-Gemma model followed the complex JSON schema (including nested fields) with near-perfect reliability, proving that specific structure can be learned effectively through fine-tuning alone.</p><h3 id=key-lesson-the-noisy-ground-truth-trap>Key Lesson: The &ldquo;Noisy Ground Truth&rdquo; Trap
<a class=heading-link href=#key-lesson-the-noisy-ground-truth-trap><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Since this is a <strong>distillation (SFT)</strong> pipeline, not Reinforcement Learning, the model has no way to &ldquo;unlearn&rdquo; bad habits via negative rewards. It relies entirely on the quality of the teacher&rsquo;s reasoning.</p><blockquote><p><strong>Transaction</strong>: <code>[TRAVEL] SWEETHOME KITCHEN</code>
<strong>Local Prediction</strong>: <code>Dining</code>
<strong>API Prediction</strong>: <code>Travel-Dining</code>
<strong>Local Reasoning</strong>: &ldquo;The description &lsquo;SWEETHOME KITCHEN&rsquo; indicates a restaurant or dining establishment, which falls under the Dining category.&rdquo;
<strong>API Reasoning</strong>: &ldquo;The transaction is for a kitchen/restaurant and occurred while the user was traveling to Pfeiffer Big Sur SP, making it a travel-related dining expense.&rdquo;</p></blockquote><p>In this case, the API correctly used the calendar context (&ldquo;User is in Big Sur&rdquo;). The local model missed this link. This highlights that simply having the data isn&rsquo;t enough—the <em>reasoning</em> in the training set must explicitly force the model to look at the context, or it will revert to simple pattern matching (Kitchen = Dining).</p><h2 id=conclusion>Conclusion
<a class=heading-link href=#conclusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>We often think we need 70B parameter models for everything. This project shows that for a specific, well-defined task with consistent formatting, a <strong>270M parameter model</strong>—fine-tuned on high-quality, distilled data—can punch way above its weight class.</p><p>The key was <strong>data quality over quantity</strong>. By using the commercial model to &ldquo;verify&rdquo; my historical data, I created a dataset that was cleaner than either source alone.</p></div></article></section></div></main></body></html>

View File

@@ -0,0 +1,29 @@
# The Convergence of Fast Weights, Linear Attention, and State Space Models

December 19, 2025 · 5-minute read
Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer's attention mechanism has become a primary bottleneck. Recent discussions in the AI community, most notably by Geoffrey Hinton, have highlighted a theoretical link between biological memory mechanisms ("Fast Weights") and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).

This article explores the mathematical equivalence between Hinton's concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.

## 1. The Standard Transformer Bottleneck
To understand the motivation for Fast Weights, one must first identify the inefficiency in standard Transformers. The core operation is **Self-Attention**, defined as:

$$ \text{Attention}(Q, K, V) = \text{softmax}\left(\frac{Q K^T}{\sqrt{d}}\right) V $$

During inference (generating tokens one by one), the model computes a Query ($Q$) for the current token and compares it against the Keys ($K$) and Values ($V$) of all previous tokens.

- **Computational Cost:** Quadratic $O(N^2)$ during training; linear $O(N)$ per step during inference.
- **Memory Cost:** The KV cache. To calculate the softmax, the model must explicitly store the $K$ and $V$ vectors for the entire history in GPU memory. For long contexts (e.g., 1 million tokens), this memory footprint becomes prohibitive.

The **softmax** function is the culprit. It introduces a non-linearity that binds $Q$ and $K$ together, preventing the mathematical separation of the current query from the historical context.
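To make the memory cost concrete, here is a minimal back-of-the-envelope sketch. The layer count, head count, head dimension, and fp16 storage are assumptions loosely modeled on a 7B-class configuration, not measurements of any specific model.

```python
# Back-of-the-envelope KV cache size for a hypothetical 7B-class model (assumed numbers).
n_layers = 32          # transformer layers (assumption)
n_kv_heads = 32        # KV heads per layer (full MHA, no GQA)
d_head = 128           # dimension per head (assumption)
bytes_per_elem = 2     # fp16/bf16 storage

def kv_cache_bytes(seq_len: int) -> int:
    # 2x because both K and V are cached, at every layer, for every token.
    return 2 * n_layers * n_kv_heads * d_head * bytes_per_elem * seq_len

for seq_len in (4_096, 131_072, 1_000_000):
    print(f"{seq_len:>9} tokens -> {kv_cache_bytes(seq_len) / 2**30:8.1f} GiB")
# Roughly 0.5 MiB per cached token here: ~2 GiB at a 4K context, but hundreds
# of GiB at a million tokens. The cache, not compute, becomes the bottleneck.
```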
## 2. Fast Weights as Associative Memory

Geoffrey Hinton proposes that the brain does not maintain a "digital buffer" of past activations (like a KV cache). Instead, it relies on **Fast Weights**.

In this framework, neural connections possess two timescales:

1. **Slow Weights:** The standard parameters learned over long periods (training).
2. **Fast Weights:** Synaptic strengths that change rapidly during a forward pass to store temporary context.

Hinton formalizes this temporary storage as an **Associative Memory**. When a network encounters a new key-value pair ($k, v$), it does not store the vectors in a list. Instead, it updates a fast weight matrix $W_{fast}$ using the Hebbian learning rule (outer product):

$$ W_{fast} \leftarrow \lambda W_{fast} + (v \otimes k) $$

Here, $\lambda$ is a decay factor ($0 < \lambda < 1$) representing forgetfulness. This matrix $W_{fast}$ compresses the history into a fixed-size representation of size $d \times d$, regardless of the sequence length.
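A minimal NumPy sketch of this write/read cycle; the dimension and decay value are arbitrary choices for illustration.

```python
import numpy as np

d = 8            # feature dimension (arbitrary for illustration)
lam = 0.9        # decay factor, 0 < lambda < 1
W_fast = np.zeros((d, d))

def write(k: np.ndarray, v: np.ndarray) -> None:
    """Hebbian update: decay the old memory, add the new association."""
    global W_fast
    W_fast = lam * W_fast + np.outer(v, k)

def read(q: np.ndarray) -> np.ndarray:
    """Retrieve: a query close to a stored key returns (approximately) its value."""
    return W_fast @ q

rng = np.random.default_rng(0)
k, v = rng.standard_normal(d), rng.standard_normal(d)
k /= np.linalg.norm(k)                 # unit-norm key for clean recall
write(k, v)
print(np.allclose(read(k), v))         # True: the only stored pair is recovered exactly
```

With many overlapping pairs stored, retrieval becomes approximate; that interference is exactly the "compression loss" discussed later.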
## 3. Mathematical Unification: Linear Attention

The connection between Fast Weights and Transformers is established by removing the softmax function from the attention mechanism, a technique known as **Linear Attention**.

If we treat the interaction between $Q$ and $K$ as linear, the attention equation becomes:

$$ \text{LinearAttention} = (Q K^T) V $$

Using the associative property of matrix multiplication, we can reorder the operations:

$$ Q (K^T V) $$

This reordering fundamentally alters the mechanism:

- **Left side $(Q K^T) V$:** Compare the Query to all Keys, then multiply by the Values. This requires storing the history.
- **Right side $Q (K^T V)$:** Compute the summation of Key-Value outer products first.

The term $(K^T V)$ represents the summation of all past associations. This term **is** the Fast Weight matrix $W_{fast}$ described by Hinton.

$$ \text{State}_t = \sum_{i=1}^t k_i v_i^T $$

Thus, Linear Attention is effectively a system where the "state" is a matrix of Fast Weights that is updated at every time step.
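A small NumPy check of the equivalence, computing the same causal sequence both ways; shapes and values are arbitrary, and a lower-triangular mask stands in for autoregressive decoding.

```python
import numpy as np

rng = np.random.default_rng(1)
T, d = 16, 4                               # sequence length and head dim (arbitrary)
Q, K, V = (rng.standard_normal((T, d)) for _ in range(3))

# Parallel form with a causal mask: materializes a T x T interaction matrix.
causal = np.tril(np.ones((T, T)))
out_parallel = (causal * (Q @ K.T)) @ V

# Recurrent form: one d x d fast-weight state, updated per token, O(1) memory.
S = np.zeros((d, d))
out_recurrent = np.empty((T, d))
for t in range(T):
    S += np.outer(K[t], V[t])              # State_t = State_{t-1} + k_t v_t^T
    out_recurrent[t] = Q[t] @ S            # q_t reads the compressed history

print(np.allclose(out_parallel, out_recurrent))  # True: same math, different memory cost
```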
## 4. State Space Models (SSMs) as Recurrent Fast Weights

State Space Models (like S4 and Mamba) typically define sequence modeling through continuous control theory, discretized into a recurrence:

$$ h_t = \bar{A} h_{t-1} + \bar{B} x_t $$

$$ y_t = \bar{C} h_t $$

While derived differently, this recurrence is mathematically equivalent to the Linear Attention/Fast Weight mechanism. We can demonstrate this by "unrolling" the SSM recursion to see how the output $y_t$ depends on the history.

The output at time $t$ is the sum of inputs weighted by decaying powers of $\bar{A}$:

$$ y_t = \sum_{j=1}^t \bar{C} (\bar{A}^{t-j}) (\bar{B} x_j) $$

Comparing this to the Linear Attention formulation with decay $\lambda$:

$$ \text{Attention}_t = q_t \sum_{j=1}^t (\lambda^{t-j}) (k_j^T v_j) $$

The mapping between architectures becomes clear:

- **Query ($q_t$)** $\leftrightarrow$ Output Matrix **$\bar{C}$**
- **Key/Value ($k_j^T v_j$)** $\leftrightarrow$ Input Matrix **$\bar{B} x_j$** (Input Projection)
- **Decay Factor ($\lambda$)** $\leftrightarrow$ State Matrix **$\bar{A}$**
- **Fast Weight Matrix ($S_t$)** $\leftrightarrow$ Hidden State **$h_t$**

Therefore, an SSM is mechanically a Transformer that uses Fast Weights (a fixed-size recurrent state) rather than a KV Cache (a growing buffer) to handle attention.
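A quick numerical check that the unrolled form matches the recurrence, using scalar parameters for simplicity; all values are arbitrary assumptions.

```python
import numpy as np

rng = np.random.default_rng(2)
T = 10
A_bar, B_bar, C_bar = 0.9, 0.5, 1.3    # scalar SSM parameters (arbitrary)
x = rng.standard_normal(T)

# Recurrent evaluation: h_t = A*h_{t-1} + B*x_t, y_t = C*h_t.
h, y_rec = 0.0, np.empty(T)
for t in range(T):
    h = A_bar * h + B_bar * x[t]
    y_rec[t] = C_bar * h

# Unrolled evaluation: y_t = sum_j C * A^(t-j) * B * x_j.
y_unrolled = np.array([
    sum(C_bar * A_bar**(t - j) * B_bar * x[j] for j in range(t + 1))
    for t in range(T)
])

print(np.allclose(y_rec, y_unrolled))  # True: A_bar plays exactly the role of the decay lambda
```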
## 5. Implications for Inference Optimization

This theoretical convergence has significant implications for inference efficiency.
### Standard Transformer

- **Mechanism:** Stores history in a KV cache.
- **Memory:** $O(N)$ (grows linearly with sequence length).
- **Performance:** High recall/precision because it retains the exact history.
### Fast Weight / SSM (Mamba / RWKV)

- **Mechanism:** Compresses history into a single matrix/vector state.
- **Memory:** $O(1)$ (constant memory, regardless of sequence length).
- **Performance:** Historically lower than Transformers due to "compression loss" (trying to stuff infinite history into a finite matrix).

**The Solution:** Modern SSMs like Mamba improve upon basic Linear Attention by introducing **Selectivity**. Instead of compressing *all* history equally (which blurs the memory), Mamba allows the model to dynamically gate the inputs, choosing to store relevant information and reset/forget irrelevant noise. This allows the Fast Weight approach to compete with the accuracy of explicit attention while maintaining constant memory usage.
### References

1. Hinton, G. E., & Plaut, D. C. (1987). "Using Fast Weights to Deblur Old Memories." *Proceedings of the 9th Annual Conference of the Cognitive Science Society.*
2. Ba, J., Hinton, G. E., et al. (2016). "Using Fast Weights to Attend to the Recent Past." *Advances in Neural Information Processing Systems (NeurIPS).*
3. Katharopoulos, A., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *International Conference on Machine Learning (ICML).*
4. Gu, A., & Dao, T. (2023). "Mamba: Linear-Time Sequence Modeling with Selective State Spaces." *arXiv preprint arXiv:2312.00752.*
5. Vaswani, A., et al. (2017). "Attention Is All You Need." *Advances in Neural Information Processing Systems (NeurIPS).*
# Transformer's Core Mechanics

April 1, 2025 · 7-minute read
The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of "channels" to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.

### 1. The "Channel": A Foundational View of `d_model`
In deep learning, a "channel" can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs the analogous concept is the model's primary embedding dimension, commonly referred to as `d_model`.

An input text is first tokenized, and each token is mapped to a vector of size `d_model` (e.g., 4096). Each of the 4096 dimensions in this vector can be considered a "channel," representing a different semantic or syntactic feature of the token.

As this data, represented by a tensor of shape `[batch_size, sequence_length, d_model]`, progresses through the layers of the Transformer, these channels are continuously transformed. However, a critical design choice is that the output dimension of every main sub-layer (like the attention block or the FFN block) is also `d_model`. This consistency is essential for enabling **residual connections**, where the input to a block is added to its output (`output = input + SubLayer(input)`). This technique is vital for training the extremely deep networks common today.
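A toy sketch of that shape constraint; the sizes are arbitrary assumptions and a ReLU FFN stands in for any sub-layer.

```python
import numpy as np

d_model, d_ff = 8, 32                     # tiny illustrative sizes

rng = np.random.default_rng(3)
W1 = rng.standard_normal((d_model, d_ff)) * 0.1   # up-projection
W2 = rng.standard_normal((d_ff, d_model)) * 0.1   # down-projection back to d_model

def ffn(x: np.ndarray) -> np.ndarray:
    """Position-wise FFN: expands to d_ff, contracts back to d_model."""
    return np.maximum(x @ W1, 0.0) @ W2

x = rng.standard_normal((5, d_model))     # [sequence_length, d_model]
out = x + ffn(x)                          # the residual add only works because shapes match
print(out.shape)                          # (5, 8): still [seq_len, d_model]
```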
### 2. The Building Blocks: Dimensions of Key Layers

A Transformer layer is primarily composed of two sub-layers: a Multi-Head Attention block and a position-wise Feed-Forward Network (FFN). The parameters for these are stored in several key weight matrices. Understanding their dimensions is crucial.

Let's define our variables:

- `d_model`: The core embedding dimension.
- `d_ff`: The inner dimension of the FFN, typically `4 * d_model`.
- `h`: The number of attention heads.
- `d_head`: The dimension of each attention head, where `d_model = h * d_head`.

The dimensions of the weight matrices are as follows:

| Layer | Weight Matrix | Input Vector Shape | Output Vector Shape | **Weight Matrix Dimension** |
|---|---|---|---|---|
| **Attention Projections** | | | | |
| Query | `W_Q` | `d_model` | `d_model` | **`[d_model, d_model]`** |
| Key | `W_K` | `d_model` | `d_model` | **`[d_model, d_model]`** |
| Value | `W_V` | `d_model` | `d_model` | **`[d_model, d_model]`** |
| Output | `W_O` | `d_model` | `d_model` | **`[d_model, d_model]`** |
| **Feed-Forward Network** | | | | |
| Layer 1 (Up-projection) | `W_ff1` | `d_model` | `d_ff` | **`[d_model, d_ff]`** |
| Layer 2 (Down-projection) | `W_ff2` | `d_ff` | `d_model` | **`[d_ff, d_model]`** |

### 3. Deconstructing Multi-Head Attention (MHA)
The core innovation of the Transformer is Multi-Head Attention. It allows the model to weigh the importance of different tokens in the sequence from multiple perspectives simultaneously.

![S3 File](/images/transformer-s-core-mechanics/c7fe4af2633840cfbc81d7c4e3e42d0c.png)

#### 3.1. The "Why": Beyond a Single Attention
A single attention mechanism would force the model to average all types of linguistic relationships into one pattern. MHA avoids this by creating `h` parallel subspaces. Each "head" can specialize, with one head learning syntactic dependencies, another tracking semantic similarity, and so on. This creates a much richer representation.

#### 3.2. An Encoding/Decoding Analogy
A powerful way to conceptualize the attention calculation is as a two-stage process:

1. **Encoding Relationships:** The first part of the calculation, `softmax(Q @ K.T)`, can be seen as an encoding step. It does not use the actual "content" of the tokens (the `V` vectors). Instead, it uses the Queries and Keys to build a dynamic "relationship map" between tokens in the sequence. This map, a matrix of attention scores, answers the question: "For each token, how important is every other token right now?"
2. **Decoding via Information Retrieval:** The second part, `scores @ V`, acts as a decoding step. It uses the relationship map to retrieve and synthesize information. For each token, it creates a new vector by taking a weighted sum of all the `V` vectors in the sequence, using the scores as the precise mixing recipe. It decodes the relational structure into a new, context-aware representation.

#### 3.3. The "How": A Step-by-Step Flow
The MHA process is designed for maximum computational efficiency; the four steps below are sketched in code right after the list.

1. **Initial Projections:** The input vectors (shape `[seq_len, d_model]`) are multiplied by `W_Q`, `W_K`, and `W_V`. These matrices are all `[d_model, d_model]` not to create one large query, but to **efficiently compute the vectors for all `h` heads at once**. The single large output vector is then reshaped into `h` separate vectors, each of size `d_head`.
2. **Attention Score Calculation:** For each head `i`, a score matrix is calculated: `scores_i = softmax( (Q_i @ K_i.T) / sqrt(d_head) )`. Note that `Q_i` and `K_i` have dimensions `[seq_len, d_head]`, so the resulting `scores_i` matrix has a dimension of **`[seq_len, seq_len]`**.
3. **Weighted Value Calculation:** The scores are used to create a weighted sum of the Value vectors for each head: `output_i = scores_i @ V_i`. Since `scores_i` is `[seq_len, seq_len]` and `V_i` is `[seq_len, d_head]`, the resulting `output_i` has a dimension of **`[seq_len, d_head]`**. This is the final output of a single head.
4. **Concatenation and Final Projection:** The outputs of all `h` heads are concatenated along the last dimension. This produces a single large matrix of shape `[seq_len, h * d_head]`, which is equivalent to `[seq_len, d_model]`. This matrix is then passed through the final output projection layer, `W_O` (shape `[d_model, d_model]`), to produce the attention block's final output. The `W_O` matrix learns the optimal way to mix the information from all the specialized heads into a single, unified representation.
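The four steps translate almost line-for-line into code. This is a minimal single-batch NumPy sketch; the sizes are toy assumptions and masking is omitted for clarity.

```python
import numpy as np

seq_len, d_model, h = 6, 16, 4
d_head = d_model // h
rng = np.random.default_rng(4)
x = rng.standard_normal((seq_len, d_model))
W_Q, W_K, W_V, W_O = (rng.standard_normal((d_model, d_model)) * 0.1 for _ in range(4))

def split_heads(m: np.ndarray) -> np.ndarray:
    # [seq_len, d_model] -> [h, seq_len, d_head]
    return m.reshape(seq_len, h, d_head).transpose(1, 0, 2)

def softmax(a: np.ndarray) -> np.ndarray:
    a = a - a.max(axis=-1, keepdims=True)
    e = np.exp(a)
    return e / e.sum(axis=-1, keepdims=True)

# Step 1: one big projection each, then reshape into h heads.
Q, K, V = split_heads(x @ W_Q), split_heads(x @ W_K), split_heads(x @ W_V)

# Steps 2-3: per-head scores [h, seq_len, seq_len], then weighted values.
scores = softmax(Q @ K.transpose(0, 2, 1) / np.sqrt(d_head))
per_head = scores @ V                      # [h, seq_len, d_head]

# Step 4: concatenate heads and apply the output projection.
concat = per_head.transpose(1, 0, 2).reshape(seq_len, d_model)
out = concat @ W_O
print(out.shape)                           # (6, 16): back to [seq_len, d_model]
```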
### 4. Optimizing Attention: GQA and MQA

During inference, storing the Key and Value vectors for all previous tokens (the KV cache) is a major memory bottleneck. **Grouped-Query Attention (GQA)** and **Multi-Query Attention (MQA)** are architectural modifications that address this by allowing multiple Query heads to share the same Key and Value heads.

Let's use a concrete example, similar to Llama 2 7B:

- `d_model` = 4096
- `h` = 32 Q heads
- `d_head` = 128
- `g` = 8 KV head groups for GQA

The key insight is that only the dimensions of the `W_K` and `W_V` matrices change, which in turn reduces the size of the KV cache. The `W_Q` and `W_O` matrices remain `[4096, 4096]`.

| Attention Type | No. of Q Heads | No. of KV Heads | `W_K` & `W_V` Dimension | Relative KV Cache Size |
|---|---|---|---|---|
| **MHA** (Multi-Head) | 32 | 32 | `[4096, 32*128]` = `[4096, 4096]` | 1x (baseline) |
| **GQA** (Grouped) | 32 | 8 | `[4096, 8*128]` = `[4096, 1024]` | 1/4x |
| **MQA** (Multi-Query) | 32 | 1 | `[4096, 1*128]` = `[4096, 128]` | 1/32x |

GQA provides a robust balance, significantly reducing the memory and bandwidth requirements for the KV cache with negligible impact on model performance, making it a popular choice in modern LLMs.
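To make the sharing concrete, here is a small sketch of how 32 query heads can map onto 8 cached KV heads; the head counts follow the example above, and repeat-based grouping is one common implementation choice, not the only one.

```python
import numpy as np

n_q_heads, n_kv_heads, d_head, seq_len = 32, 8, 128, 10
group = n_q_heads // n_kv_heads            # 4 query heads share each KV head

rng = np.random.default_rng(5)
q = rng.standard_normal((n_q_heads, seq_len, d_head))
k = rng.standard_normal((n_kv_heads, seq_len, d_head))   # only 8 K heads are ever cached

# Broadcast each KV head to its group of query heads before the usual attention math.
k_shared = np.repeat(k, group, axis=0)     # [32, seq_len, d_head], no extra cache cost
scores = q @ k_shared.transpose(0, 2, 1) / np.sqrt(d_head)
print(scores.shape)                        # (32, 10, 10)
print(f"cached KV heads: {n_kv_heads}/{n_q_heads} -> cache is 1/{group} of MHA")
```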
### 5. MHA vs. Mixture of Experts (MoE): A Clarification

While both MHA and MoE use the concept of "experts," they are functionally and architecturally distinct.

- **MHA:** The "experts" are the **attention heads**. All heads are active for every token to build a rich representation within the attention layer. This is akin to a board meeting where every member analyzes and contributes to every decision.
- **MoE:** The "experts" are full **Feed-Forward Networks**. A routing network selects a small subset of these FFNs for each token. This is a scaling strategy to increase a model's parameter count for greater capacity while keeping the computational cost fixed. It replaces the standard FFN block, whereas MHA *is* the attention block.

By understanding these technical details, from the basic concept of a channel to the sophisticated interplay of heads and experts, one can build a more complete and accurate mental model of how LLMs truly operate.
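A minimal top-2 routing sketch illustrating the contrast: every expert exists as parameters, but only the selected ones run for a given token. The expert count, top-k value, and normalizing over the selected logits are illustrative assumptions, not a specific model's recipe.

```python
import numpy as np

d_model, n_experts, top_k = 8, 4, 2
rng = np.random.default_rng(6)
W_router = rng.standard_normal((d_model, n_experts)) * 0.1
experts = [rng.standard_normal((d_model, d_model)) * 0.1 for _ in range(n_experts)]

def moe_ffn(x: np.ndarray) -> np.ndarray:
    """Route one token through its top-k experts; the rest stay idle (compute saved)."""
    logits = x @ W_router
    chosen = np.argsort(logits)[-top_k:]               # indices of the top-k experts
    weights = np.exp(logits[chosen])
    weights /= weights.sum()                           # normalize over selected experts only
    return sum(w * (x @ experts[i]) for w, i in zip(weights, chosen))

token = rng.standard_normal(d_model)
print(moe_ffn(token).shape)                            # (8,): d_model in, d_model out
```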
---

### References

1. Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017). Attention is all you need. *Advances in Neural Information Processing Systems*, 30.
2. Shazeer, N., Mirhoseini, A., Maziarz, K., Davis, A., Le, Q., Hinton, G., & Dean, J. (2017). Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. *arXiv preprint arXiv:1701.06538*.
3. Ainslie, J., Lee-Thorp, J., de Jong, M., Zemlyanskiy, Y., Lebrón, F., & Sanghai, S. (2023). GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints. *arXiv preprint arXiv:2305.13245*.
# UniFi VLAN Migration to Zone-Based Architecture

September 22, 2025 · 5-minute read
Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.

This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.

### Lesson 1: Demystifying the Native VLAN
The most significant source of initial problems was a fundamental misunderstanding of the "Native VLAN" setting on a switch port.

**The Misconception:** It's easy to assume that the "Native Network" on a port should be set to the VLAN you want the connected device to be on. For example, if a switch should be on the "corp" network (VLAN 10), one might set its management VLAN to `corp` and the upstream switch port's Native Network to `corp` as well.

**The Reality:** The Native VLAN on a trunk port has a specific purpose: it determines which VLAN any **untagged** traffic belongs to. A trunk port is designed to carry traffic for multiple VLANs by adding a "tag" to each packet. The one exception is the traffic for the Native VLAN, which is sent *without* a tag.

This leads to a critical rule: **for a trunk link to function correctly, the Native VLAN must be the same on both ends of the connection.** When they mismatch, management traffic from devices like switches and access points gets lost, sending them offline.
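The failure mode is easy to see in a toy simulation of trunk-port behavior; this is an illustration of the 802.1Q untagged-frame rule, not UniFi's actual code, and the VLAN numbers are the examples from above.

```python
def egress(frame_vlan: int, native_vlan: int):
    """A trunk port sends its native VLAN untagged; everything else carries a tag."""
    return ("untagged", None) if frame_vlan == native_vlan else ("tagged", frame_vlan)

def ingress(frame, native_vlan: int) -> int:
    """Untagged frames are assumed to belong to the receiving port's native VLAN."""
    kind, vlan = frame
    return native_vlan if kind == "untagged" else vlan

# Matched native VLANs: a management frame (VLAN 1) survives the trunk.
print(ingress(egress(1, native_vlan=1), native_vlan=1))    # 1: correct network

# Mismatched native VLANs (1 on one end, 10 on the other): the same frame
# arrives untagged and silently lands in VLAN 10 -> management devices go offline.
print(ingress(egress(1, native_vlan=1), native_vlan=10))   # 10: wrong network
```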
### Lesson 2: The Power of a Dedicated Management VLAN

This realization about the Native VLAN led directly to the next critical architectural decision: isolating the network's control plane. The initial plan involved using VLAN 1 for a DMZ, but this is a significant security risk, as VLAN 1 is often the default "catch-all" network.

**The Best Practice:** The industry-standard solution is to create a dedicated **Management VLAN**. This network's sole purpose is to be the home for the management interfaces of your router, switches, and access points.

The final, secure architecture was as follows:

1. A new network, "Management" (e.g., VLAN 1, `192.168.1.0/24`), was created.
2. This network was assigned to its own "Management" firewall zone with highly restrictive rules.
3. All trunk ports connecting switches and access points were configured with "Management" as the **Native VLAN**.
4. All other user-facing VLANs (`corp`, `iot`, `dmz`) were configured as **Tagged VLANs** on these trunk ports.

This isolates the network's control plane from the data plane, vastly improving the security posture.
### Lesson 3: Mastering Inter-VLAN Communication

With traffic properly segmented at Layer 2, the next challenge was controlling communication at Layer 3. This is the job of the router and its firewall, and it presented a common challenge: providing DHCP to clients when the server resides in a different VLAN.

DHCP requests are broadcasts, and broadcasts are not passed between VLANs by a router. The solution is to use a **DHCP Relay**.

1. On the network configuration for a client VLAN (e.g., `corp`), the DHCP mode was changed from "Server" to "Relay".
2. The IP address of the actual DHCP server was specified.

This instructs the router to listen for DHCP broadcasts, catch them, and forward them as unicast packets directly to the DHCP server. For this to work, the firewall must allow this traffic, and the DHCP server itself must be configured with a "scope," or pool of IP addresses, for the client's subnet.
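What the relay actually does to the packet is worth seeing once. This toy sketch follows the RFC 2131 `giaddr` semantics (the relay stamps its interface address so the server can pick the right scope); the addresses are assumptions for illustration, not the network's real config.

```python
# Toy sketch of a DHCP relay turning a client broadcast into a routable unicast.
DHCP_SERVER = "192.168.1.53"       # assumed server address on the management network

def relay(broadcast: dict, relay_interface_ip: str) -> dict:
    packet = dict(broadcast)
    if packet["giaddr"] == "0.0.0.0":          # only the first relay stamps giaddr
        packet["giaddr"] = relay_interface_ip  # tells the server which subnet asked
    packet["dst"] = DHCP_SERVER                # broadcast becomes routable unicast
    return packet

discover = {"src": "0.0.0.0", "dst": "255.255.255.255", "giaddr": "0.0.0.0"}
print(relay(discover, relay_interface_ip="192.168.10.1"))
# The server matches giaddr 192.168.10.1 against its 192.168.10.0/24 scope.
```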
<a class=heading-link href=#the-final-architecture-a-zone-based-firewall-model><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>The culmination of these lessons is a network architecture defined by clear, logical zones, each with a distinct purpose and trust level. This model simplifies firewall management and provides a robust security posture that is easy to understand at a glance.</p><h4 id=network-zones-and-their-roles>Network Zones and Their Roles
<a class=heading-link href=#network-zones-and-their-roles><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The final configuration groups the individual VLANs into distinct zones, forming the foundation of the security policy.</p><ul><li><strong>Internal:</strong> Contains the <code>corp</code> network. This is the most trusted zone for daily work.</li><li><strong>DMZ:</strong> Contains the <code>dns</code> and <code>prod</code> networks for semi-trusted, exposed services.</li><li><strong>IoT:</strong> Contains the <code>iot</code> network. This is a low-trust zone for smart devices.</li><li><strong>Management:</strong> Contains the <code>management</code> network. This is a highly privileged, isolated zone for network infrastructure.
<img src=/images/unifi-vlan-migration-to-zone-based-architecture/472bf0cd504f4cd7ab7a33cd3322a5f1.png alt="S3 File"></li></ul><h4 id=the-security-policy-matrix>The Security Policy Matrix
<a class=heading-link href=#the-security-policy-matrix><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>The true power of this model is realized in the firewall&rsquo;s zone matrix, which dictates the default traffic flow between each zone.
<img src=/images/unifi-vlan-migration-to-zone-based-architecture/663d732d14fc4fa8ad051c6926523efb.png alt="S3 File"></p><p>This matrix enforces the desired security policy with clear, high-level rules:</p><ul><li><strong>Complete IoT Isolation:</strong> The <code>IoT</code> row shows that devices in this zone are blocked from initiating any communication with any other internal zone. Their only allowed path is out to the internet.</li><li><strong>Protected Management Plane:</strong> The <code>management</code> row and column are almost entirely red. The critical network infrastructure is blocked from initiating contact with any user-facing zone, and vice versa, following the principle of least privilege.</li><li><strong>Controlled DMZ Access:</strong> The <code>DMZ</code> is blocked from initiating connections to the trusted <code>Internal</code> zone, so a compromised public-facing server cannot be used as a pivot point to attack internal devices.</li></ul>
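<p>The same default-deny matrix can be expressed in a few lines of nftables. This is a hedged sketch, assuming hypothetical zone interfaces <code>iot0</code>, <code>mgmt0</code>, <code>dmz0</code>, <code>lan0</code>, and <code>wan0</code> (the real policy lives in the UniFi controller, not in nft):</p><div class=highlight><pre tabindex=0><code class=language-bash data-lang=bash>nft add table inet zones
nft add chain inet zones forward '{ type filter hook forward priority 0 ; policy drop ; }'
# Stateful return traffic for anything that was allowed outbound.
nft add rule inet zones forward 'ct state established,related accept'
# IoT may only reach the internet.
nft add rule inet zones forward 'iifname "iot0" oifname "wan0" accept'
# Internal may reach the DMZ and the internet; nothing may reach mgmt0.
nft add rule inet zones forward 'iifname "lan0" oifname { "dmz0", "wan0" } accept'
</code></pre></div><p>Everything not explicitly accepted, including DMZ-to-Internal traffic and anything touching <code>mgmt0</code>, falls through to the <code>drop</code> policy.</p><h4 id=granular-intra-zone-control>Granular Intra-Zone Control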
<a class=heading-link href=#granular-intra-zone-control><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Beyond the high-level zone policies, the configuration also implements granular rules to control traffic <em>within</em> a single zone, providing defense-in-depth.</p><p>These rules explicitly define the communication paths between services. For instance, one rule allows a specific device to access a Kubernetes load balancer, while another allows general DNS access within the zone. This ensures that even within a semi-trusted zone, services can only communicate in expected and necessary ways, further reducing the potential attack surface.</p>
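<p>Intra-zone rules use the same vocabulary, just scoped to traffic that stays inside the zone. A sketch of the two examples above, continuing the hypothetical nftables setup (interface names, addresses, and ports are placeholders, not the real configuration):</p><div class=highlight><pre tabindex=0><code class=language-bash data-lang=bash># One specific device may reach the Kubernetes load balancer in prod.
nft add rule inet zones forward 'iifname "dns0" oifname "prod0" ip saddr 192.0.2.10 ip daddr 198.51.100.20 tcp dport 6443 accept'
# General DNS access within the zone.
nft add rule inet zones forward 'iifname "prod0" oifname "dns0" udp dport 53 accept'
</code></pre></div><p>By adhering to these principles, what began as a day of frustrating troubleshooting evolved into a robust, layered, and logically segmented network that balances simplicity with strong security practices.</p><hr><h3 id=references>References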
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><ul><li><a href=https://help.ui.com/hc/en-us/articles/7258465146519-Troubleshooting-UniFi-Device-Connectivity class=external-link target=_blank rel=noopener>Troubleshooting UniFi Device Connectivity</a></li><li><a href=https://help.ui.com/hc/en-us/articles/9592924981911-Virtual-Network-VLAN-Troubleshooting class=external-link target=_blank rel=noopener>Virtual Network (VLAN) Troubleshooting</a></li></ul></div></article>

posts/useful/index.html Normal file

@@ -0,0 +1,12 @@
<!doctype html><html lang=en><head><title>Some useful files · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="
rootCA.pem
"><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Some useful files"><meta name=twitter:description content="rootCA.pem"><meta property="og:url" content="https://ericxliu.me/posts/useful/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Some useful files"><meta property="og:description" content="rootCA.pem"><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2020-10-26T04:14:43+00:00"><meta property="article:modified_time" content="2025-08-03T08:37:28-07:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/useful/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Some useful files","genre":"Blog","wordcount":"1","url":"https:\/\/ericxliu.me\/posts\/useful\/","datePublished":"2020-10-26T04:14:43\u002b00:00","dateModified":"2025-08-03T08:37:28-07:00","description":"\u003cul\u003e\n\u003cli\u003e\u003ca href=\u0022\/rootCA.crt\u0022 \u003erootCA.pem\u003c\/a\u003e\u003c\/li\u003e\n\u003c\/ul\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/useful/>Some useful files</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2020-10-26T04:14:43Z>October 26, 2020
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
One-minute read</span></div></div></header><div class=post-content><ul><li><a href=/rootCA.crt>rootCA.pem</a></li></ul></div></article>

posts/vattention/index.html Normal file

@@ -0,0 +1,34 @@
<!doctype html><html lang=en><head><title>vAttention · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPUs native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
The Status Quo: PagedAttention and Software Tables
Link to heading
Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &ldquo;page table&rdquo; (the Block Table) [1]."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="vAttention"><meta name=twitter:description content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPUs native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
The Status Quo: PagedAttention and Software Tables Link to heading Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta property="og:url" content="https://ericxliu.me/posts/vattention/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="vAttention"><meta property="og:description" content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPUs native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
The Status Quo: PagedAttention and Software Tables Link to heading Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-08T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-19T21:21:55+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/vattention/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"vAttention","genre":"Blog","wordcount":"824","url":"https:\/\/ericxliu.me\/posts\/vattention\/","datePublished":"2025-12-08T00:00:00\u002b00:00","dateModified":"2025-12-19T21:21:55\u002b00:00","description":"\u003cp\u003eLarge Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. 
While \u003cstrong\u003ePagedAttention\u003c\/strong\u003e (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPUs native hardware Memory Management Unit (MMU) offers a more performant and portable solution.\u003c\/p\u003e\n\u003ch4 id=\u0022the-status-quo-pagedattention-and-software-tables\u0022\u003e\n The Status Quo: PagedAttention and Software Tables\n \u003ca class=\u0022heading-link\u0022 href=\u0022#the-status-quo-pagedattention-and-software-tables\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h4\u003e\n\u003cp\u003ePrior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined \u0026ldquo;page table\u0026rdquo; (the Block Table) [1].\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/vattention/>vAttention</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2025-12-08T00:00:00Z>December 8, 2025
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
4-minute read</span></div></div></header><div class=post-content><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU&rsquo;s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p><h4 id=the-status-quo-pagedattention-and-software-tables>The Status Quo: PagedAttention and Software Tables
<a class=heading-link href=#the-status-quo-pagedattention-and-software-tables><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &ldquo;page table&rdquo; (the Block Table) [1].</p><p>While effective at reducing fragmentation, this approach introduces significant complexity:</p><ul><li><strong>Kernel Rewriting:</strong> Because the KV cache is no longer contiguous in virtual memory, standard attention kernels (like cuDNN SDPA or vanilla FlashAttention) cannot be used directly. Developers must rewrite kernels to manually dereference block tables [1].</li><li><strong>Software Overhead:</strong> The system must manage virtual-to-physical mapping in user space, duplicating work typically handled by the OS. This adds runtime overhead to the critical path of both the CPU (managing tables) and the GPU (performing lookups) [1].</li><li><strong>Performance Penalties:</strong> PagedAttention-based kernels have been observed to be slower than their non-paged counterparts. For example, vLLM&rsquo;s paged kernel has been shown to be up to 2.8x slower than FlashAttention-2 in specific tests [1].</li></ul><h4 id=the-hardware-native-alternative-vattention>The Hardware-Native Alternative: vAttention
<a class=heading-link href=#the-hardware-native-alternative-vattention><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p><strong>vAttention</strong> proposes returning the responsibility of memory management to the OS and hardware. By utilizing the CUDA Virtual Memory Management (VMM) APIs, it is possible to decouple the allocation of virtual memory from physical memory [1].</p><p><strong>How it works:</strong></p><ol><li><strong>Virtual Contiguity:</strong> The system reserves a large, contiguous range of virtual addresses for the KV cache at request start.</li><li><strong>Physical Paging:</strong> Physical memory pages are allocated and mapped to this virtual range only on demand (dynamically) as the token sequence grows [1].</li><li><strong>Hardware Lookups:</strong> Because the GPU sees a contiguous virtual address range, the hardware Translation Lookaside Buffer (TLB) handles the address translation. This allows the use of unmodified, high-performance kernels like FlashAttention-2 or FlashAttention-3 without custom paging logic [1].</li></ol><h4 id=technical-challenges-and-solutions>Technical Challenges and Solutions
<a class=heading-link href=#technical-challenges-and-solutions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Historically, using the GPU&rsquo;s native virtual memory for high-frequency token generation faced two major bottlenecks: <strong>Control Plane Latency</strong> and <strong>Page Granularity</strong>.</p><p><strong>1. Control Plane Latency (The API Bottleneck)</strong>
Standard memory allocation (<code>cudaMalloc</code>) is monolithic—it allocates virtual and physical memory simultaneously. The more granular driver API, <code>cuMemMap</code>, allows separating these steps but involves expensive round-trips to the OS driver. Invoking these APIs synchronously during decoding (which generates one token at a time) would stall the GPU execution pipeline [1].</p><p>To solve this, vAttention utilizes <strong>execution overlap</strong>:</p><ul><li>Because LLM decoding is autoregressive and predictable, the system knows exactly when new memory is needed (one token ahead).</li><li>The CPU initiates the memory mapping for the <em>next</em> token asynchronously while the GPU is still computing the <em>current</em> token. By the time the GPU reaches the next step, the TLB and page tables are already updated, effectively hiding the driver latency [1].</li></ul><p><strong>2. Page Size Granularity (The Fragmentation Bottleneck)</strong>
The GPU TLB hierarchy is sensitive to page size.</p><ul><li><strong>4KB Pages:</strong> Too small. Mapping gigabytes of KV cache with 4KB pages causes &ldquo;TLB thrashing,&rdquo; degrading performance.</li><li><strong>2MB Huge Pages:</strong> The standard for large CUDA allocations. However, allocating 2MB for a single token update causes massive internal fragmentation, negating the benefits of dynamic allocation.</li></ul><p>Research identified <strong>64KB</strong> as the optimal page size, offering a balance between TLB efficiency and memory utilization. While standard CUDA APIs default to 2MB, vAttention utilizes modified driver calls to enable 64KB pages, eliminating TLB thrashing without incurring the fragmentation cost of huge pages [1].</p>
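<p>A back-of-the-envelope illustration of why the intermediate size wins (the model shape below is an assumed Llama-7B-like geometry, not a figure from the paper): each layer keeps separate K and V caches, and each cache&rsquo;s tail page is partially empty.</p><div class=highlight><pre tabindex=0><code class=language-bash data-lang=bash># Assumed geometry: 32 layers, 32 KV heads, head_dim 128, fp16 (2 bytes).
layers=32; heads=32; head_dim=128; bytes=2
tensors=$((layers * 2))                            # separate K and V caches
per_tensor_per_token=$((heads * head_dim * bytes)) # 8192 B of growth per token
# Worst case: one nearly empty page at the tail of every tensor.
echo "2MB pages:  up to $((tensors * 2)) MiB stranded per request"
echo "64KB pages: up to $((tensors * 64 / 1024)) MiB stranded per request"
</code></pre></div><p>Sixty-four tensor tails at 2MB each can strand well over a hundred megabytes per request, while 64KB pages cap the waste at a few megabytes yet remain large enough to keep the TLB footprint manageable.</p><h4 id=performance-and-portability-implications>Performance and Portability Implications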
<a class=heading-link href=#performance-and-portability-implications><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>Moving memory management from software (PagedAttention) to hardware (vAttention) yields measurable benefits:</p><ul><li><strong>Throughput:</strong> In prefill-heavy workloads, vAttention outperforms PagedAttention-based systems (like vLLM and FlashInfer) by up to 1.23x due to the elimination of software lookup overheads. In decoding, it matches or exceeds the performance of optimized paged kernels [1].</li><li><strong>Portability:</strong> A significant advantage is software compatibility. When FlashAttention-3 (optimized for NVIDIA&rsquo;s Hopper H100 GPUs) was released, it did not initially support PagedAttention. vAttention enabled the immediate use of FlashAttention-3 with dynamic memory support, achieving up to 1.5x higher throughput than PagedAttention-based FlashAttention-2 [1].</li></ul><h4 id=conclusion>Conclusion
<a class=heading-link href=#conclusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h4><p>While PagedAttention solved the critical issue of memory fragmentation in LLMs, it necessitated a complex software abstraction layer. By leveraging low-level CUDA VMM APIs, handling allocations asynchronously to hide driver latency, and optimizing page sizes, it is possible to achieve dynamic memory management using the GPU&rsquo;s native hardware. This restores the illusion of contiguous memory, simplifies kernel development, and improves inference performance.</p><h3 id=references>References
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>[1] R. Prabhu et al., &ldquo;vAttention: Dynamic Memory Management for Serving LLMs without PagedAttention,&rdquo; in <em>Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS &rsquo;25)</em>, 2025.</p></div></article>

posts/vibe-coding-from-the-jeep/index.html Normal file

@@ -0,0 +1,35 @@
<!doctype html><html lang=en><head><title>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="&ldquo;Vibe coding&rdquo; has become my latest obsession. It&rsquo;s that flow state where the tools disappear, and you&rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I&rsquo;ve been trying to answer a childhood dream.
Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with Baywatch, one of the first American TV shows I ever watched was Knight Rider. I don&rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams"><meta name=twitter:description content="“Vibe coding” has become my latest obsession. Its that flow state where the tools disappear, and youre just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, Ive been trying to answer a childhood dream.
Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with Baywatch, one of the first American TV shows I ever watched was Knight Rider. I dont remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you."><meta property="og:url" content="https://ericxliu.me/posts/vibe-coding-from-the-jeep/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams"><meta property="og:description" content="“Vibe coding” has become my latest obsession. Its that flow state where the tools disappear, and youre just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, Ive been trying to answer a childhood dream.
Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with Baywatch, one of the first American TV shows I ever watched was Knight Rider. I dont remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-21T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-22T06:48:07+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/vibe-coding-from-the-jeep/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams","genre":"Blog","wordcount":"665","url":"https:\/\/ericxliu.me\/posts\/vibe-coding-from-the-jeep\/","datePublished":"2026-01-21T00:00:00\u002b00:00","dateModified":"2026-01-22T06:48:07\u002b00:00","description":"\u003cp\u003e\u0026ldquo;Vibe coding\u0026rdquo; has become my latest obsession. It\u0026rsquo;s that flow state where the tools disappear, and you\u0026rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I\u0026rsquo;ve been trying to answer a childhood dream.\u003c\/p\u003e\n\u003cp\u003eGrowing up in China before the internet age, my window to the outside world was CCTV-6. Along with \u003cem\u003eBaywatch\u003c\/em\u003e, one of the first American TV shows I ever watched was \u003cem\u003eKnight Rider\u003c\/em\u003e. I don\u0026rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. 
A car that could talk, think, and do things for you.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/vibe-coding-from-the-jeep/>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
<time datetime=2026-01-21T00:00:00Z>January 21, 2026
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
4-minute read</span></div></div></header><div class=post-content><p>&ldquo;Vibe coding&rdquo; has become my latest obsession. It&rsquo;s that flow state where the tools disappear, and you&rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I&rsquo;ve been trying to fulfill a childhood dream.</p><p>Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with <em>Baywatch</em>, one of the first American TV shows I ever watched was <em>Knight Rider</em>. I don&rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.</p><p>Decades later, I&rsquo;m sitting in my Jeep, wondering: Can I build my own KITT? Can I take the vibe on the road?</p><p>I had already swapped the head unit in my Jeep for an aftermarket unit. It features a <strong>K706 (UIS7862S)</strong> chipset with an <strong>8-core CPU and 8GB of RAM</strong>, essentially making it a reasonably powerful Android tablet hardwired into the dashboard.</p><h2 id=the-objective>The Objective
<a class=heading-link href=#the-objective><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>Turn this car accessory into a legitimate dev environment. I wanted a physical keyboard, a real terminal, and access to my AI coding assistants. I wanted to push code while parked on a trail.</p><h2 id=the-hardware-blocker-getting-input>The Hardware Blocker: Getting Input
<a class=heading-link href=#the-hardware-blocker-getting-input><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>The first hurdle was mundane but blocking: My Bluetooth keyboard wouldn&rsquo;t pair. The head unit could see other devices, but refused to connect to my keyboard.</p><h3 id=attempt-1-the-usb-dongle-bypass>Attempt 1: The USB Dongle Bypass
<a class=heading-link href=#attempt-1-the-usb-dongle-bypass><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>My first instinct was to blame the cheap Chinese head unit hardware. I grabbed a spare TP-Link USB Bluetooth dongle and plugged it in, hoping to bypass the internal stack entirely.</p><p>The device showed up in <code>lsusb</code>, but it remained inert. A quick check of the kernel config via <code>zcat /proc/config.gz</code> revealed the bad news:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># CONFIG_BT is not set</span>
</span></span></code></pre></div><p>The kernel was compiled without generic Bluetooth driver support (<code>btusb</code>). Even with root access, I couldn&rsquo;t load the drivers because they simply didn&rsquo;t exist in the firmware. I was stuck with the internal hardware.</p><h3 id=attempt-2-the-dual-bluetooth-fix>Attempt 2: The &ldquo;Dual Bluetooth&rdquo; Fix
<a class=heading-link href=#attempt-2-the-dual-bluetooth-fix><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h3><p>Forced back to the built-in Bluetooth, I tried to diagnose why it was ignoring my keyboard. Standard debugging tools painted a grim picture:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span> hciconfig -a
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># (Empty output - no standard HCI interface found)</span>
</span></span><span style=display:flex><span>
</span></span><span style=display:flex><span> ps -A | grep -iE <span style=color:#a5d6ff>&#34;goc|ivt|syu&#34;</span>
</span></span><span style=display:flex><span>u0_a50 <span style=color:#a5d6ff>3456</span> ... com.goc.sdk <span style=color:#8b949e;font-style:italic># Accessing the proprietary BT chip</span>
</span></span></code></pre></div><p>The diagnosis was clear: the internal Bluetooth chip operates in <strong>Slave Mode</strong> (client), managed by a proprietary <code>com.goc.sdk</code> service instead of the standard Android Bluetooth stack. It&rsquo;s designed to <em>be</em> a speaker for your phone, not to <em>host</em> a keyboard.</p><p><strong>The Fix</strong>: Hidden deep in the Factory Settings (password <code>8888</code>), there&rsquo;s a toggle called <strong>&ldquo;Dual Bluetooth&rdquo;</strong>. Enabling it flips the proprietary stack to expose a standard Host interface, and my mechanical keyboard connected instantly.</p><h2 id=the-software-termux--claude>The Software: Termux + Claude
<a class=heading-link href=#the-software-termux--claude><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><p>With input sorted, the software setup was surprisingly straightforward. <strong>Termux</strong> was the obvious choice for a terminal.</p><p>I discovered that <strong>Claude Code</strong> works on Termux with zero hassle.</p><p>The setup was shockingly simple:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>pkg install nodejs git ripgrep
</span></span><span style=display:flex><span>npm install -g @anthropic-ai/claude-code
</span></span></code></pre></div><p>Authentication via <code>claude login</code> worked out of the box. Now, I have a fully capable coding agent running directly on my dashboard. I can pull a repo, ask Claude to refactor a module, and push the changes—all without opening a laptop.</p><p><img src=/images/vibe-coding-from-the-jeep/399000b0b5ee4f5e8961e1d76b6c23c8.png alt="S3 File"></p><h2 id=key-insights>Key Insights
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Head Units are just Weird Tablets</strong>: They have quirks (like Slave-only Bluetooth), but they are standard Android under the hood. <code>adb root</code> is your best friend for diagnosing them.</li><li><strong>Check the Kernel Config</strong>: Before buying hardware peripherals for embedded Android devices, always check <code>/proc/config.gz</code> (see the snippet below). If the support isn&rsquo;t compiled in, you&rsquo;re dead in the water.</li><li><strong>The Vibe is Portable</strong>: With tools like Termux and Claude Code, the &ldquo;dev environment&rdquo; is no longer a heavy laptop. It&rsquo;s anywhere you have a terminal.</li></ul>
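<p>The check itself is a single pipeline in any shell with root access (the grep pattern below is a generic example; adjust it for whatever driver you are hunting):</p><div class=highlight><pre tabindex=0><code class=language-bash data-lang=bash># Dump the running kernel build config and look for Bluetooth support.
zcat /proc/config.gz | grep -E '^(# )?CONFIG_BT( |=)'
# A line reading &#34;# CONFIG_BT is not set&#34; means no dongle will help.
</code></pre></div><h2 id=references>References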
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
<span class=sr-only>Link to heading</span></a></h2><ol><li><a href=https://www.reddit.com/r/termux/comments/1jd4y4y/claude_code_is_easy_to_install_on_termux/ class=external-link target=_blank rel=noopener>Reddit: Claude Code on Termux</a></li></ol></div></article>

robots.txt Normal file

@@ -0,0 +1,4 @@
User-agent: *
Allow: /
Sitemap: https://ericxliu.me/sitemap.xml

rootCA.crt Normal file

@@ -0,0 +1,34 @@
-----BEGIN CERTIFICATE-----
MIIF2DCCA8CgAwIBAgIUMxAajDuiWUFtwePBQChCPyqvyIowDQYJKoZIhvcNAQEL
BQAwcjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRowGAYDVQQKDBFlcmljeGxp
dS5tZSwgSW5jLjEXMBUGA1UEAwwOZXJpY3hsaXUubG9jYWwxITAfBgkqhkiG9w0B
CQEWEm1hc3RlckBlcmljeGxpdS5tZTAeFw0yNDAxMDgwMzA0NDFaFw0yNjAxMDgw
MzA0NDFaMHIxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTEaMBgGA1UECgwRZXJp
Y3hsaXUubWUsIEluYy4xFzAVBgNVBAMMDmVyaWN4bGl1LmxvY2FsMSEwHwYJKoZI
hvcNAQkBFhJtYXN0ZXJAZXJpY3hsaXUubWUwggIiMA0GCSqGSIb3DQEBAQUAA4IC
DwAwggIKAoICAQDedDTBe0+qRV1r+kRvMZzFkensiKMpL4T9bRbAbNFfS8QufHp9
wJoMh5xW4XPJtqkYdYnnoefaZS9a9DMHjw1+f7lL0vzIfzSO5JWTZQSAsi0yeqDn
j1l8ShYrPZvQR+NUht9qAztbhIcBy3FFVOFFMZjZaYIwF1C3QBv5h2/yfgw0uad/
rOEw1G1Z/xlj7K+rvm59+vzduASfFY6NMG0PFzY1jRnWZ4diiqWJEM02EAevosbW
Xg1CFRkoe+s088QXl4WZLxpHvsiKdvKjaaKXrQieAYL2Kl3DOziN7P659q0Bk2tm
yp0B81QZV24mhg5WCuwrteiOJz51vck/T+hWDFKjPwa+GjGpqGiXjJMBfS/MyGMf
mdnPdcMeKQo2Mx4hpl/h116xFY60Tzto/PI4Kb4VBTKkN0hu7BLDSU4l8PkiSSAd
0E2Kzg4P9BQgvVc/BhoR7oKebf2TCeTVN+gC9HRsBdzBA3mtp60Qd9XBFAkbDqZq
nusA8KEG10az4cXaMIohAsRh9AVz4tHxTOq2dgw9AE8EEfQzgcMQl4hV4TkYFubC
t/gm16yEvsPBMFjptLu4S7mOpSdaJylOXVcMZ6PgeGAlrbuYunblYtdyKVyNVFeX
ca6RPAbDthWSqrbzigCvSeqhRpPmEq5p51BFGA+QK2b1Bj7dF0yiDO5zbwIDAQAB
o2YwZDAfBgNVHSMEGDAWgBQEK7HddEflCZ9DL9VEIBXzB9dQFTAOBgNVHQ8BAf8E
BAMCAgQwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUBCux3XRH5QmfQy/V
RCAV8wfXUBUwDQYJKoZIhvcNAQELBQADggIBAKF16Ps4AccXsNDRqQANF/kcNZ2y
SKB3cNsOfWxKfgppkl43z9cimgGGbNn0mVGjaOzXdXHEEQ0Uuv3tkvgQA2KraaTy
wLG5+RQKIVRaOgWufXbL76JV6mMf8v3o8/o5EL+uC/2KxpDH0N1BOJ0hJB2/hbra
kHPuYobj1SWtPeO5lRdZed05kdiAWH7e3/PmKgH13tZLnnzCHRC1YNkk2Cdhp082
XL5zUtDdbWAm6UgM4Reg4MKZMZzmYDn+1/wW6D5oO5ZXlJF2QqjqfTXn6fKJWM9d
JK3O5vx+LquAMu1G9gkqmTZntQQ3ZDGs9bMfWchgWPWN1ignJgmqnIgIbvdAHhdL
DOz3WE53vpcUY35TOs/YgIj81vAZuhuaYQZcTL4H34c3ShdVi6RY3Y+yPxM9MjRc
zqzEMg4KTnK7Es+t4Yep7vOQRo3WN1A+lXsRf+n2XBTCTwFOCury64AjMQn5H0yb
aZGvvf3UnIdUrJjPGjF9W/uIpy0TDpsKo/qizAdQ5c18p2ihVO8mHHZhJnIQW9er
p8M0m6/woalM94apYNdY6YAbsej5gNktx+z2ptdPNmE3k3OevDFqRNSLh29Rr2vM
CfO6MjR4Bkilw5A67jQFQnLF6Y9TYqW0HlEvdODNvO9aR5RSwaNTGJBcjynrsL3v
IG73ZMQl6utPkbKh
-----END CERTIFICATE-----

series/index.html Normal file

@@ -0,0 +1,7 @@
<!doctype html><html lang=en><head><title>Series · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Series"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/series/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Series"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/series/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/series/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/series/>Series</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

series/index.xml (new file, +1 line)

@@ -0,0 +1 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Series on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/series/</link><description>Recent content in Series on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/series/index.xml" rel="self" type="application/rss+xml"/></channel></rss>

series/page/1/index.html (new file, +1 line)

@@ -0,0 +1 @@
<!doctype html><html lang=en><head><title>https://ericxliu.me/series/</title><link rel=canonical href=https://ericxliu.me/series/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/series/"></head></html>

site.webmanifest (new file, +20 lines)

@@ -0,0 +1,20 @@
{
"name": "Eric X. Liu's Personal Page",
"short_name": "Eric Liu",
"description": "Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities.",
"start_url": "/",
"display": "standalone",
"background_color": "#ffffff",
"theme_color": "#000000",
"icons": [
{
"src": "/images/gravatar.png",
"sizes": "192x192",
"type": "image/png",
"purpose": "any maskable"
}
],
"categories": ["technology", "engineering", "blog"],
"lang": "en",
"orientation": "portrait-primary"
}
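For reference, a manifest like the one above can be spot-checked for the fields installability audits usually look for. A minimal sketch; the required-field list here is an assumption, not a formal spec check:

# Spot-check site.webmanifest for commonly required PWA fields.
import json

with open("site.webmanifest") as f:
    manifest = json.load(f)

for key in ("name", "short_name", "start_url", "display", "icons"):
    assert key in manifest, "missing " + key

for icon in manifest["icons"]:
    print(icon["src"], icon["sizes"], icon["type"])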

sitemap.xml (new file, +1 line)

File diff suppressed because one or more lines are too long

(Deleted binary image; diff not shown. Before: 26 KiB.)


@@ -1,33 +0,0 @@
-----BEGIN CERTIFICATE-----
MIIFoDCCA4igAwIBAgIUJzlDGIEJdOQ0Shd1P0RJP5aangAwDQYJKoZIhvcNAQEL
BQAwYTELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMREwDwYDVQQHDAhTYW4gSm9z
ZTEUMBIGA1UECgwLZXJpY3hsaXUubWUxHDAaBgkqhkiG9w0BCQEWDV9AZXJpY3hs
aXUubWUwHhcNMjExMTIyMTgzMzQyWhcNMjQwOTExMTgzMzQyWjBhMQswCQYDVQQG
EwJVUzELMAkGA1UECAwCQ0ExETAPBgNVBAcMCFNhbiBKb3NlMRQwEgYDVQQKDAtl
cmljeGxpdS5tZTEcMBoGCSqGSIb3DQEJARYNX0BlcmljeGxpdS5tZTCCAiIwDQYJ
KoZIhvcNAQEBBQADggIPADCCAgoCggIBAN50NMF7T6pFXWv6RG8xnMWR6eyIoykv
hP1tFsBs0V9LxC58en3AmgyHnFbhc8m2qRh1ieeh59plL1r0MwePDX5/uUvS/Mh/
NI7klZNlBICyLTJ6oOePWXxKFis9m9BH41SG32oDO1uEhwHLcUVU4UUxmNlpgjAX
ULdAG/mHb/J+DDS5p3+s4TDUbVn/GWPsr6u+bn36/N24BJ8Vjo0wbQ8XNjWNGdZn
h2KKpYkQzTYQB6+ixtZeDUIVGSh76zTzxBeXhZkvGke+yIp28qNpopetCJ4BgvYq
XcM7OI3s/rn2rQGTa2bKnQHzVBlXbiaGDlYK7Cu16I4nPnW9yT9P6FYMUqM/Br4a
MamoaJeMkwF9L8zIYx+Z2c91wx4pCjYzHiGmX+HXXrEVjrRPO2j88jgpvhUFMqQ3
SG7sEsNJTiXw+SJJIB3QTYrODg/0FCC9Vz8GGhHugp5t/ZMJ5NU36AL0dGwF3MED
ea2nrRB31cEUCRsOpmqe6wDwoQbXRrPhxdowiiECxGH0BXPi0fFM6rZ2DD0ATwQR
9DOBwxCXiFXhORgW5sK3+CbXrIS+w8EwWOm0u7hLuY6lJ1onKU5dVwxno+B4YCWt
u5i6duVi13IpXI1UV5dxrpE8BsO2FZKqtvOKAK9J6qFGk+YSrmnnUEUYD5ArZvUG
Pt0XTKIM7nNvAgMBAAGjUDBOMB0GA1UdDgQWBBQEK7HddEflCZ9DL9VEIBXzB9dQ
FTAfBgNVHSMEGDAWgBQEK7HddEflCZ9DL9VEIBXzB9dQFTAMBgNVHRMEBTADAQH/
MA0GCSqGSIb3DQEBCwUAA4ICAQDKVGKjd1v6vecfNLZZ4+bqw4nwzzVwOdOWb2j+
zqPmYT/ZzCCxeiWLIaYtOQWXR4eSzULWYAGauecVlVYydbRbDC6LXp/1NrfQuNpp
6kd9JRGGdnNrW+0tEfJiXnEpOTwKncI1u6B0pvND8Gy6sxgjamyiKAh1vy0IZYJk
2T7PXxljqGxFZXZ5Ese/ogPn5KRGPkOmbW/BQXWC//3Qe39J6lxy2/HwfZ9pa+AQ
TxcJ/2OiDgBprMPJrHBiqvjoI9kp8vk3JhAQmbVM+8bpAIiiW8dPiEBDtROe/Wk5
UuiebFQNIebaIG+nEruUR28Df3Q52k6dY4MWLVNqB9lKKCqnbYtxDUIQrFCSHAEq
IdeOTEPjpkBr6UWwEunk32Mq6mdqmf5zBNaS64Wva43SLx+p/MIIacCYxOH7CHJX
r6XO/tR95cO4N3LdA/aJYpY0M35tFftFKI/AD5vEwshgYDw9QU1fu3Wljw3wYSVx
8YPKKwRkEBslEBmqf9YooDtGw3bLkQbJml0uMgxXOYI/VD95azvguq1lmcSdTTPu
f1GC0YnpQnXT6gPHNLoMhGiQUTlwHp2GKdaW0Xb9DEOLurzBZ9FIQsvrgclpJ49x
avp4Sgk3wLVue5iOKqlZL5fQIjckQEVR8vieKnZgGx6amVS9a5gB0GbAhkD06Y4p
M3O6VQ==
-----END CERTIFICATE-----


@@ -1,140 +0,0 @@
verb 3
nobind
dev tun
client
remote 24.4.129.175 1194 udp
fast-io
compress lzo
auth-nocache
remote-cert-tls server
<tls-crypt>
-----BEGIN OpenVPN Static key V1-----
d188baecfc63820df3a11c50aa887c4e7236ff8021049038aec03f4f2a46376b
aee8d80d06dbd812b84962937bed7003fdf64c264e9b7423925dbce4dd38b4e0
a3bdfe6e656550a63430338c0dd4bcd4c694221c7561fa9e6da3efd0334a57ee
5926acc05f768339b4712bf005d7eeb27f2da8dc8f4861b718b6683eb42869c0
e11a1ac6c36daea5c79d7e08830de1c6f0a55207bb39e9c0420db34b3a631975
5cfcef448f6664fde5d40e31e381503a6a724eebd7cfd76fe6d7108edc83b5ab
ea1e66af70837d15a9d8ba58c82018b4cd669deb2323ba60d7c7ea8a398483aa
2dec8aa6890dc2f60ff5be1a5c2a6a2fe95efa27f75c38735335e7f6f39b256e
-----END OpenVPN Static key V1-----
</tls-crypt>
<ca>
-----BEGIN CERTIFICATE-----
MIIFNTCCAx2gAwIBAgIJAJBOAeknPeLqMA0GCSqGSIb3DQEBCwUAMDExCzAJBgNV
BAYTAkdCMQ8wDQYDVQQIDAZMb25kb24xETAPBgNVBAoMCFdXVyBMdGQuMB4XDTE5
MDEwNjA3NTIxOVoXDTI5MDEwMzA3NTIxOVowMTELMAkGA1UEBhMCR0IxDzANBgNV
BAgMBkxvbmRvbjERMA8GA1UECgwIV1dXIEx0ZC4wggIiMA0GCSqGSIb3DQEBAQUA
A4ICDwAwggIKAoICAQDpq5CFMT1VWb2MeaHXi4FpCLDXwnzaS+3qGCa3COdNg2BD
tkQOPJNTgVhGn5XcfSDZnnVpXXrPDAqEDCUVVZj/2Mup/LseNr4miY+QcojyRETh
Ecq0FVqgRvW2zRxqWxEPpLyzZGOcwAcW2jGO8XWPsqN4wAWO3WlpYT3unVK833Cx
0wkdFIPbkEE1xKaJiskNYGgDuHu4tzGhOHSKOMzo7HvMaYsNgNChx/x4HYyTgkCf
4r5zo4+CnOqRZ58STiV9AsOgg6mR8m6h/E9GNWpU0VWKm8hnklP+TiMnW/AKby0B
hUKXFJMyhrNBOQXyj1LTqM5Q97+SNstOfqutKZgdD8mZcL4ec+DzelCH4Gyc15Yx
gII/z3YwBUw/SGh+diCtWY2eJAHDkDFMGgiidVSzeKjRAgCDGi5+SYymLzLDyQey
BpgbxunC2zHKsEhH1ZfOxyEOsW7UzgN6axQQ5DdzzKc1ke6OBl0YD1pRsoWEXudi
b8LlNNI4oOaMiW3gsptJGPCOvXBrMm7wuzLrXMMRD0bh969KBJ7YQjUVkrAOsGTq
DnqoXILa0ljsdazxe2Xk8GqrGAQ1XIvO7elbUlV/0nlAj4nzzx1m8f0n9nZ1aEZe
Mv46+si6K/DgdUyGqcxOw6iZ00Fj6ha4yx7HJjZHHwFBXqJEPtdXYJKYa1AmHwID
AQABo1AwTjAdBgNVHQ4EFgQUeBJp2fBea1UzyKirF1VYDsYddiwwHwYDVR0jBBgw
FoAUeBJp2fBea1UzyKirF1VYDsYddiwwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0B
AQsFAAOCAgEAfN7KjdqydCuSH6GCziu7Jc+SxLKLqMQc27/SBRWJ54JwDsfRglH3
zze9j0f/auLKNirbxQG8/CeJO7BtxsPHk2NfKnXUMyIfRH4jlSmuuy0YLH1N3F19
5GKGyt/ufc4a19l7M8ZseFMee8GXn6uHpVtN88GMKqQOu0AGnxv379ulI/RQ7iC2
wkFpkT8Anzwd+jxMi5iNYbsHGd1uCyzY1bbNORY/fdX7A27xNjLe2cJc68OUOJQe
XyfVlH2JyY+qEAXmv5gABafLFOsGmGHaQxZj4+zIdvDX6DGVIKCK7eixwVnKDwHm
b9yF4ivMWk5gaY0sjezD7bnN2vAN1zXvpmmSu2tc/kOzGXZKoGEUn/4j+tWvvhPn
wrTonT9soGmm7/LVyG/z950lylZV3XRw/0ZVQeCtQj+b+SjozNjTutzgWiAJ4njm
Jyaqrj6vHB6vOPySk6AYyu1qTaJsniHR62Hv6WG/eZQalcXJZ8BuwAgdpcgPwdVU
4IaKyiCjHg7dnrAwPURHfmlvosq+J+8PdD0O2L2aYUUtBS2TezgedSLXBYD4xZFa
85zsZMlEurHM9o93vfjihyMxUla46o6uNyl32ebaPvLxEj/MyGOwkzAWa0qxy74J
aQjWl+dWivXNFfE/yD/7yVF+X9YdlSFGCRyIfkUwy9hxLqkUdXeFgwE=
-----END CERTIFICATE-----
</ca>
<cert>
-----BEGIN CERTIFICATE-----
MIIE5TCCAs2gAwIBAgICEAEwDQYJKoZIhvcNAQELBQAwMTELMAkGA1UEBhMCR0Ix
DzANBgNVBAgMBkxvbmRvbjERMA8GA1UECgwIV1dXIEx0ZC4wHhcNMTkwMTA2MDc1
MzQ5WhcNMjkwMTAzMDc1MzQ5WjAUMRIwEAYDVQQDDAl2cG5jbGllbnQwggIiMA0G
CSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCmpYnmgBet9aZInpN9DRi6EHjye699
GFCVuBc4zw8PoZieC/jt/hQ8jrwQC8KVU7g2nuAZNfX1wXJ0hLDKAZWSJhvlAeN1
/oA9oe/kikatUcijJnfipFJlhPJ8kru+UXH8ypwHoxbxd/2u+KDTjg3dJqNPGUak
2KlSxHDbS0OWEhBIdn4A2iD7HpDPBO805KfAWVtfQ6Pvy5XHHNm6s796x91hes3Q
ONY1TLKE+tHFMoTXn3C0a4/DU35Kj1JrZDdER+DrNmxGhInq2CXEGsyw8MwXYHLS
1H8jvuX8u7yS2QX2cjwzc4PcJFPjEPTtUqPb1Ob+xOYb0bGOEE64+7xNeA+Mk5XL
i08DJEFAU0hSCnHtz1y/JrXScKaHgtVzDm2TsXal9jO7ikVi3zhC6EcO0T4PFBX0
lACXUDpXv50WI3ftoIACtO+paxM+wuXCt9ZhC6BZbRBs9EtVUdBDXAsSp+yhb9Jr
GZzY+GIfmiPY027Cd0FYumspCHgBvUM4D9rCDVSiwr1DqNHqzlrjbwW3mCHLtmjE
qU5Jg9DBUB3J01AERUgBYE8O8BkmSkuKi0mwgi9LdQ4SbizqVreMip+kXFAwRMg8
Pw1h/cDUd9G1ZM+bZzHrjp4rdDHK8NAhDgJvxhGuhuwVFpA/LhvJ4tJBWjhJn7j5
IEHpNWA1xNp1IwIDAQABoyQwIjALBgNVHQ8EBAMCB4AwEwYDVR0lBAwwCgYIKwYB
BQUHAwIwDQYJKoZIhvcNAQELBQADggIBAFcqTpXcEe83shOnp+nOvGscMT0PwSNo
ojy5xR9UhHs3ijyJ3DeaCO4xh7V8PTzCTpg3NOs8+19/nAhSr+QBWQKwrQhQ7Uub
zv8AMXJ4tU1ZAyx0lX8FzUe/GsI8muqosK8F09jnTgGk05yCca9kDVzffGk1mivx
d6ANRdUkprZV1VPA/eKXBQYstbeYBitPql5anmh54fEvXt7S1SdPATXI/eTzaxtP
2KyPl7OZDA+mvS0qPFcY+MB2fjjdoyl74BShCJyI5sBCfN6WY6hNQ7meVWa6bCLQ
EgvrZqh0lkWhy3mKcTL8eZJeF2SoYHQCSY75gQM0gdCODHTvkJJknLVmtzHHTJL3
gVbdqFo/OiGGpD4XKpChNv//1kkRrwPBG4YDXu2/vsPoZKRVgjpQbNol+z+Ee1y3
MqGz09aGxC98KUuxrUYwT8fbVVyLm0Fu2O8u+Qz3s6dPkWqD94YGxh2pq9SD5aTl
/92LaIyqfMlWXj38yDUxjsENsTDtsSrx5cw6+BpB+VMmSuXlIYgE4khiEnYzCXbj
5rduGmz4t4rhZZaa3n3L+G0sCQUqmnYNAjEMYcKIZvkTI3GoW3s0FROeUL1zLir2
mdvWmQHTq39p/iBWmMTP/YofQPv8P1TWbKWaKalAf4+fLXHiTL7KHFw4YXXKE1iA
GI6Ngos0UzHR
-----END CERTIFICATE-----
</cert>
<key>
-----BEGIN ENCRYPTED PRIVATE KEY-----
MIIJljBABgkqhkiG9w0BBQ0wMzAbBgkqhkiG9w0BBQwwDgQITIzpduKGeaYCAggA
MBQGCCqGSIb3DQMHBAjjV2SrWFzjJQSCCVBzWLc0eFF9x2OmoOddeslRnNd0DdiZ
eOjsK93BkhlSpBJVLJY7x6DD3JJokgDCFl/sWHjU4zn4C5UogaqhrAIYeFpjx4w4
4adU8bb3K4WOHAbQk5f+76HWr8LlCb4Ws2x0e8OlVwRBNJKAfumAoODE2ZZ0qitt
5FeBix1XecaSpFl8J4BoytFD1R1Pf1KHL/iZ9Vh0SGGqE+ElDPOd8+PfsIKy09ZD
/kMiiItnCF3NwG+s69GJJbGhIPdaas/yqVjtKOdQ8y7VBbrERzZ1mOVWN8zQj4cl
5whPtMBgSYgkM9UcDNaQbqn/q4yXjPF+mWuZ6EyD4yNwpbroHEna5SAbaMiIHUah
gXbSDrbFAMPqbpnpG5pUO9xM8YI8VYhlJU6MtzTjNwkmwyCzhV9WAEMBrMO3ZQGc
FmG9HchAdKO75K7bHaaAZbBvt2LMMg+cvMSFiojKvOKXrC3ntQzHCrGL0IPEmdYK
r1SIBD42zjsYPEY/MD0aV6eP/8DHbTMhF1oaQxxGLotv2+yVzoI+MTBaBFanQ+41
7LBSB8oK8uq11vVz4LIp4xC+uyH+qoKE59mPG6QoRKC5GSjIj6J9hbDOS0DcHILg
S0ebY2s2pSpVfcLZJslzSoI3ArNCs0fdkhZ2wD9/kX9BRAtZQSOsAcNVXc2sQsCj
zeIZ9V7HlNGdZnG2CAPI5RWe8RSzz1T+IF9rUUD43Hi0csQ6y3IFQEXJtmXArVXo
F6WoxqpXF3IdvLcnTDX1CK+h+QztSRysiRvWCPbISv84BIlx6OEVu+c0D82D+AUz
Wf6DRsXIzqFKly/MZNsYG7Sx0t1eHaKaw+SCsWLRdiFsdmL+LUAcqVsJNCshKp6H
Qlg3w0g9eU//qt2HnE0dx597PeSnyjRYSswt2R5dSaDh6x9KUeXc+kcTJTwxQ66c
gSopFZyoGOxHLGoCBZV1qGGKbUVnbX8hy3eunVRNVsgOBFhmMYy1kaWajOGIfVSz
jErclJpCJjuJDnK5L9ipLpQtb2VbVIgVbzwQ+p6AGBU39YO7R/ql4/DUyvo35mMx
X9tr8uGYRWxkbBJSKZ6FNG0jUI++7goT66vMWb9Sn3Xsczj1J9INMeY4OGwXGZ+3
VZrVsPMed0IJ4NIYJ0FRVhv7Y04aexJmvHqLUeRdJLk4l9kJNHoKJoleT8IUhThl
nqP76jFabL3jX9fUpDxPNNoMiz+en4L8bX3dnLlvo8xeLnUaqT63Y+CgRVyVZJSI
7PUZwGBWFHpuboLTYMgaQK6+UOp/rqtDFAkBXRD0ncSL2KYcy6I5IN9YDcYvTqvU
N5TsVjftGKCCZAFyCkVqVjfV3uKJAiK4LHJe9J94Aq3lWeaw//gg1UjWrXCRwKuZ
hO0kOEN7tw7YxOSOEzyQ3+j3TnWrToF/9QrCfY/+tOvwAVmLTD1e/dNTCt/SboaY
2FGSI0TmPRSewxCT2L9hBgM1wtDdgSofVAwxW9qK+/0JPZm+C6gGc2ipZNdDH4uN
+5j0zKZ98u7w6xRW23wCV+cnJ7IvtpXZvChFUnwnq9WanJakr/zsNsuhGpVOnD46
QOZzO3U8VwXwK1yMas0oN7XSTwf0vGZYBaCtKkF7PrLlVeOYaj50jQNXUvfLt+bQ
c64apwATE3JK2FcaV32m8UPz3bF04uuMIxBldH3Mvyp0X+MXaLERiuefUZwwppp5
yFYuy+z03asOYeQrG8LSsTGNOgJXPu1Or32GMHlil1s74uOodA5T8XHEmX4Fxuik
ok2itZL3yo/Sl73AC9yeSr7R9+Hf7SUTBt8AVeNPhmNDSi1AbyYhT1y0G7Dqwxvb
oG5ZQyKPlquf8a5Xzodq6lPdXJwi8ZLmAuBelAg6A4MJZzMMrhOzQXyiMHVNtwdk
c3LzES9bKEWgJR1CGR2RWrxUfqV6Y+uC+r5nPU/DZSOjJ0u7kWWvdQiySakxhQhO
qkT/+PeYcspzB2juDA5kq9s5Votyl05nHoM7L3UdRxzA4IhKawL9lRWu0Q/0Gn4C
axG2hUi61rxsx/epruIBz/01dlxw6xUpZBviArGxx/Z+QD0e5tKiuNCAOGQ5mm61
UtopO6vBq6oS4O/xO/xHhYlcLKJ2D3C8v1JTDQXIQ1OY6IaqIXRiaB4/bz9eCByV
8tEVW8/zs03M9zcxNuRL4tuBU0yhUCTCtGgQGUMnOYl4d1ZvZvUHb3oJqEI3AVOJ
/tTVni6P3V4TdaF17EyndLZbIz9mRp3Tai4lsZXbRpevKzQfkFvg3vRZKj5Fymp6
4SypXMKvgAQ/R6m9T8L0/rT90jf4GHhKhbSYXkJmAZQ/yz9eyjP9SFgHk3P0/z0x
pa8oc50PAumDpz73pLFwYGffZb7yAAb+Uv4bjBgw+UytIWsbWJBmGAZpVOxKCFEX
NGcflCfANB7FGv72a6fm3cf4IeqS3KGGQalIzOwwtIDWe2SVkp2LSx5JeFFCIjDP
dKynm1tczPfoL/tUzcoRqI61zVpb3pAzKmrsWnSgA5Zl+LPZq23g5QRjCNeu1xkh
GXMeXvQ0Q1VfLG4iw7j6zx52qiFy0HTQ8FK6cSA/nJN6/fE/2p+buAKxP5qjsFCP
+/QQB681rfKGrQV1yh8TKuJ04h5gdxF4sC6cliHdw8daA2y4rQorBjM7F1EF8VmZ
NcC1cEclv/E3QwOkBJsaom2rw7LkeOHLjqorGAf5eazO2AFZXVVG5yWrNyZWnaYf
LYrXCk/4yLSexVEgiC81uSQL2uhvkatrUdDi4zV9mMrHKR10w8LVEuXSkS8IK3h5
ln+HDc+rqUZG0ChHaF/GJ5VpQ6BLcMYNaoc75AuYU2rlSvMWnaR9UdiNVx3nrxld
/SvNn8K+lFiKCr0J0DiVDztCpGOq4k2JSlCr+C+YxvipRr+VZOzpxx4RvkRFKAq6
ix0demDcAk+YB6OZP3JAEy/yoiK/f61KiRpv0VVnHRFKyBv6MIyZmXkn5SesXF5C
aBAV1zRdnV4EHXZy3qKIdvDP/5qp/6WcNI4edkAwr9bl+BqMe+0dy6QcsU9dLeQa
OcpDZqHOxCXYTtiSIVM5WvSfPI5j6OdXCsrDU0VZOiiKegnGKNhz8Hn1aLZpGmoU
TkqhRGpXchHSXNsGwT9AWlSJCnEF1dT0OOJzYbIbcwLa3WcKXHADpgfLJJ/KXHDJ
buf/Epyjpi6dgg==
-----END ENCRYPTED PRIVATE KEY-----
</key>

tags/index.html (new file, +7 lines)

@@ -0,0 +1,7 @@
<!doctype html><html lang=en><head><title>Tags · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Tags"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/tags/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Tags"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/tags/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/tags/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
</a><input type=checkbox id=menu-toggle>
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/tags/>Tags</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
2016 -
2026
Eric X. Liu
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>

tags/index.xml (new file, +1 line)

@@ -0,0 +1 @@
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Tags on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/tags/</link><description>Recent content in Tags on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/tags/index.xml" rel="self" type="application/rss+xml"/></channel></rss>

tags/page/1/index.html (new file, +1 line)

@@ -0,0 +1 @@
<!doctype html><html lang=en><head><title>https://ericxliu.me/tags/</title><link rel=canonical href=https://ericxliu.me/tags/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/tags/"></head></html>