Compare commits
127 Commits
master
...
gitea-page
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bd862cb238 | ||
|
|
293f0bfa77 | ||
|
|
c15d37458e | ||
|
|
07438a27e9 | ||
|
|
4355096bdc | ||
|
|
9c66ed1b1b | ||
|
|
598c74df0a | ||
|
|
41ec0626e2 | ||
|
|
346f1f1450 | ||
|
|
0d2993f39b | ||
|
|
786f535c82 | ||
|
|
ab14cbc592 | ||
|
|
dc0feb72a8 | ||
|
|
8bf55a3b50 | ||
|
|
c75c89c088 | ||
|
|
4c7d9f4905 | ||
|
|
cde81e78d7 | ||
|
|
008e4afff6 | ||
|
|
ff2b69c081 | ||
|
|
2cbf345452 | ||
|
|
9616c3681f | ||
|
|
2d7d143cbf | ||
|
|
6e752d8af2 | ||
|
|
e48bde719b | ||
|
|
d9dccae876 | ||
|
|
960c082536 | ||
|
|
3e84d0613e | ||
|
|
645963ca87 | ||
|
|
22b2a53fc9 | ||
|
|
184c07ebff | ||
|
|
40a88799ee | ||
|
|
19d2678a16 | ||
|
|
175644c1bf | ||
|
|
c8d7b92351 | ||
|
|
7a88de8adc | ||
|
|
7864b7a14d | ||
|
|
7ff7d71dcb | ||
|
|
48268a2fc1 | ||
|
|
4808a62cd0 | ||
|
|
811c80144e | ||
|
|
ad8faa17fc | ||
|
|
66d0011843 | ||
|
|
f0b04beb1f | ||
|
|
dbe2d5d1b0 | ||
|
|
2aadf95801 | ||
|
|
ea9c28dce4 | ||
|
|
1be19a7328 | ||
|
|
073fbfe081 | ||
|
|
ed03d0a873 | ||
|
|
798e6c7d75 | ||
|
|
dff213a604 | ||
|
|
238fcb29b4 | ||
|
|
dc3978a294 | ||
|
|
6dfed70e80 | ||
|
|
596dc4948b | ||
|
|
cb921d30e0 | ||
|
|
50e9f52f56 | ||
|
|
b4e2b7f818 | ||
|
|
c2b8a4f233 | ||
|
|
8d18da2143 | ||
|
|
34ee48a56c | ||
|
|
df3c006010 | ||
|
|
c8813b97f3 | ||
|
|
52a6e87d0d | ||
|
|
5e1e4efc08 | ||
|
|
f50ba780e1 | ||
|
|
a9192dd7da | ||
|
|
a50fee0dcf | ||
|
|
9454edc7ed | ||
|
|
9efdd85826 | ||
|
|
95df119b6d | ||
|
|
a6a4ee4adb | ||
|
|
a977deebd1 | ||
|
|
8c3be83b91 | ||
|
|
76c539f415 | ||
|
|
c1be16072c | ||
|
|
11b8ac016c | ||
|
|
d03a2c49dd | ||
|
|
0ae24eb647 | ||
|
|
ce7b6b17b2 | ||
|
|
ef26adac81 | ||
|
|
fb47a09d9b | ||
|
|
b98d88fd0f | ||
|
|
144a1b1692 | ||
|
|
df6ffb4bc0 | ||
|
|
219a24e3a5 | ||
|
|
335ed1d107 | ||
|
|
8f3c545991 | ||
|
|
e8ae2242e3 | ||
|
|
d801fe9307 | ||
|
|
20c1888f78 | ||
|
|
9603629d20 | ||
|
|
e60475c8ac | ||
|
|
e83c0477c7 | ||
|
|
e86aa5f8cb | ||
|
|
8832dff8d6 | ||
|
|
ebed172a21 | ||
|
|
e7fda8a866 | ||
|
|
a147bbd8c4 | ||
|
|
38518686d9 | ||
|
|
38703cd607 | ||
|
|
22b4234f06 | ||
|
|
ef9bc708e1 | ||
| 61a3e5a38d | |||
| 303714c386 | |||
| 203b36bc6c | |||
| 085d1dd3f7 | |||
| 1f3238519a | |||
| 7ab352cdde | |||
| e5c7ad2ee3 | |||
| b14698604d | |||
| 482899015a | |||
| 396b46d31e | |||
| 1d53e2965c | |||
| 77bd58c48f | |||
| 4e79964a24 | |||
| 7667b0ebf3 | |||
| a9765b4d5b | |||
| d5b6868b70 | |||
| bd7fe9345f | |||
| f20a18d653 | |||
| c6d8e2aae6 | |||
| 685d7272e1 | |||
| 2f5387a7a3 | |||
| 4b1dd1a9bf | |||
| c05622c64f | |||
| b562560bbb |
28
.drone.yml
@@ -1,28 +0,0 @@
|
|||||||
kind: pipeline
|
|
||||||
name: default
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- name: build
|
|
||||||
image: plugins/hugo
|
|
||||||
settings:
|
|
||||||
hugo_version: 0.97.0
|
|
||||||
extended: true
|
|
||||||
minify: true
|
|
||||||
pull: always
|
|
||||||
url: ericxliu.me
|
|
||||||
validate: false
|
|
||||||
output: "./output"
|
|
||||||
mtu: 1450
|
|
||||||
- name: git-push
|
|
||||||
image: appleboy/drone-git-push:0.2.0-linux-amd64
|
|
||||||
settings:
|
|
||||||
branch: gitea-pages
|
|
||||||
remote: "git@git.ericxliu.me:eric/ericxliu-me.git"
|
|
||||||
force: true
|
|
||||||
commit: true
|
|
||||||
path: "./output"
|
|
||||||
commit_message: "Drone build ${DRONE_COMMIT_SHA:0:7}"
|
|
||||||
author_name: "Eric Liu"
|
|
||||||
author_email: "eric@ericxliu.me"
|
|
||||||
ssh_key:
|
|
||||||
from_secret: ssh_key
|
|
||||||
1
.gitignore
vendored
@@ -1 +0,0 @@
|
|||||||
_gen/
|
|
||||||
@@ -1,37 +0,0 @@
|
|||||||
# This file is a template, and might need editing before it works on your project.
|
|
||||||
# To contribute improvements to CI/CD templates, please follow the Development guide at:
|
|
||||||
# https://docs.gitlab.com/ee/development/cicd/templates.html
|
|
||||||
# This specific template is located at:
|
|
||||||
# https://gitlab.com/gitlab-org/gitlab/-/blob/master/lib/gitlab/ci/templates/Pages/HTML.gitlab-ci.yml
|
|
||||||
|
|
||||||
# Full project: https://gitlab.com/pages/plain-html
|
|
||||||
|
|
||||||
variables:
|
|
||||||
GIT_SUBMODULE_STRATEGY: recursive
|
|
||||||
|
|
||||||
build-stage:
|
|
||||||
stage: build
|
|
||||||
image: monachus/hugo:latest
|
|
||||||
script:
|
|
||||||
- hugo
|
|
||||||
- ls
|
|
||||||
artifacts:
|
|
||||||
paths:
|
|
||||||
- public
|
|
||||||
|
|
||||||
deploy-stage:
|
|
||||||
stage: deploy
|
|
||||||
image: minio/mc:latest
|
|
||||||
script:
|
|
||||||
- ls
|
|
||||||
- mkdir .public
|
|
||||||
- cp -r public/* .public
|
|
||||||
- mc alias set minio http://minio.diskstation.local:80 WjaYWk3uthUlotbT Hc3fff7v69nZ6XvcXXpOZ3JJMzcmGc6A
|
|
||||||
- mc cp -r .public/ minio/eric-personal
|
|
||||||
artifacts:
|
|
||||||
paths:
|
|
||||||
- .public
|
|
||||||
dependencies:
|
|
||||||
- build-stage
|
|
||||||
rules:
|
|
||||||
- if: $CI_COMMIT_BRANCH == $CI_DEFAULT_BRANCH
|
|
||||||
3
.gitmodules
vendored
@@ -1,3 +0,0 @@
|
|||||||
[submodule "themes/hugo-coder"]
|
|
||||||
path = themes/hugo-coder
|
|
||||||
url = https://github.com/luizdepra/hugo-coder
|
|
||||||
7
404.html
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="404 Page not found"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/404.html"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="404 Page not found"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/404.html><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest 
href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container centered"><div class=error><h1>404</h1><h2>Page Not Found</h2><p>Sorry, this page does not exist.<br>You can head back to the <a href=https://ericxliu.me/>homepage</a>.</p></div></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
16
about/index.html
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>About · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="
|
||||||
|
Hi, I’m Eric Liu.
|
||||||
|
I am a Staff Software Engineer and Tech Lead Manager (TLM) at Google, based in Sunnyvale, CA.
|
||||||
|
My work focuses on Infrastructure Performance and Customer Engineering, specifically for GPUs and TPUs. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it’s debugging race conditions across thousands of chips or designing API surfaces for next-gen models."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="About"><meta name=twitter:description content="Hi, I’m Eric Liu.
|
||||||
|
I am a Staff Software Engineer and Tech Lead Manager (TLM) at Google, based in Sunnyvale, CA.
|
||||||
|
My work focuses on Infrastructure Performance and Customer Engineering, specifically for GPUs and TPUs. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it’s debugging race conditions across thousands of chips or designing API surfaces for next-gen models."><meta property="og:url" content="https://ericxliu.me/about/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="About"><meta property="og:description" content="Hi, I’m Eric Liu.
|
||||||
|
I am a Staff Software Engineer and Tech Lead Manager (TLM) at Google, based in Sunnyvale, CA.
|
||||||
|
My work focuses on Infrastructure Performance and Customer Engineering, specifically for GPUs and TPUs. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it’s debugging race conditions across thousands of chips or designing API surfaces for next-gen models."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:published_time" content="2025-12-19T22:46:12-08:00"><meta property="article:modified_time" content="2025-12-20T09:52:07-08:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/about/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest 
href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"About","genre":"Blog","wordcount":"201","url":"https:\/\/ericxliu.me\/about\/","datePublished":"2025-12-19T22:46:12-08:00","dateModified":"2025-12-20T09:52:07-08:00","description":"\u003cimg src=\u0022\/images\/about.jpeg\u0022 alt=\u0022Eric Liu\u0022 width=\u0022300\u0022 style=\u0022float: left; margin-right: 1.5rem; margin-bottom: 1rem; border-radius: 8px;\u0022\/\u003e\n\u003cp\u003eHi, I\u0026rsquo;m \u003cstrong\u003eEric Liu\u003c\/strong\u003e.\u003c\/p\u003e\n\u003cp\u003eI am a \u003cstrong\u003eStaff Software Engineer and Tech Lead Manager (TLM)\u003c\/strong\u003e at \u003cstrong\u003eGoogle\u003c\/strong\u003e, based in Sunnyvale, CA.\u003c\/p\u003e\n\u003cp\u003eMy work focuses on \u003cstrong\u003eInfrastructure Performance and Customer Engineering\u003c\/strong\u003e, specifically for \u003cstrong\u003eGPUs and TPUs\u003c\/strong\u003e. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it\u0026rsquo;s debugging race conditions across thousands of chips or designing API surfaces for next-gen models.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container page"><article><header><h1 class=title><a class=title-link href=https://ericxliu.me/about/>About</a></h1></header><img src=/images/about.jpeg alt="Eric Liu" width=300 style=float:left;margin-right:1.5rem;margin-bottom:1rem;border-radius:8px><p>Hi, I’m <strong>Eric Liu</strong>.</p><p>I am a <strong>Staff Software Engineer and Tech Lead Manager (TLM)</strong> at <strong>Google</strong>, based in Sunnyvale, CA.</p><p>My work focuses on <strong>Infrastructure Performance and Customer Engineering</strong>, specifically for <strong>GPUs and TPUs</strong>. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it’s debugging race conditions across thousands of chips or designing API surfaces for next-gen models.</p><p>Beyond the code, I maintain this “digital garden” where I document my projects and learnings. It serves as my second brain, capturing everything from technical deep dives to random musings. 
I believe in <strong>“learning in public”</strong>—so you’ll find unpolished notes on troubleshooting Kubernetes clusters alongside recipes I’m refining. It’s not just a blog; it’s a living repository of my curiosity.</p><h3 id=personal-interests>Personal Interests
|
||||||
|
<a class=heading-link href=#personal-interests><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I’m a tinkerer at heart, whether digital or physical:</p><ul><li><strong>Homelab</strong>: Kubernetes, Proxmox, and self-hosted services. I love over-engineering my home network.</li><li><strong>DIY & Jeep</strong>: Maintaining and modifying my Jeep, and general DIY projects.</li><li><strong>Cooking</strong>: experimenting with new recipes and techniques.</li></ul><p>Welcome to my corner of the internet.</p></article></section><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
1
ads.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
google.com, pub-3972604619956476, DIRECT, f08c47fec0942fa0
|
||||||
@@ -1,6 +0,0 @@
|
|||||||
---
|
|
||||||
title: "{{ replace .Name "-" " " | title }}"
|
|
||||||
date: {{ .Date }}
|
|
||||||
draft: true
|
|
||||||
---
|
|
||||||
|
|
||||||
7
authors/index.html
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Authors · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Authors"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/authors/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Authors"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/authors/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link 
rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/authors/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/authors/>Authors</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
1
authors/index.xml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Authors on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/authors/</link><description>Recent content in Authors on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/authors/index.xml" rel="self" type="application/rss+xml"/></channel></rss>
|
||||||
1
authors/page/1/index.html
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>https://ericxliu.me/authors/</title><link rel=canonical href=https://ericxliu.me/authors/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/authors/"></head></html>
|
||||||
7
categories/index.html
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Categories · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Categories"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/categories/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Categories"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/categories/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest 
href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/categories/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/categories/>Categories</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
1
categories/index.xml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Categories on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/categories/</link><description>Recent content in Categories on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/categories/index.xml" rel="self" type="application/rss+xml"/></channel></rss>
|
||||||
1
categories/page/1/index.html
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>https://ericxliu.me/categories/</title><link rel=canonical href=https://ericxliu.me/categories/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/categories/"></head></html>
|
||||||
84
config.toml
@@ -1,84 +0,0 @@
|
|||||||
title = "Eric's Personal Page"
|
|
||||||
|
|
||||||
theme = "hugo-coder"
|
|
||||||
|
|
||||||
languageCode = "en"
|
|
||||||
defaultcontentlanguage = "en"
|
|
||||||
|
|
||||||
paginate = 20
|
|
||||||
canonifyurls = true
|
|
||||||
|
|
||||||
pygmentsstyle = "b2"
|
|
||||||
pygmentscodefences = true
|
|
||||||
pygmentscodefencesguesssyntax = true
|
|
||||||
|
|
||||||
[params] # theme parameters
|
|
||||||
author = "Eric Liu"
|
|
||||||
info = "Platform Software & Performance Engineer @Google"
|
|
||||||
description = "Eric Liu's personal website"
|
|
||||||
keywords = "blog,developer,personal"
|
|
||||||
avatarurl = "images/gravatar.png"
|
|
||||||
|
|
||||||
# wether you want to hide copyright and credits in the footer
|
|
||||||
hideCredits = true
|
|
||||||
hideCopyright = false
|
|
||||||
|
|
||||||
rtl= false
|
|
||||||
|
|
||||||
colorscheme = "light"
|
|
||||||
|
|
||||||
# Series see also post count
|
|
||||||
maxSeeAlsoItems = 5
|
|
||||||
|
|
||||||
# Enable Twemoji
|
|
||||||
enableTwemoji = true
|
|
||||||
|
|
||||||
# Custom CSS
|
|
||||||
custom_css = []
|
|
||||||
|
|
||||||
# Custom JS
|
|
||||||
custom_js = []
|
|
||||||
|
|
||||||
# Social links
|
|
||||||
[[params.social]]
|
|
||||||
name = "Git"
|
|
||||||
icon = "fab fa-gitlab"
|
|
||||||
weight = 1
|
|
||||||
url = "https://git.ericxliu.me/eric"
|
|
||||||
[[params.social]]
|
|
||||||
name = "linkedin"
|
|
||||||
icon = "fab fa-linkedin"
|
|
||||||
weight = 2
|
|
||||||
url = "https://www.linkedin.com/in/eric-liu-46648b93/"
|
|
||||||
[[params.social]]
|
|
||||||
name = "Personal email"
|
|
||||||
icon = "fas fa-envelope-square"
|
|
||||||
weight = 3
|
|
||||||
|
|
||||||
# Menu links
|
|
||||||
[languages]
|
|
||||||
[languages.en]
|
|
||||||
languagename = "English"
|
|
||||||
[[languages.en.menu.main]]
|
|
||||||
name = "Posts"
|
|
||||||
weight = 1
|
|
||||||
url = "/posts/"
|
|
||||||
[[languages.en.menu.main]]
|
|
||||||
name = "Gitlab"
|
|
||||||
weight = 2
|
|
||||||
url = "https://git.ericxliu.me"
|
|
||||||
[[languages.en.menu.main]]
|
|
||||||
name = "Notebook"
|
|
||||||
weight = 3
|
|
||||||
url = "https://hub.ericxliu.me"
|
|
||||||
[[languages.en.menu.main]]
|
|
||||||
name = "Go"
|
|
||||||
weight = 4
|
|
||||||
url = "https://go.ericxliu.me/server"
|
|
||||||
[[languages.en.menu.main]]
|
|
||||||
name = "|"
|
|
||||||
weight = 10
|
|
||||||
[[languages.en.menu.main]]
|
|
||||||
name = "Sign in"
|
|
||||||
weight = 11
|
|
||||||
url = "https://auth.ericxliu.me"
|
|
||||||
@@ -1,5 +0,0 @@
|
|||||||
---
|
|
||||||
title: "About"
|
|
||||||
date: 2018-06-01T07:13:52Z
|
|
||||||
---
|
|
||||||
|
|
||||||
@@ -1,12 +0,0 @@
|
|||||||
+++
|
|
||||||
date = 2020-10-26T04:14:43Z
|
|
||||||
title = "Some useful files"
|
|
||||||
description = ""
|
|
||||||
slug = ""
|
|
||||||
tags = []
|
|
||||||
categories = []
|
|
||||||
externalLink = ""
|
|
||||||
series = []
|
|
||||||
+++
|
|
||||||
* [rootCA.pem](https://ericxliu.me/rootCA.pem)
|
|
||||||
* [vpnclient.ovpn](https://ericxliu.me/vpnclient.ovpn)
|
|
||||||
165
fonts/LICENSE.txt
Normal file
@@ -0,0 +1,165 @@
|
|||||||
|
Fonticons, Inc. (https://fontawesome.com)
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Font Awesome Free License
|
||||||
|
|
||||||
|
Font Awesome Free is free, open source, and GPL friendly. You can use it for
|
||||||
|
commercial projects, open source projects, or really almost whatever you want.
|
||||||
|
Full Font Awesome Free license: https://fontawesome.com/license/free.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Icons: CC BY 4.0 License (https://creativecommons.org/licenses/by/4.0/)
|
||||||
|
|
||||||
|
The Font Awesome Free download is licensed under a Creative Commons
|
||||||
|
Attribution 4.0 International License and applies to all icons packaged
|
||||||
|
as SVG and JS file types.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Fonts: SIL OFL 1.1 License
|
||||||
|
|
||||||
|
In the Font Awesome Free download, the SIL OFL license applies to all icons
|
||||||
|
packaged as web and desktop font files.
|
||||||
|
|
||||||
|
Copyright (c) 2024 Fonticons, Inc. (https://fontawesome.com)
|
||||||
|
with Reserved Font Name: "Font Awesome".
|
||||||
|
|
||||||
|
This Font Software is licensed under the SIL Open Font License, Version 1.1.
|
||||||
|
This license is copied below, and is also available with a FAQ at:
|
||||||
|
http://scripts.sil.org/OFL
|
||||||
|
|
||||||
|
SIL OPEN FONT LICENSE
|
||||||
|
Version 1.1 - 26 February 2007
|
||||||
|
|
||||||
|
PREAMBLE
|
||||||
|
The goals of the Open Font License (OFL) are to stimulate worldwide
|
||||||
|
development of collaborative font projects, to support the font creation
|
||||||
|
efforts of academic and linguistic communities, and to provide a free and
|
||||||
|
open framework in which fonts may be shared and improved in partnership
|
||||||
|
with others.
|
||||||
|
|
||||||
|
The OFL allows the licensed fonts to be used, studied, modified and
|
||||||
|
redistributed freely as long as they are not sold by themselves. The
|
||||||
|
fonts, including any derivative works, can be bundled, embedded,
|
||||||
|
redistributed and/or sold with any software provided that any reserved
|
||||||
|
names are not used by derivative works. The fonts and derivatives,
|
||||||
|
however, cannot be released under any other type of license. The
|
||||||
|
requirement for fonts to remain under this license does not apply
|
||||||
|
to any document created using the fonts or their derivatives.
|
||||||
|
|
||||||
|
DEFINITIONS
|
||||||
|
"Font Software" refers to the set of files released by the Copyright
|
||||||
|
Holder(s) under this license and clearly marked as such. This may
|
||||||
|
include source files, build scripts and documentation.
|
||||||
|
|
||||||
|
"Reserved Font Name" refers to any names specified as such after the
|
||||||
|
copyright statement(s).
|
||||||
|
|
||||||
|
"Original Version" refers to the collection of Font Software components as
|
||||||
|
distributed by the Copyright Holder(s).
|
||||||
|
|
||||||
|
"Modified Version" refers to any derivative made by adding to, deleting,
|
||||||
|
or substituting — in part or in whole — any of the components of the
|
||||||
|
Original Version, by changing formats or by porting the Font Software to a
|
||||||
|
new environment.
|
||||||
|
|
||||||
|
"Author" refers to any designer, engineer, programmer, technical
|
||||||
|
writer or other person who contributed to the Font Software.
|
||||||
|
|
||||||
|
PERMISSION & CONDITIONS
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of the Font Software, to use, study, copy, merge, embed, modify,
|
||||||
|
redistribute, and sell modified and unmodified copies of the Font
|
||||||
|
Software, subject to the following conditions:
|
||||||
|
|
||||||
|
1) Neither the Font Software nor any of its individual components,
|
||||||
|
in Original or Modified Versions, may be sold by itself.
|
||||||
|
|
||||||
|
2) Original or Modified Versions of the Font Software may be bundled,
|
||||||
|
redistributed and/or sold with any software, provided that each copy
|
||||||
|
contains the above copyright notice and this license. These can be
|
||||||
|
included either as stand-alone text files, human-readable headers or
|
||||||
|
in the appropriate machine-readable metadata fields within text or
|
||||||
|
binary files as long as those fields can be easily viewed by the user.
|
||||||
|
|
||||||
|
3) No Modified Version of the Font Software may use the Reserved Font
|
||||||
|
Name(s) unless explicit written permission is granted by the corresponding
|
||||||
|
Copyright Holder. This restriction only applies to the primary font name as
|
||||||
|
presented to the users.
|
||||||
|
|
||||||
|
4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font
|
||||||
|
Software shall not be used to promote, endorse or advertise any
|
||||||
|
Modified Version, except to acknowledge the contribution(s) of the
|
||||||
|
Copyright Holder(s) and the Author(s) or with their explicit written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
5) The Font Software, modified or unmodified, in part or in whole,
|
||||||
|
must be distributed entirely under this license, and must not be
|
||||||
|
distributed under any other license. The requirement for fonts to
|
||||||
|
remain under this license does not apply to any document created
|
||||||
|
using the Font Software.
|
||||||
|
|
||||||
|
TERMINATION
|
||||||
|
This license becomes null and void if any of the above conditions are
|
||||||
|
not met.
|
||||||
|
|
||||||
|
DISCLAIMER
|
||||||
|
THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
|
||||||
|
OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE
|
||||||
|
COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
||||||
|
DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM
|
||||||
|
OTHER DEALINGS IN THE FONT SOFTWARE.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Code: MIT License (https://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
|
In the Font Awesome Free download, the MIT license applies to all non-font and
|
||||||
|
non-icon files.
|
||||||
|
|
||||||
|
Copyright 2024 Fonticons, Inc.
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
this software and associated documentation files (the "Software"), to deal in the
|
||||||
|
Software without restriction, including without limitation the rights to use, copy,
|
||||||
|
modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
||||||
|
and to permit persons to whom the Software is furnished to do so, subject to the
|
||||||
|
following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||||
|
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||||
|
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||||
|
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Attribution
|
||||||
|
|
||||||
|
Attribution is required by MIT, SIL OFL, and CC BY licenses. Downloaded Font
|
||||||
|
Awesome Free files already contain embedded comments with sufficient
|
||||||
|
attribution, so you shouldn't need to do anything additional when using these
|
||||||
|
files normally.
|
||||||
|
|
||||||
|
We've kept attribution comments terse, so we ask that you do not actively work
|
||||||
|
to remove them from files, especially code. They're a great way for folks to
|
||||||
|
learn about Font Awesome.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Brand Icons
|
||||||
|
|
||||||
|
All brand icons are trademarks of their respective owners. The use of these
|
||||||
|
trademarks does not indicate endorsement of the trademark holder by Font
|
||||||
|
Awesome, nor vice versa. **Please do not use brand logos for any purpose except
|
||||||
|
to represent the company, product, or service to which they refer.**
|
||||||
BIN
fonts/fa-brands-400.ttf
Normal file
BIN
fonts/fa-brands-400.woff2
Normal file
BIN
fonts/fa-regular-400.ttf
Normal file
BIN
fonts/fa-regular-400.woff2
Normal file
BIN
fonts/fa-solid-900.ttf
Normal file
BIN
fonts/fa-solid-900.woff2
Normal file
186
hotfix.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
# 1. Prepare /tmp/patch
|
||||||
|
PATCH_DIR = "/tmp/patch"
|
||||||
|
LITELLM_DIR = os.path.join(PATCH_DIR, "litellm")
|
||||||
|
|
||||||
|
if os.path.exists(PATCH_DIR):
|
||||||
|
shutil.rmtree(PATCH_DIR)
|
||||||
|
os.makedirs(PATCH_DIR)
|
||||||
|
|
||||||
|
shutil.copytree("/app/litellm", LITELLM_DIR)
|
||||||
|
|
||||||
|
# 2. Patch openai.py
|
||||||
|
openai_file = os.path.join(LITELLM_DIR, "types/llms/openai.py")
|
||||||
|
with open(openai_file, "r") as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
|
tool_call_chunk_original = (
|
||||||
|
'class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call\n'
|
||||||
|
' id: Optional[str]\n'
|
||||||
|
' type: Literal["function"]'
|
||||||
|
)
|
||||||
|
tool_call_chunk_patch = tool_call_chunk_original.replace(
|
||||||
|
'Literal["function"]', 'Literal["function", "web_search"]'
|
||||||
|
)
|
||||||
|
delta_chunk_original = (
|
||||||
|
"class ChatCompletionDeltaToolCallChunk(TypedDict, total=False):\n"
|
||||||
|
" id: str\n"
|
||||||
|
' type: Literal["function"]'
|
||||||
|
)
|
||||||
|
delta_chunk_patch = delta_chunk_original.replace(
|
||||||
|
'Literal["function"]', 'Literal["function", "web_search"]'
|
||||||
|
)
|
||||||
|
|
||||||
|
for original, patched, label in [
|
||||||
|
(tool_call_chunk_original, tool_call_chunk_patch, "ChatCompletionToolCallChunk"),
|
||||||
|
(delta_chunk_original, delta_chunk_patch, "ChatCompletionDeltaToolCallChunk"),
|
||||||
|
]:
|
||||||
|
if original in content:
|
||||||
|
content = content.replace(original, patched, 1)
|
||||||
|
else:
|
||||||
|
print(f"Hotfix warning: {label} pattern not found, skipping update")
|
||||||
|
|
||||||
|
with open(openai_file, "w") as f:
|
||||||
|
f.write(content)
|
||||||
|
|
||||||
|
# 3. Patch transformation.py
|
||||||
|
trans_file = os.path.join(LITELLM_DIR, "completion_extras/litellm_responses_transformation/transformation.py")
|
||||||
|
with open(trans_file, "r") as f:
|
||||||
|
content = f.read()
|
||||||
|
|
||||||
|
import_block_original = """ from litellm.types.utils import (
|
||||||
|
ChatCompletionToolCallChunk,
|
||||||
|
GenericStreamingChunk,
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import_block_updated = """ from litellm.types.utils import (
|
||||||
|
ChatCompletionToolCallChunk,
|
||||||
|
Delta,
|
||||||
|
GenericStreamingChunk,
|
||||||
|
ModelResponseStream,
|
||||||
|
StreamingChoices,
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
if import_block_original in content:
|
||||||
|
content = content.replace(import_block_original, import_block_updated, 1)
|
||||||
|
elif import_block_updated in content:
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
print("Hotfix warning: unexpected chunk_parser import layout, skipping Delta/ModelResponseStream import patch")
|
||||||
|
|
||||||
|
added_block = """ elif output_item.get("type") == "web_search_call":
|
||||||
|
# handle web search call - mask tool call by emitting empty content delta
|
||||||
|
# This prevents Open WebUI from seeing tool_calls and trying to execute them
|
||||||
|
action_payload = output_item.get("action")
|
||||||
|
verbose_logger.debug(
|
||||||
|
"Chat provider: masking web_search_call (added) call_id=%s action=%s",
|
||||||
|
output_item.get("call_id"),
|
||||||
|
action_payload,
|
||||||
|
)
|
||||||
|
# Emit empty content delta instead of tool_call to mask the tool usage
|
||||||
|
return ModelResponseStream(
|
||||||
|
choices=[
|
||||||
|
StreamingChoices(
|
||||||
|
index=0,
|
||||||
|
delta=Delta(content=""),
|
||||||
|
finish_reason=None,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
done_block = """ elif output_item.get("type") == "web_search_call":
|
||||||
|
# handle web search done - mask tool call by emitting empty content delta
|
||||||
|
# This prevents Open WebUI from seeing tool_calls and trying to execute them
|
||||||
|
action_payload = output_item.get("action")
|
||||||
|
verbose_logger.debug(
|
||||||
|
"Chat provider: masking web_search_call (done) call_id=%s action=%s",
|
||||||
|
output_item.get("call_id"),
|
||||||
|
action_payload,
|
||||||
|
)
|
||||||
|
# Emit empty content delta instead of tool_call to mask the tool usage
|
||||||
|
# Do NOT set finish_reason="tool_calls" as that would signal Open WebUI to handle the tool
|
||||||
|
return ModelResponseStream(
|
||||||
|
choices=[
|
||||||
|
StreamingChoices(
|
||||||
|
index=0,
|
||||||
|
delta=Delta(content=""),
|
||||||
|
finish_reason=None,
|
||||||
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
|
"""
|
||||||
|
|
||||||
|
added_target = ' elif output_item.get("type") == "message":'
|
||||||
|
|
||||||
|
def insert_block(source: str, block: str, occurrence_index: int) -> str:
|
||||||
|
"""Insert block before the nth occurrence (0-based) of added_target."""
|
||||||
|
start = -1
|
||||||
|
search_from = 0
|
||||||
|
for _ in range(occurrence_index + 1):
|
||||||
|
start = source.find(added_target, search_from)
|
||||||
|
if start == -1:
|
||||||
|
return source
|
||||||
|
search_from = start + len(added_target)
|
||||||
|
return source[:start] + block + source[start:]
|
||||||
|
|
||||||
|
if 'masking web_search_call (added)' not in content:
|
||||||
|
new_content = insert_block(content, added_block, 0)
|
||||||
|
if new_content == content:
|
||||||
|
print("Hotfix warning: unable to find insertion point for web_search_call (added)")
|
||||||
|
else:
|
||||||
|
content = new_content
|
||||||
|
|
||||||
|
if 'masking web_search_call (done)' not in content:
|
||||||
|
new_content = insert_block(content, done_block, 1)
|
||||||
|
if new_content == content:
|
||||||
|
print("Hotfix warning: unable to find insertion point for web_search_call (done)")
|
||||||
|
else:
|
||||||
|
content = new_content
|
||||||
|
|
||||||
|
# 4. Ensure streaming tool call chunks fall back to output_item IDs
|
||||||
|
call_id_pattern = 'id=output_item.get("call_id"),'
|
||||||
|
call_id_patch = 'id=output_item.get("call_id") or output_item.get("id"),'
|
||||||
|
if call_id_pattern in content:
|
||||||
|
content = content.replace(call_id_pattern, call_id_patch)
|
||||||
|
|
||||||
|
# 5. Guard assistant tool_call conversions when id is missing
|
||||||
|
tool_call_block_original = """ if function:
|
||||||
|
input_tool_call = {
|
||||||
|
"type": "function_call",
|
||||||
|
"call_id": tool_call["id"],
|
||||||
|
}
|
||||||
|
if "name" in function:
|
||||||
|
input_tool_call["name"] = function["name"]
|
||||||
|
if "arguments" in function:
|
||||||
|
input_tool_call["arguments"] = function["arguments"]
|
||||||
|
input_items.append(input_tool_call)
|
||||||
|
"""
|
||||||
|
tool_call_block_patch = """ if function:
|
||||||
|
call_id = tool_call.get("id") or tool_call.get("call_id")
|
||||||
|
if not call_id:
|
||||||
|
call_id = f"auto_tool_call_{len(input_items)}"
|
||||||
|
input_tool_call = {
|
||||||
|
"type": "function_call",
|
||||||
|
"call_id": call_id,
|
||||||
|
}
|
||||||
|
if "name" in function:
|
||||||
|
input_tool_call["name"] = function["name"]
|
||||||
|
if "arguments" in function:
|
||||||
|
input_tool_call["arguments"] = function["arguments"]
|
||||||
|
input_items.append(input_tool_call)
|
||||||
|
"""
|
||||||
|
if tool_call_block_original in content:
|
||||||
|
content = content.replace(tool_call_block_original, tool_call_block_patch, 1)
|
||||||
|
elif "auto_tool_call_" not in content:
|
||||||
|
print("Hotfix warning: assistant tool_call block not found; missing id guard not applied")
|
||||||
|
|
||||||
|
with open(trans_file, "w") as f:
|
||||||
|
f.write(content)
|
||||||
|
|
||||||
|
print("Successfully applied hotfixes to /tmp/patch/litellm")
|
||||||
BIN
images/a-deep-dive-into-ppo-for-language-models/.png
Normal file
|
After Width: | Height: | Size: 1.2 MiB |
|
After Width: | Height: | Size: 1.2 MiB |
|
After Width: | Height: | Size: 254 KiB |
BIN
images/about.jpeg
Normal file
|
After Width: | Height: | Size: 287 KiB |
|
After Width: | Height: | Size: 694 KiB |
|
After Width: | Height: | Size: 673 KiB |
|
After Width: | Height: | Size: 374 KiB |
|
After Width: | Height: | Size: 689 KiB |
BIN
images/gravatar.png
Normal file
|
After Width: | Height: | Size: 288 KiB |
BIN
images/ppo-for-language-models/.png
Normal file
|
After Width: | Height: | Size: 1.2 MiB |
|
After Width: | Height: | Size: 1.2 MiB |
|
After Width: | Height: | Size: 152 KiB |
BIN
images/transformer-s-core-mechanics/.png
Normal file
|
After Width: | Height: | Size: 254 KiB |
|
After Width: | Height: | Size: 216 KiB |
|
After Width: | Height: | Size: 52 KiB |
|
After Width: | Height: | Size: 193 KiB |
|
After Width: | Height: | Size: 3.2 MiB |
8
index.html
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Eric X. Liu's Personal Page"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Eric X. Liu's Personal Page"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest 
href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/index.xml title="Eric X. Liu's Personal Page"><meta name=generator content="Hugo 0.154.5"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container centered"><div class=about><div class=avatar><img src=/images/gravatar.png alt=avatar width=200 height=200></div><h1>Eric X. Liu</h1><h2 id=typeit-info></h2><script src=https://unpkg.com/typeit@8.7.1/dist/index.umd.js></script><script>document.addEventListener("DOMContentLoaded",function(){new TypeIt("#typeit-info",{strings:["Software & Performance Engineer @Google","DIY Overlander & Rock Crawler","Tech Enthusiast"],speed:50,loop:!0,breakLines:!1,nextStringDelay:2e3,deleteSpeed:50,startDelay:500,lifeLike:!0}).go()})</script><ul><li><a href=https://git.ericxliu.me/eric aria-label=Git><i class="fa-brands fa-git fa-2x" aria-hidden=true></i></a></li><li><a href=https://www.linkedin.com/in/eric-x-liu-46648b93/ aria-label=linkedin><i class="fa-brands fa-linkedin fa-2x" aria-hidden=true></i></a></li><li><style>#span-17968cae.cloaked-e-mail{display:none}</style> <span class=cloaked-e-mail data-user=cire data-domain=em.uilxcire data-display="PGkgY2xhc3M9ImZhIGZhLWVudmVsb3BlIGZhLTJ4IiBhcmlhLWhpZGRlbj0idHJ1ZSI+PC9pPg==" id=span-17968cae></span>
|
||||||
|
<script id=script-17968cae>var span,scriptTag=document.getElementById("script-17968cae"),link=document.createElement("a"),address="cire".split("").reverse().join("")+"@"+"em.uilxcire".split("").reverse().join("");link.href="mailto:"+address,span=document.getElementById("span-17968cae"),link.innerHTML=atob(span.getAttribute("data-display")),scriptTag.parentElement.insertBefore(link,scriptTag.previousElementSibling),scriptTag.parentElement.removeChild(scriptTag.previousElementSibling)</script></li><li><a href=https://ericxliu.me/index.xml aria-label=RSS rel=alternate type=application/rss+xml><i class="fa-solid fa-rss fa-2x" aria-hidden=true></i></a></li></ul></div></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
95
index.xml
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Eric X. Liu's Personal Page</title><link>https://ericxliu.me/</link><description>Recent content on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Thu, 22 Jan 2026 06:48:07 +0000</lastBuildDate><atom:link href="https://ericxliu.me/index.xml" rel="self" type="application/rss+xml"/><item><title>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</title><link>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</link><pubDate>Wed, 21 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</guid><description><p>&ldquo;Vibe coding&rdquo; has become my latest obsession. It&rsquo;s that flow state where the tools disappear, and you&rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I&rsquo;ve been trying to answer a childhood dream.</p>
|
||||||
|
<p>Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with <em>Baywatch</em>, one of the first American TV shows I ever watched was <em>Knight Rider</em>. I don&rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.</p></description></item><item><title>How I Built a Blog Agent that Writes About Itself</title><link>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</link><pubDate>Fri, 16 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</guid><description><p>I&rsquo;ve been spending a lot of time &ldquo;vibe coding&rdquo; in the Antigravity IDE lately. It&rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and &ldquo;aha!&rdquo; moments is locked away in an opaque, internal format.</p>
|
||||||
|
<p>I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you&rsquo;re reading right now).</p></description></item><item><title>Why I Downgraded Magisk to Root My Pixel 2 XL</title><link>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</link><pubDate>Wed, 07 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</guid><description><p>For the past few weeks, I&rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn&rsquo;t proof.</p>
|
||||||
|
<p>Static analysis has its limits. I found the &ldquo;smoking gun&rdquo; function—a native method responsible for encrypting the login payload—but understanding <em>how</em> it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.</p></description></item><item><title>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</title><link>https://ericxliu.me/posts/debugging-authentik-performance/</link><pubDate>Fri, 02 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/debugging-authentik-performance/</guid><description><p>In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running &ldquo;production&rdquo; at home, there is only one metric that truly matters: <strong>The Wife Acceptance Factor (WAF)</strong>.</p>
|
||||||
|
<p>My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was &ldquo;slow sometimes.&rdquo; She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.</p></description></item><item><title>How I Got Open WebUI Talking to OpenAI Web Search</title><link>https://ericxliu.me/posts/open-webui-openai-websearch/</link><pubDate>Mon, 29 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/open-webui-openai-websearch/</guid><description><p>OpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue <a href="https://github.com/BerriAI/litellm/issues/13042" class="external-link" target="_blank" rel="noopener">#13042</a> tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every <code>web_search_call</code> before the UI ever sees it.</p>
|
||||||
|
<p>This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.</p></description></item><item><title>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</title><link>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</link><pubDate>Sat, 27 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</guid><description><p>Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and &ldquo;wait, was this dinner or <em>vacation</em> dinner?&rdquo; questions.</p>
|
||||||
|
<p>For years, I relied on a rule-based system to categorize our credit card transactions. It worked&hellip; mostly. But maintaining <code>if &quot;UBER&quot; in description and amount &gt; 50</code> style rules is a never-ending battle against the entropy of merchant names and changing habits.</p></description></item><item><title>About</title><link>https://ericxliu.me/about/</link><pubDate>Fri, 19 Dec 2025 22:46:12 -0800</pubDate><guid>https://ericxliu.me/about/</guid><description><img src="https://ericxliu.me/images/about.jpeg" alt="Eric Liu" width="300" style="float: left; margin-right: 1.5rem; margin-bottom: 1rem; border-radius: 8px;"/>
|
||||||
|
<p>Hi, I&rsquo;m <strong>Eric Liu</strong>.</p>
|
||||||
|
<p>I am a <strong>Staff Software Engineer and Tech Lead Manager (TLM)</strong> at <strong>Google</strong>, based in Sunnyvale, CA.</p>
|
||||||
|
<p>My work focuses on <strong>Infrastructure Performance and Customer Engineering</strong>, specifically for <strong>GPUs and TPUs</strong>. I lead teams that bridge the gap between cutting-edge AI hardware and the latest ML models (like Gemini), ensuring optimal performance and reliability at Google Cloud scale. I thrive in the ambiguous space where hardware constraints meet software ambition—whether it&rsquo;s debugging race conditions across thousands of chips or designing API surfaces for next-gen models.</p></description></item><item><title>The Convergence of Fast Weights, Linear Attention, and State Space Models</title><link>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</link><pubDate>Fri, 19 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</guid><description><p>Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (&ldquo;Fast Weights&rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).</p>
|
||||||
|
<p>This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.</p></description></item><item><title>vAttention</title><link>https://ericxliu.me/posts/vattention/</link><pubDate>Mon, 08 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vattention/</guid><description><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p>
|
||||||
|
<h4 id="the-status-quo-pagedattention-and-software-tables">
|
||||||
|
The Status Quo: PagedAttention and Software Tables
|
||||||
|
<a class="heading-link" href="#the-status-quo-pagedattention-and-software-tables">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h4>
|
||||||
|
<p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &ldquo;page table&rdquo; (the Block Table) [1].</p></description></item><item><title>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</title><link>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</link><pubDate>Sat, 15 Nov 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</guid><description><p>I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren&rsquo;t immediately obvious.</p>
|
||||||
|
<h2 id="the-setup">
|
||||||
|
The Setup
|
||||||
|
<a class="heading-link" href="#the-setup">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
<p>The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.</p></description></item><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description><h2 id="introduction">
|
||||||
|
Introduction
|
||||||
|
<a class="heading-link" href="#introduction">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
<p>NVIDIA&rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there&rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.</p>
|
||||||
|
<p>After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device&rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn&rsquo;t computation—it&rsquo;s memory bandwidth. This isn&rsquo;t just a quirk of one device; it&rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.</p></description></item><item><title>Flashing Jetson Orin Nano in Virtualized Environments</title><link>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</link><pubDate>Thu, 02 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</guid><description><h1 id="flashing-jetson-orin-nano-in-virtualized-environments">
|
||||||
|
Flashing Jetson Orin Nano in Virtualized Environments
|
||||||
|
<a class="heading-link" href="#flashing-jetson-orin-nano-in-virtualized-environments">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h1>
|
||||||
|
<h2 id="introduction">
|
||||||
|
Introduction
|
||||||
|
<a class="heading-link" href="#introduction">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
<p>Flashing NVIDIA Jetson devices remotely presents unique challenges when the host machine is virtualized. This article documents the technical challenges, failures, and eventual success of flashing a Jetson Orin Nano Super developer kit using NVIDIA SDK Manager in various virtualized environments, specifically focusing on QEMU/KVM virtual machines and LXC containers on Proxmox VE.</p></description></item><item><title>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</title><link>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</link><pubDate>Sun, 28 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</guid><description><h3 id="overview">
|
||||||
|
Overview
|
||||||
|
<a class="heading-link" href="#overview">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<p>When using WireGuard together with MWAN3 on OpenWrt, the tunnel can fail to establish or flap when the peer&rsquo;s IP is routed into the tunnel itself. This is a classic routing bootstrap problem: WireGuard wants to route 0.0.0.0/0 into the tunnel, but the UDP packets to the peer&rsquo;s public endpoint also get captured, so they never reach the Internet to bring the tunnel up.</p></description></item><item><title>UniFi VLAN Migration to Zone-Based Architecture</title><link>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</link><pubDate>Mon, 22 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</guid><description><p>Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.</p>
|
||||||
|
<p>This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.</p></description></item><item><title>Quantization in LLMs</title><link>https://ericxliu.me/posts/quantization-in-llms/</link><pubDate>Tue, 19 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/quantization-in-llms/</guid><description><p>The burgeoning scale of Large Language Models (LLMs) has necessitated a paradigm shift in their deployment, moving beyond full-precision floating-point arithmetic towards lower-precision representations. Quantization, the process of mapping a wide range of continuous values to a smaller, discrete set, has emerged as a critical technique to reduce model size, accelerate inference, and lower energy consumption. This article provides a technical overview of quantization theories, their application in modern LLMs, and highlights the ongoing innovations in this domain.</p></description></item><item><title>Breville Barista Pro Maintenance</title><link>https://ericxliu.me/posts/breville-barista-pro-maintenance/</link><pubDate>Sat, 16 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/breville-barista-pro-maintenance/</guid><description><p>Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.</p>
|
||||||
|
<h4 id="understanding-the-two-primary-maintenance-cycles">
|
||||||
|
<strong>Understanding the Two Primary Maintenance Cycles</strong>
|
||||||
|
<a class="heading-link" href="#understanding-the-two-primary-maintenance-cycles">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h4>
|
||||||
|
<p>The Breville Barista Pro has two distinct, automated maintenance procedures: the <strong>Cleaning (Flush) Cycle</strong> and the <strong>Descale Cycle</strong>. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.</p></description></item><item><title>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</title><link>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</link><pubDate>Sat, 09 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</guid><description><p>I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:</p>
|
||||||
|
<ul>
|
||||||
|
<li>nvidia-smi failed to communicate with the NVIDIA driver</li>
|
||||||
|
<li>modprobe nvidia → “Key was rejected by service”</li>
|
||||||
|
</ul>
|
||||||
|
<p>That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.</p></description></item><item><title>Beyond Words: How RVQ Teaches LLMs to See and Hear</title><link>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</link><pubDate>Thu, 07 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</guid><description><p>Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?</p>
|
||||||
|
<p>The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is <strong>Residual Vector Quantization (RVQ)</strong>.</p></description></item><item><title>Supabase Deep Dive: It's Not Magic, It's Just Postgres</title><link>https://ericxliu.me/posts/supabase-deep-dive/</link><pubDate>Sun, 03 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/supabase-deep-dive/</guid><description><p>In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what&rsquo;s really going on.</p>
|
||||||
|
<p>Supabase enters this space with a radically different philosophy: <strong>transparency</strong>. It provides the convenience of a BaaS, but it’s built on the world&rsquo;s most trusted relational database: PostgreSQL. The &ldquo;magic&rdquo; isn&rsquo;t a proprietary black box; it&rsquo;s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.</p></description></item><item><title>A Deep Dive into PPO for Language Models</title><link>https://ericxliu.me/posts/ppo-for-language-models/</link><pubDate>Sat, 02 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/ppo-for-language-models/</guid><description><p>Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don&rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).</p>
|
||||||
|
<p>You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
|
||||||
|
<img src="https://ericxliu.me/images/ppo-for-language-models/7713bd3ecf27442e939b9190fa08165d.png" alt="S3 File"></p></description></item><item><title>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice</title><link>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</link><pubDate>Wed, 02 Jul 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</guid><description><p>Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called &ldquo;experts&rdquo;) to specialize in different types of inputs. A &ldquo;gating network&rdquo; or &ldquo;router&rdquo; learns to dispatch each input (or &ldquo;token&rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.</p>
|
||||||
|
<h3 id="1-challenge-non-differentiability-of-routing-functions">
|
||||||
|
1. Challenge: Non-Differentiability of Routing Functions
|
||||||
|
<a class="heading-link" href="#1-challenge-non-differentiability-of-routing-functions">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<p><strong>The Problem:</strong>
|
||||||
|
Many routing mechanisms, especially &ldquo;Top-K routing,&rdquo; involve a discrete, hard selection process. A common function is <code>KeepTopK(v, k)</code>, which selects the top <code>k</code> scoring elements from a vector <code>v</code> and sets others to $-\infty$ or $0$.</p></description></item><item><title>An Architectural Deep Dive of T5</title><link>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</link><pubDate>Sun, 01 Jun 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</guid><description><p>In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the &ldquo;decoder-only&rdquo; model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.</p>
|
||||||
|
<p>But to truly understand the field, we must look at the pivotal models that explored different paths. Google&rsquo;s T5, or <strong>Text-to-Text Transfer Transformer</strong>, stands out as one of the most influential. It didn&rsquo;t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.</p></description></item><item><title>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</title><link>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</link><pubDate>Thu, 01 May 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</guid><description><p>Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.</p>
|
||||||
|
<p>Our overarching philosophy is simple: <strong>isolate and change only one variable at a time.</strong> While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your <strong>grind size</strong> is your most powerful lever.</p></description></item><item><title>Transformer's Core Mechanics</title><link>https://ericxliu.me/posts/transformer-s-core-mechanics/</link><pubDate>Tue, 01 Apr 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/transformer-s-core-mechanics/</guid><description><p>The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of &ldquo;channels&rdquo; to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.</p>
|
||||||
|
<h3 id="1-the-channel-a-foundational-view-of-d_model">
|
||||||
|
1. The &ldquo;Channel&rdquo;: A Foundational View of <code>d_model</code>
|
||||||
|
<a class="heading-link" href="#1-the-channel-a-foundational-view-of-d_model">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<p>In deep learning, a &ldquo;channel&rdquo; can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model&rsquo;s primary embedding dimension, commonly referred to as <code>d_model</code>.</p></description></item><item><title>Some useful files</title><link>https://ericxliu.me/posts/useful/</link><pubDate>Mon, 26 Oct 2020 04:14:43 +0000</pubDate><guid>https://ericxliu.me/posts/useful/</guid><description><ul>
|
||||||
|
<li><a href="https://ericxliu.me/rootCA.crt" >rootCA.pem</a></li>
|
||||||
|
</ul></description></item></channel></rss>
|
||||||
@@ -0,0 +1 @@
|
|||||||
|
const body=document.body,darkModeToggle=document.getElementById("dark-mode-toggle"),darkModeMediaQuery=window.matchMedia("(prefers-color-scheme: dark)");localStorage.getItem("colorscheme")?setTheme(localStorage.getItem("colorscheme")):setTheme(body.classList.contains("colorscheme-light")||body.classList.contains("colorscheme-dark")?body.classList.contains("colorscheme-dark")?"dark":"light":darkModeMediaQuery.matches?"dark":"light"),darkModeToggle&&darkModeToggle.addEventListener("click",()=>{let e=body.classList.contains("colorscheme-dark")?"light":"dark";setTheme(e),rememberTheme(e)}),darkModeMediaQuery.addListener(e=>{setTheme(e.matches?"dark":"light")}),document.addEventListener("DOMContentLoaded",function(){let e=document.querySelector(".preload-transitions");e.classList.remove("preload-transitions")});function setTheme(e){body.classList.remove("colorscheme-auto");let n=e==="dark"?"light":"dark";body.classList.remove("colorscheme-"+n),body.classList.add("colorscheme-"+e),document.documentElement.style["color-scheme"]=e;function t(e){return new Promise(t=>{if(document.querySelector(e))return t(document.querySelector(e));const n=new MutationObserver(s=>{document.querySelector(e)&&(t(document.querySelector(e)),n.disconnect())});n.observe(document.body,{childList:!0,subtree:!0})})}if(e==="dark"){const e={type:"set-theme",theme:"github-dark"};t(".utterances-frame").then(t=>{t.contentWindow.postMessage(e,"https://utteranc.es")})}else{const e={type:"set-theme",theme:"github-light"};t(".utterances-frame").then(t=>{t.contentWindow.postMessage(e,"https://utteranc.es")})}function s(e){const t=document.querySelector("iframe.giscus-frame");if(!t)return;t.contentWindow.postMessage({giscus:e},"https://giscus.app")}s({setConfig:{theme:e}});const o=new Event("themeChanged");document.dispatchEvent(o)}function rememberTheme(e){localStorage.setItem("colorscheme",e)}
|
||||||
65
posts/benchmarking-llms-on-jetson-orin-nano/index.html
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI) · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="
|
||||||
|
Introduction
|
||||||
|
|
||||||
|
|
||||||
|
Link to heading
|
||||||
|
|
||||||
|
|
||||||
|
NVIDIA’s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there’s a catch—one that reveals a fundamental tension in modern edge AI hardware design.
|
||||||
|
After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device’s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn’t computation—it’s memory bandwidth. This isn’t just a quirk of one device; it’s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)"><meta name=twitter:description content="Introduction Link to heading NVIDIA’s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there’s a catch—one that reveals a fundamental tension in modern edge AI hardware design.
|
||||||
|
After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device’s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn’t computation—it’s memory bandwidth. This isn’t just a quirk of one device; it’s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment."><meta property="og:url" content="https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)"><meta property="og:description" content="Introduction Link to heading NVIDIA’s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there’s a catch—one that reveals a fundamental tension in modern edge AI hardware design.
|
||||||
|
After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device’s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn’t computation—it’s memory bandwidth. This isn’t just a quirk of one device; it’s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-10-04T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon 
href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Why Your Jetson Orin Nano\u0027s 40 TOPS Goes Unused (And What That Means for Edge AI)","genre":"Blog","wordcount":"1866","url":"https:\/\/ericxliu.me\/posts\/benchmarking-llms-on-jetson-orin-nano\/","datePublished":"2025-10-04T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003ch2 id=\u0022introduction\u0022\u003e\n Introduction\n \u003ca class=\u0022heading-link\u0022 href=\u0022#introduction\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h2\u003e\n\u003cp\u003eNVIDIA\u0026rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. 
But there\u0026rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.\u003c\/p\u003e\n\u003cp\u003eAfter running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device\u0026rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn\u0026rsquo;t computation—it\u0026rsquo;s memory bandwidth. This isn\u0026rsquo;t just a quirk of one device; it\u0026rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-10-04T00:00:00Z>October 4, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
9-minute read</span></div></div></header><div class=post-content><h2 id=introduction>Introduction
|
||||||
|
<a class=heading-link href=#introduction><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>NVIDIA’s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there’s a catch—one that reveals a fundamental tension in modern edge AI hardware design.</p><p>After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device’s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn’t computation—it’s memory bandwidth. This isn’t just a quirk of one device; it’s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.</p><h2 id=the-hardware-what-were-working-with>The Hardware: What We’re Working With
|
||||||
|
<a class=heading-link href=#the-hardware-what-were-working-with><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The NVIDIA Jetson Orin Nano 8GB I tested features:</p><ul><li><strong>GPU</strong>: NVIDIA Ampere architecture with 1024 CUDA cores and 32 Tensor Cores</li><li><strong>Compute Performance</strong>: 40 TOPS (INT8), 10 TFLOPS (FP16), 5 TFLOPS (FP32)</li><li><strong>Memory</strong>: 8GB LPDDR5 unified memory with 68 GB/s bandwidth</li><li><strong>Available VRAM</strong>: Approximately 5.2GB after OS overhead</li><li><strong>CPU</strong>: 6-core ARM Cortex-A78AE (ARMv8.2, 64-bit)</li><li><strong>TDP</strong>: 7-25W configurable</li></ul><p>The unified memory architecture is a double-edged sword: CPU and GPU share the same physical memory pool, which eliminates PCIe transfer overhead but also means you’re working with just 5.2GB of usable VRAM after the OS takes its share. This constraint shapes everything about LLM deployment on this device.</p><h2 id=testing-methodology>Testing Methodology
|
||||||
|
<a class=heading-link href=#testing-methodology><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><h3 id=the-models>The Models
|
||||||
|
<a class=heading-link href=#the-models><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I tested seven models ranging from 0.5B to 5.4B parameters—essentially the entire practical deployment range for this hardware. The selection covered two inference backends (Ollama and vLLM) and various quantization strategies:</p><p><strong>Ollama-served models (with quantization):</strong></p><ul><li>Gemma 3 1B (Q4_K_M, 815MB)</li><li>Gemma 3n E2B (Q4_K_M, 3.5GB, 5.44B total params, 2B effective)</li><li>Qwen 2.5 0.5B (Q4_K_M, 350MB)</li><li>Qwen 3 0.6B (FP8, 600MB)</li></ul><p><strong>vLLM-served models (minimal/no quantization):</strong></p><ul><li>google/gemma-3-1b-it (FP16, 2GB)</li><li>Qwen/Qwen2.5-0.5B-Instruct (FP16, 1GB)</li><li>Qwen/Qwen3-0.6B-FP8 (FP8, 600MB)</li></ul><h3 id=the-testing-process>The Testing Process
|
||||||
|
<a class=heading-link href=#the-testing-process><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Each model faced 10-12 prompts of varying complexity—from simple arithmetic to technical explanations about LLMs themselves. All tests ran with batch size = 1, simulating a single user interacting with a local chatbot—the typical edge deployment scenario. Out of 84 planned tests, 66 completed successfully (78.6% success rate). The failures? Mostly out-of-memory crashes on larger models and occasional inference engine instability.</p><h3 id=understanding-the-limits-roofline-analysis>Understanding the Limits: Roofline Analysis
|
||||||
|
<a class=heading-link href=#understanding-the-limits-roofline-analysis><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>To understand where performance hits its ceiling, I applied roofline analysis—a method that reveals whether a workload is compute-bound (limited by processing power) or memory-bound (limited by data transfer speed). For each model, I calculated:</p><ul><li><strong>FLOPs per token</strong>: Approximately 2 × total_parameters (accounting for matrix multiplications in forward pass)</li><li><strong>Bytes per token</strong>: model_size × 1.1 (including 10% overhead for activations and KV cache)</li><li><strong>Operational Intensity (OI)</strong>: FLOPs per token / Bytes per token</li><li><strong>Theoretical performance</strong>: min(compute_limit, bandwidth_limit)</li></ul><p>The roofline model works by comparing a workload’s operational intensity (how many calculations you do per byte of data moved) against the device’s balance point. If your operational intensity is too low, you’re bottlenecked by memory bandwidth—and as we’ll see, that’s exactly what happens with LLM inference.</p><p><img src=/images/benchmarking-llms-on-jetson-orin-nano/16d64bdc9cf14b05b7c40c4718b8091b.png alt="S3 File"></p><h2 id=the-results-speed-and-efficiency>The Results: Speed and Efficiency
|
||||||
|
<a class=heading-link href=#the-results-speed-and-efficiency><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><h3 id=what-actually-runs-fast>What Actually Runs Fast
|
||||||
|
<a class=heading-link href=#what-actually-runs-fast><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Here’s how the models ranked by token generation speed:</p><table><thead><tr><th>Rank</th><th>Model</th><th>Backend</th><th>Avg Speed (t/s)</th><th>Std Dev</th><th>Success Rate</th></tr></thead><tbody><tr><td>1</td><td>qwen3:0.6b</td><td>Ollama</td><td>38.84</td><td>1.42</td><td>100%</td></tr><tr><td>2</td><td>qwen2.5:0.5b</td><td>Ollama</td><td>35.24</td><td>2.72</td><td>100%</td></tr><tr><td>3</td><td>gemma3:1b</td><td>Ollama</td><td>26.33</td><td>2.56</td><td>100%</td></tr><tr><td>4</td><td>Qwen/Qwen2.5-0.5B-Instruct</td><td>vLLM</td><td>15.18</td><td>2.15</td><td>100%</td></tr><tr><td>5</td><td>Qwen/Qwen3-0.6B-FP8</td><td>vLLM</td><td>12.81</td><td>0.36</td><td>100%</td></tr><tr><td>6</td><td>gemma3n:e2b</td><td>Ollama</td><td>8.98</td><td>1.22</td><td>100%</td></tr><tr><td>7</td><td>google/gemma-3-1b-it</td><td>vLLM</td><td>4.59</td><td>1.52</td><td>100%</td></tr></tbody></table><p>The standout finding: quantized sub-1B models hit 25-40 tokens/second, with Ollama consistently outperforming vLLM by 2-6× thanks to aggressive quantization and edge-optimized execution. These numbers align well with independent benchmarks from NVIDIA’s Jetson AI Lab (Llama 3.2 3B at 27.7 t/s, SmolLM2 at 41 t/s), confirming this is typical performance for the hardware class.
|
||||||
|
<img src=/images/benchmarking-llms-on-jetson-orin-nano/ee04876d75d247f9b27a647462555777.png alt="S3 File"></p><h3 id=responsiveness-first-token-latency>Responsiveness: First Token Latency
|
||||||
|
<a class=heading-link href=#responsiveness-first-token-latency><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The time to generate the first output token—a critical metric for interactive applications—varied significantly:</p><ul><li>qwen3:0.6b (Ollama): 0.522 seconds</li><li>gemma3:1b (Ollama): 1.000 seconds</li><li>qwen2.5:0.5b (Ollama): 1.415 seconds</li><li>gemma3n:e2b (Ollama): 1.998 seconds</li></ul><p>Smaller, quantized models get to that first token faster—exactly what you want for a chatbot or interactive assistant where perceived responsiveness matters as much as raw throughput.</p><h3 id=the-memory-bottleneck-revealed>The Memory Bottleneck Revealed
|
||||||
|
<a class=heading-link href=#the-memory-bottleneck-revealed><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>When I compared actual performance against theoretical limits, the results were striking:</p><table><thead><tr><th>Model</th><th>Theoretical (t/s)</th><th>Actual (t/s)</th><th>Efficiency</th><th>Bottleneck</th><th>OI (FLOPs/byte)</th></tr></thead><tbody><tr><td>gemma3:1b</td><td>109.90</td><td>26.33</td><td>24.0%</td><td>Memory</td><td>3.23</td></tr><tr><td>qwen3:0.6b</td><td>103.03</td><td>38.84</td><td>37.7%</td><td>Memory</td><td>1.82</td></tr><tr><td>qwen2.5:0.5b</td><td>219.80</td><td>35.24</td><td>16.0%</td><td>Memory</td><td>3.23</td></tr><tr><td>gemma3n:e2b</td><td>54.95</td><td>8.98</td><td>16.3%</td><td>Memory</td><td>3.23</td></tr><tr><td>google/gemma-3-1b-it</td><td>30.91</td><td>4.59</td><td>14.9%</td><td>Memory</td><td>0.91</td></tr><tr><td>Qwen/Qwen3-0.6B-FP8</td><td>103.03</td><td>12.81</td><td>12.4%</td><td>Memory</td><td>1.82</td></tr><tr><td>Qwen/Qwen2.5-0.5B-Instruct</td><td>61.82</td><td>15.18</td><td>24.6%</td><td>Memory</td><td>0.91</td></tr></tbody></table><p>Every single model is memory-bound in this single-stream inference scenario. Average hardware efficiency sits at just 20.8%—meaning the computational units spend most of their time waiting for data rather than crunching numbers. That advertised 40 TOPS? Largely untapped when generating one token at a time for a single user.
|
||||||
|
<img src=/images/benchmarking-llms-on-jetson-orin-nano/ee04876d75d247f9b27a647462555777.png alt="S3 File"></p><h2 id=what-this-actually-means>What This Actually Means
|
||||||
|
<a class=heading-link href=#what-this-actually-means><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><h3 id=why-memory-bandwidth-dominates-in-single-stream-inference>Why Memory Bandwidth Dominates (in Single-Stream Inference)
|
||||||
|
<a class=heading-link href=#why-memory-bandwidth-dominates-in-single-stream-inference><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The roofline numbers tell a clear story: operational intensity ranges from 0.91 to 3.23 FLOPs/byte across all tested models during single-token generation (batch size = 1). To actually saturate those 1024 CUDA cores and hit compute-bound operation, you’d need an operational intensity around 147 FLOPs/byte at the device’s 68 GB/s memory bandwidth.</p><p>In practice, for a model to actually become compute-bound on this device during single-stream inference, it would need an operational intensity exceeding:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-fallback data-lang=fallback><span style=display:flex><span>OI_threshold = Peak_Compute / Memory_Bandwidth
|
||||||
|
</span></span><span style=display:flex><span> = (40 × 10^12 ops/s) / (68 × 10^9 bytes/s)
|
||||||
|
</span></span><span style=display:flex><span> = 588 FLOPs/byte
|
||||||
|
</span></span></code></pre></div><p>Single-stream autoregressive decoding falls 100-600× short of this threshold because each token generation requires loading the entire model from memory (matrix-vector multiplication) while performing only ~2 FLOPs per parameter. The compute units are idle most of the time, simply waiting for model weights and activations to arrive from memory.</p><p>Note: Production LLM serving with large batch sizes (32-256 requests) changes this dynamic dramatically—batching transforms matrix-vector operations into matrix-matrix multiplications, increasing operational intensity by 30-250× and making workloads compute-bound. However, edge devices serving single users cannot exploit this optimization.</p><p>The largest model tested—gemma3n:e2b at 3.5GB quantized (5.44B total parameters, 2B effective)—shows only 16.3% efficiency, similar to other quantized models. Despite being the largest model, Q4_K_M quantization keeps its memory footprint manageable, resulting in similar operational intensity (3.23 FLOPs/byte) to the other INT4-quantized models. Its MatFormer architecture with selective parameter activation (only 2B of 5.44B params active per token) actually helps reduce memory traffic, though this benefit is partially offset by the overhead of routing logic.</p><h3 id=what-this-means-for-edge-deployment>What This Means for Edge Deployment
|
||||||
|
<a class=heading-link href=#what-this-means-for-edge-deployment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The performance gap between Ollama and vLLM (2.3-5.7×) tells us something important about optimization priorities for single-user edge devices:</p><p><strong>Qwen 2.5 0.5B:</strong> Ollama (Q4_K_M, 350MB) at 35.24 t/s vs vLLM (FP16, 1GB) at 15.18 t/s—2.32× faster
|
||||||
|
<strong>Qwen 3 0.6B:</strong> Ollama (FP8) at 38.84 t/s vs vLLM (FP8) at 12.81 t/s—3.03× faster despite identical quantization
|
||||||
|
<strong>Gemma 3 1B:</strong> Ollama (Q4_K_M, 815MB) at 26.33 t/s vs vLLM (FP16, 2GB) at 4.59 t/s—5.74× faster</p><p>In single-stream scenarios, quantization delivers near-linear performance gains by directly attacking the memory bandwidth bottleneck. Q4_K_M quantization (4.5 bits/parameter) hits a sweet spot between model quality and speed. Going lower to INT2 might help further, but you’ll need to carefully evaluate output quality.</p><p>The real insight: Ollama’s edge-first design philosophy (GGUF format, streamlined execution, optimized kernels from llama.cpp) is fundamentally better aligned with single-stream, memory-constrained edge scenarios. vLLM’s datacenter features—continuous batching, PagedAttention, tensor parallelism—add overhead without providing benefits when serving individual users on unified memory architectures. These features shine in multi-user production serving where batching can be exploited, but hurt performance in the single-stream case.</p><p><strong>What you should actually do</strong>: Stick with Ollama or TensorRT-LLM using Q4_K_M/INT4 quantized models in GGUF format. Target the 0.5-1B parameter range (under 3GB) to leave headroom for KV cache. Focus your optimization efforts on memory access patterns and bandwidth reduction. Watch for emerging techniques like INT4 AWQ, sparse attention, and quantized KV caches.</p><h3 id=room-for-improvement>Room for Improvement
|
||||||
|
<a class=heading-link href=#room-for-improvement><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The 20.8% average efficiency might sound terrible, but it’s actually typical for edge AI devices running single-stream inference. Datacenter GPUs hit 60-80% efficiency on optimized workloads—but that’s typically with large batch sizes that increase operational intensity. In comparable single-stream scenarios, even high-end GPUs see similar efficiency drops. Edge devices commonly land in the 15-40% range due to architectural tradeoffs and memory bandwidth constraints relative to their compute capability.</p><p>Three factors explain the gap:</p><ol><li><strong>Architecture</strong>: Unified memory sacrifices bandwidth for integration simplicity. The 4MB L2 cache and 7-15W TDP limit further constrain performance.</li><li><strong>Software maturity</strong>: Edge inference frameworks lag behind their datacenter counterparts in optimization.</li><li><strong>Runtime overhead</strong>: Quantization/dequantization operations, Python abstractions, and non-optimized kernels all add up.</li></ol><p>The consistent 16-24% efficiency across most models suggests there’s room for 2-3× speedups through better software optimization—particularly in memory access patterns and kernel implementations. But fundamental performance leaps will require hardware changes—specifically, prioritizing memory bandwidth (200+ GB/s) over raw compute capability in future edge AI chips.</p><h2 id=where-to-go-from-here>Where to Go From Here
|
||||||
|
<a class=heading-link href=#where-to-go-from-here><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><h3 id=software-optimizations-worth-pursuing>Software Optimizations Worth Pursuing
|
||||||
|
<a class=heading-link href=#software-optimizations-worth-pursuing><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>Optimize memory access patterns in attention and MLP kernels</li><li>Implement quantized KV cache (8-bit or lower)</li><li>Tune for small batch sizes (2-4) to improve memory bus utilization</li><li>Overlap CPU-GPU pipeline operations to hide latency</li></ul><h3 id=research-directions>Research Directions
|
||||||
|
<a class=heading-link href=#research-directions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>Architectures with higher operational intensity (fewer memory accesses per compute operation)</li><li>Sparse attention patterns to reduce memory movement</li><li>On-device LoRA fine-tuning with frozen, quantized base weights</li><li>Multi-model serving with shared base model weights</li></ul><h3 id=what-edge-ai-hardware-designers-should-focus-on>What Edge AI Hardware Designers Should Focus On
|
||||||
|
<a class=heading-link href=#what-edge-ai-hardware-designers-should-focus-on><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Future edge AI devices optimized for local, single-user LLM inference need a fundamental shift in priorities: memory bandwidth over raw compute capability. Specifically:</p><ul><li>200+ GB/s memory bandwidth (3× current Jetson Orin Nano)</li><li>HBM integration for higher bandwidth density</li><li>16GB+ capacity to support 7B+ parameter models</li><li>Purpose-built INT4/INT8 accelerators with larger on-chip caches to reduce DRAM traffic</li></ul><hr><h2 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ol><li><p>Williams, S., Waterman, A., & Patterson, D. (2009). “Roofline: An Insightful Visual Performance Model for Multicore Architectures.” <em>Communications of the ACM</em>, 52(4), 65-76.</p></li><li><p>NVIDIA Corporation. (2024). “Jetson Orin Nano Developer Kit Technical Specifications.” <a href=https://developer.nvidia.com/embedded/jetson-orin-nano-developer-kit class=external-link target=_blank rel=noopener>https://developer.nvidia.com/embedded/jetson-orin-nano-developer-kit</a></p></li><li><p>“Jetson AI Lab Benchmarks.” NVIDIA Jetson AI Lab. <a href=https://www.jetson-ai-lab.com/benchmarks.html class=external-link target=_blank rel=noopener>https://www.jetson-ai-lab.com/benchmarks.html</a></p></li><li><p>Gerganov, G., et al. (2023). “GGML - AI at the edge.” <em>GitHub</em>. <a href=https://github.com/ggerganov/ggml class=external-link target=_blank rel=noopener>https://github.com/ggerganov/ggml</a></p></li><li><p>Kwon, W., et al. (2023). “Efficient Memory Management for Large Language Model Serving with PagedAttention.” <em>Proceedings of SOSP 2023</em>.</p></li><li><p>Team, G., Mesnard, T., et al. (2025). “Gemma 3: Technical Report.” <em>arXiv preprint arXiv:2503.19786v1</em>. <a href=https://arxiv.org/html/2503.19786v1 class=external-link target=_blank rel=noopener>https://arxiv.org/html/2503.19786v1</a></p></li><li><p>Yang, A., et al. (2025). “Qwen3 Technical Report.” <em>arXiv preprint arXiv:2505.09388</em>. <a href=https://arxiv.org/pdf/2505.09388 class=external-link target=_blank rel=noopener>https://arxiv.org/pdf/2505.09388</a></p></li><li><p>DeepSeek-AI. (2025). “DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning.” <em>arXiv preprint arXiv:2501.12948v1</em>. 
<a href=https://arxiv.org/html/2501.12948v1 class=external-link target=_blank rel=noopener>https://arxiv.org/html/2501.12948v1</a></p></li><li><p>“Running LLMs with TensorRT-LLM on NVIDIA Jetson Orin Nano Super.” Collabnix. <a href=https://collabnix.com/running-llms-with-tensorrt-llm-on-nvidia-jetson-orin-nano-super/ class=external-link target=_blank rel=noopener>https://collabnix.com/running-llms-with-tensorrt-llm-on-nvidia-jetson-orin-nano-super/</a></p></li><li><p>Pope, R., et al. (2022). “Efficiently Scaling Transformer Inference.” <em>Proceedings of MLSys 2022</em>.</p></li><li><p>Frantar, E., et al. (2023). “GPTQ: Accurate Post-Training Quantization for Generative Pre-trained Transformers.” <em>Proceedings of ICLR 2023</em>.</p></li><li><p>Dettmers, T., et al. (2023). “QLoRA: Efficient Finetuning of Quantized LLMs.” <em>Proceedings of NeurIPS 2023</em>.</p></li><li><p>Lin, J., et al. (2023). “AWQ: Activation-aware Weight Quantization for LLM Compression and Acceleration.” <em>arXiv preprint arXiv:2306.00978</em>.</p></li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js 
integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
28
posts/breville-barista-pro-maintenance/index.html
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Breville Barista Pro Maintenance · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.
|
||||||
|
|
||||||
|
Understanding the Two Primary Maintenance Cycles
|
||||||
|
|
||||||
|
|
||||||
|
Link to heading
|
||||||
|
|
||||||
|
|
||||||
|
The Breville Barista Pro has two distinct, automated maintenance procedures: the Cleaning (Flush) Cycle and the Descale Cycle. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Breville Barista Pro Maintenance"><meta name=twitter:description content="Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.
|
||||||
|
Understanding the Two Primary Maintenance Cycles Link to heading The Breville Barista Pro has two distinct, automated maintenance procedures: the Cleaning (Flush) Cycle and the Descale Cycle. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine."><meta property="og:url" content="https://ericxliu.me/posts/breville-barista-pro-maintenance/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Breville Barista Pro Maintenance"><meta property="og:description" content="Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.
|
||||||
|
Understanding the Two Primary Maintenance Cycles Link to heading The Breville Barista Pro has two distinct, automated maintenance procedures: the Cleaning (Flush) Cycle and the Descale Cycle. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-08-16T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-20T06:04:36+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/breville-barista-pro-maintenance/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg 
color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Breville Barista Pro Maintenance","genre":"Blog","wordcount":"920","url":"https:\/\/ericxliu.me\/posts\/breville-barista-pro-maintenance\/","datePublished":"2025-08-16T00:00:00\u002b00:00","dateModified":"2025-08-20T06:04:36\u002b00:00","description":"\u003cp\u003eProper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.\u003c\/p\u003e\n\u003ch4 id=\u0022understanding-the-two-primary-maintenance-cycles\u0022\u003e\n \u003cstrong\u003eUnderstanding the Two Primary Maintenance Cycles\u003c\/strong\u003e\n \u003ca class=\u0022heading-link\u0022 href=\u0022#understanding-the-two-primary-maintenance-cycles\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h4\u003e\n\u003cp\u003eThe Breville Barista Pro has two distinct, automated maintenance procedures: the \u003cstrong\u003eCleaning (Flush) Cycle\u003c\/strong\u003e and the \u003cstrong\u003eDescale Cycle\u003c\/strong\u003e. 
It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/breville-barista-pro-maintenance/>Breville Barista Pro Maintenance</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-08-16T00:00:00Z>August 16, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
5-minute read</span></div></div></header><div class=post-content><p>Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.</p><h4 id=understanding-the-two-primary-maintenance-cycles><strong>Understanding the Two Primary Maintenance Cycles</strong>
|
||||||
|
<a class=heading-link href=#understanding-the-two-primary-maintenance-cycles><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The Breville Barista Pro has two distinct, automated maintenance procedures: the <strong>Cleaning (Flush) Cycle</strong> and the <strong>Descale Cycle</strong>. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.</p><ul><li><strong>Cleaning Cycle (Flush):</strong> This process is designed to remove coffee oils and granulated residue from the group head, shower screen, and portafilter system.</li><li><strong>Descale Cycle:</strong> This process targets the internal components of the machine, such as the thermocoil and water lines, to remove mineral and limescale deposits from water.</li></ul><h4 id=procedure-1-the-cleaning-flush-cycle><strong>Procedure 1: The Cleaning (Flush) Cycle</strong>
|
||||||
|
<a class=heading-link href=#procedure-1-the-cleaning-flush-cycle><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The machine will indicate when a cleaning cycle is needed by displaying a “FLUSH” alert on the LCD screen. This typically occurs after approximately 200 extractions.</p><p><strong>Required Materials:</strong></p><ul><li>1-Cup filter basket</li><li>Grey silicone cleaning disc (provided with the machine)</li><li>One cleaning tablet</li></ul><p><strong>Step-by-Step Instructions:</strong></p><ol><li>Insert the 1-cup filter basket into the portafilter.</li><li>Place the grey silicone cleaning disc inside the basket.</li><li>Position one cleaning tablet in the center of the disc.</li><li>Lock the portafilter firmly into the group head.</li><li>Ensure the drip tray is empty and the water tank is filled.</li><li>Press the ‘MENU’ button and use the ‘Grind Amount’ dial to navigate to the ‘FLUSH’ option. Press the dial to select it.</li><li>The ‘1 CUP’ button will illuminate. Press it to initiate the cycle.</li><li>The cleaning process will last approximately five minutes, with the machine backflushing water under pressure. The remaining time will be displayed on the screen.</li><li>Upon completion, the machine will beep and return to its ready state.</li><li>Remove the portafilter and discard the water and dissolved tablet residue. Thoroughly rinse the portafilter, cleaning disc, and filter basket.</li><li>Re-insert the portafilter (without the disc or tablet) and run a shot of hot water through the group head to rinse any remaining cleaning solution.</li></ol><h4 id=procedure-2-the-descale-cycle><strong>Procedure 2: The Descale Cycle</strong>
|
||||||
|
<a class=heading-link href=#procedure-2-the-descale-cycle><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The machine will alert you when descaling is required. The frequency depends on water hardness and usage but is generally recommended every 2-3 months.</p><p><strong>Required Materials:</strong></p><ul><li>Breville-recommended descaling solution</li><li>A large container (minimum 2-liter capacity)</li></ul><p><strong>Step-by-Step Instructions:</strong></p><p><strong>Part A: Preparation</strong></p><ol><li>Empty the drip tray and re-insert it.</li><li>Remove the water filter from the water tank.</li><li>Pour the descaling solution into the empty water tank and add fresh water up to the indicated “DESCALE” line.</li><li>Place a large container under the group head, hot water outlet, and steam wand.</li></ol><p><strong>Part B: The Descaling Process</strong></p><ol><li>Turn the machine on and press the ‘MENU’ button. Navigate to the ‘DESCALE’ option and select it by pressing the dial.</li><li>Press the illuminated ‘1 CUP’ button to begin.</li><li>The cycle proceeds in three stages. You must manually advance through them using the steam dial based on the LCD prompts:<ul><li><strong>Group Head (d3):</strong> The machine descales the coffee brewing components.</li><li><strong>Hot Water (d2):</strong> After a beep, the LCD shows “d2”. Turn the steam dial to the hot water position.</li><li><strong>Steam (d1):</strong> After another beep, the display reads “d1”. 
Turn the dial to the steam position.</li></ul></li></ol><p><strong>Part C: The Rinse Cycle</strong></p><ol><li>Once the descaling solution is expended, the machine will beep and prompt for a rinse cycle (“r”).</li><li>Empty the large container and rinse the water tank thoroughly.</li><li>Fill the water tank with fresh, cold water to the MAX line and re-insert it.</li><li>Place the empty container back under the outlets and press the ‘1 CUP’ button.</li><li>The rinse cycle will mirror the descaling process, prompting you to engage the group head (“r3”), hot water (“r2”), and steam wand (“r1”) in sequence.</li><li>After the rinse is complete, the machine will exit the maintenance mode and return to its ready state.</li></ol><h4 id=routine-and-preventative-maintenance-schedule><strong>Routine and Preventative Maintenance Schedule</strong>
|
||||||
|
<a class=heading-link href=#routine-and-preventative-maintenance-schedule><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>In addition to the automated cycles, regular manual cleaning is essential for machine health.</p><p><strong>Daily Tasks:</strong></p><ul><li><strong>Purge Group Head:</strong> After the final use of the day, run hot water through the group head (without the portafilter) to clear grounds.</li><li><strong>Clean Portafilter & Baskets:</strong> Do not let used coffee grounds sit in the portafilter. Rinse with hot water after every use.</li><li><strong>Clean Steam Wand:</strong> Immediately after texturing milk, wipe the wand with a damp cloth and purge steam for 2-3 seconds to clear internal passages.</li><li><strong>Empty Drip Tray:</strong> Empty and rinse the drip tray regularly.</li></ul><p><strong>Weekly Tasks:</strong></p><ul><li><strong>Soak Components:</strong> Remove the filter basket from the portafilter. Soak both components in a solution of hot water and a cleaning tablet (or specific espresso cleaner) for 20-30 minutes to dissolve accumulated coffee oils. Rinse thoroughly.</li><li><strong>Clean Grinder:</strong> Empty the bean hopper. Run the grinder to clear any remaining beans, then use a brush and/or vacuum to clean out fines and oil residue from the burrs and chute.</li></ul><p><strong>Periodic Tasks (Every 2-3 Months):</strong></p><ul><li><strong>Replace Water Filter:</strong> The water filter located inside the water tank should be replaced every 3 months. 
This reduces the rate of scale buildup.</li><li><strong>Inspect Shower Screen:</strong> Use a brush to gently scrub the shower screen inside the group head to remove any stubborn coffee grounds.</li></ul><p>By adhering to this comprehensive maintenance schedule, you can ensure your Breville Barista Pro operates at peak performance and consistently produces high-quality espresso.</p><hr><p><strong>Reference:</strong></p><ul><li>Breville Barista Pro Instruction Manual and official manufacturer guidelines.</li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section 
class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
47
posts/debugging-authentik-performance/index.html
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running “production” at home, there is only one metric that truly matters: The Wife Acceptance Factor (WAF).
|
||||||
|
My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was “slow sometimes.” She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content='Why Your "Resilient" Homelab is Slower Than a Raspberry Pi'><meta name=twitter:description content="In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running “production” at home, there is only one metric that truly matters: The Wife Acceptance Factor (WAF).
|
||||||
|
My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was “slow sometimes.” She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage."><meta property="og:url" content="https://ericxliu.me/posts/debugging-authentik-performance/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content='Why Your "Resilient" Homelab is Slower Than a Raspberry Pi'><meta property="og:description" content="In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running “production” at home, there is only one metric that truly matters: The Wife Acceptance Factor (WAF).
|
||||||
|
My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was “slow sometimes.” She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-02T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-03T06:57:12+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/debugging-authentik-performance/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Why Your \u0022Resilient\u0022 Homelab is Slower Than a Raspberry Pi","genre":"Blog","wordcount":"1031","url":"https:\/\/ericxliu.me\/posts\/debugging-authentik-performance\/","datePublished":"2026-01-02T00:00:00\u002b00:00","dateModified":"2026-01-03T06:57:12\u002b00:00","description":"\u003cp\u003eIn the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running \u0026ldquo;production\u0026rdquo; at home, there is only one metric that truly matters: \u003cstrong\u003eThe Wife Acceptance Factor (WAF)\u003c\/strong\u003e.\u003c\/p\u003e\n\u003cp\u003eMy detailed Grafana dashboards said everything was fine. But my wife said the SSO login was \u0026ldquo;slow sometimes.\u0026rdquo; She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/debugging-authentik-performance/>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2026-01-02T00:00:00Z>January 2, 2026
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
5-minute read</span></div></div></header><div class=post-content><p>In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running “production” at home, there is only one metric that truly matters: <strong>The Wife Acceptance Factor (WAF)</strong>.</p><p>My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was “slow sometimes.” She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.</p><p>Here is a breakdown of the symptoms, the red herrings, and the root cause that was hiding in plain sight.</p><h2 id=the-environment>The Environment
|
||||||
|
<a class=heading-link href=#the-environment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>My homelab is designed for node-level resilience, which adds complexity to the storage layer. It is not running on a single server, but rather a 3-node <strong>Proxmox</strong> cluster where every component is redundant:</p><ul><li><strong>Orchestration</strong>: Kubernetes (k3s) managed via Flux CD.</li><li><strong>Storage</strong>: A <strong>Ceph</strong> cluster running on the Proxmox nodes, utilizing enterprise NVMe SSDs (<code>bluestore</code>) for OSDs.</li><li><strong>Database</strong>: Postgres managed by the Zalando Postgres Operator, with persistent volumes (PVCs) provisioned on Ceph RBD (block storage).</li><li><strong>Identity</strong>: Authentik for SSO.</li></ul><p>While the underlying disks are blazing fast NVMe drives, the architecture dictates that a write to a Ceph RBD volume is not complete until it is replicated over the network and acknowledged by multiple OSDs. This setup provides incredible resilience—I can pull the plug on a node and nothing stops—but it introduces unavoidable network latency for synchronous write operations. <strong>Keep this particular trade-off in mind; it plays a starring role in the investigation later.</strong></p><h2 id=the-symptom>The Symptom
|
||||||
|
<a class=heading-link href=#the-symptom><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The issue was insidious because it was intermittent. Clicking “Login” would sometimes hang for 5-8 seconds, while other times it was instant. To an engineer, “sometimes slow” is the worst kind of bug because it defies easy reproduction.</p><p>The breakthrough came when I put aside the server-side Grafana dashboards and looked at the client side. By opening Chrome DevTools and monitoring the <strong>Network</strong> tab during a slow login attempt, I was able to capture the exact failing request.</p><p>I identified the culprit: the <code>/api/v3/core/applications/</code> endpoint. It wasn’t a connection timeout or a DNS issue; the server was simply taking 5+ seconds to respond to this specific GET request.</p><p>Armed with this “smoking gun,” I copied the request as cURL (preserving the session cookies) and converted it into a Python benchmark script (<code>reproduce_latency.py</code>). This allowed me to reliably trigger the latency on demand, turning an intermittent “heisenbug” into a reproducible test case.</p><p>The results were validating and horrifying:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-text data-lang=text><span style=display:flex><span>Request 1: 2.1642s
|
||||||
|
</span></span><span style=display:flex><span>Request 2: 8.4321s
|
||||||
|
</span></span><span style=display:flex><span>Request 3: 5.1234s
|
||||||
|
</span></span><span style=display:flex><span>...
|
||||||
|
</span></span><span style=display:flex><span>Avg Latency: 4.8s
|
||||||
|
</span></span></code></pre></div><h2 id=investigation--red-herrings>Investigation & Red Herrings
|
||||||
|
<a class=heading-link href=#investigation--red-herrings><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><h3 id=attempt-1-the-connection-overhead-hypothesis>Attempt 1: The Connection Overhead Hypothesis
|
||||||
|
<a class=heading-link href=#attempt-1-the-connection-overhead-hypothesis><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Hypothesis</strong>: Authentik defaults to <code>CONN_MAX_AGE=0</code>, meaning it closes the database connection after every request. Since I enforce SSL for the database, I assumed the handshake overhead was killing performance.</p><p><strong>The Fix Attempt</strong>: I updated the Authentik configuration to enable persistent connections:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-yaml data-lang=yaml><span style=display:flex><span><span style=color:#7ee787>env</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>- <span style=color:#7ee787>name</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>AUTHENTIK_POSTGRESQL__CONN_MAX_AGE</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>value</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>"600"</span><span style=color:#6e7681>
|
||||||
|
</span></span></span></code></pre></div><p><strong>The Reality</strong>: The benchmark showed a slight improvement (~4.2s average), but the random 5-8s spikes remained. The 300ms connection setup was a factor, but not the root cause. As a side note, enabling this without configuring TCP Keepalives caused the Authentik worker to crash with <code>OperationalError('the connection is closed')</code> when firewalls silently dropped idle connections.</p><h3 id=attempt-2-cpu-starvation>Attempt 2: CPU Starvation
|
||||||
|
<a class=heading-link href=#attempt-2-cpu-starvation><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Hypothesis</strong>: The pods were CPU throttled during request processing.</p><p><strong>The Reality</strong>: <code>kubectl top pods</code> showed the server using only 29m (2.9% of a core). Even increasing the Gunicorn worker count from 2 to 4 did not improve the latency of individual requests, though it did help with concurrency.</p><h2 id=the-root-cause-a-perfect-storm>The Root Cause: A Perfect Storm
|
||||||
|
<a class=heading-link href=#the-root-cause-a-perfect-storm><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>I was stuck. The CPU was idle, network was fine, and individual database queries were fast (<1ms). Then I looked at the traffic patterns:</p><ol><li><strong>Redis</strong>: Almost zero traffic.</li><li><strong>Postgres</strong>: High <code>WALSync</code> and <code>WALWrite</code> wait times.</li><li><strong>The Table</strong>: <code>django_postgres_cache_cacheentry</code> was getting hammered.</li></ol><h3 id=insight-the-breaking-change>Insight: The Breaking Change
|
||||||
|
<a class=heading-link href=#insight-the-breaking-change><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I checked the release notes for <strong>Authentik 2025.10</strong>:</p><blockquote><p><em>Breaking Change: Redis is no longer used for caching. All caching has been moved to the PostgreSQL database to simplify deployment.</em></p></blockquote><p>This architectural shift created a bottleneck specific to my storage backend:</p><ol><li><strong>The Change</strong>: Every API request triggers a cache write (session updates) to Postgres instead of Redis.</li><li><strong>The Default</strong>: Postgres defaults to <code>synchronous_commit = on</code>. A transaction is not considered “committed” until it is flushed to disk.</li><li><strong>The Storage</strong>: Ceph RBD replicates data across the network to multiple OSDs.</li></ol><p>Every time I loaded the dashboard, Authentik tried to update the cache. Postgres paused, verified the write was replicated to 3 other servers over the network (WAL Sync), and <em>then</em> responded.</p><h2 id=the-solution>The Solution
|
||||||
|
<a class=heading-link href=#the-solution><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>I couldn’t move the database to local NVMe without losing the failover capabilities I built the cluster for. However, for a cache-heavy workload, I could compromise on strict durability.</p><p>I patched the Postgres configuration to disable synchronous commits:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-yaml data-lang=yaml><span style=display:flex><span><span style=color:#7ee787>spec</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>postgresql</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>parameters</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>synchronous_commit</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>"off"</span><span style=color:#6e7681> </span><span style=color:#8b949e;font-style:italic># The magic switch</span><span style=color:#6e7681>
|
||||||
|
</span></span></span></code></pre></div><p><strong>What this does</strong>: Postgres returns “Success” to the application as soon as the transaction is in memory. It flushes to disk in the background. In the event of a crash, I might lose the last ~500ms of data (mostly cache entries), which is an acceptable trade-off.</p><h2 id=verification>Verification
|
||||||
|
<a class=heading-link href=#verification><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>I re-ran the benchmark with <code>synchronous_commit = off</code>.</p><table><thead><tr><th>Metric</th><th>Before (<code>sync=on</code>)</th><th>After (<code>sync=off</code>)</th><th>Improvement</th></tr></thead><tbody><tr><td>Sequential x8 stream (Avg)</td><td>~4.8s</td><td><strong>0.40s</strong></td><td><strong>12x Faster</strong></td></tr><tr><td>Parallel x8 stream (Wall)</td><td>~10.5s</td><td><strong>2.45s</strong></td><td><strong>4x Faster</strong></td></tr></tbody></table><p>The latency vanished. The login became instant.</p><h2 id=key-insights>Key Insights
|
||||||
|
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Read Release Notes</strong>: The shift from Redis to Postgres for caching was a major architectural change that I missed during the upgrade.</li><li><strong>Storage Matters</strong>: Distributed storage (Ceph/Longhorn) handles linear writes well, but struggles with latency-sensitive, high-frequency sync operations like WAL updates.</li><li><strong>Tuning Postgres</strong>: For workloads where immediate durability is less critical than latency (like caching tables), <code>synchronous_commit = off</code> is a powerful tool.</li><li><strong>Observability</strong>: The “Wife Test” is a valid monitoring alert. If a user complains it’s slow, investigate the P99 latency, not just the average.</li></ul><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li><a href=https://docs.goauthentik.io/releases/2025.10/ class=external-link target=_blank rel=noopener>Authentik 2025.10 Release Notes</a></li><li><a href=https://www.postgresql.org/docs/current/wal-async-commit.html class=external-link target=_blank rel=noopener>PostgreSQL Documentation: Synchronous Commit</a></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
@@ -0,0 +1,23 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.
|
||||||
|
Our overarching philosophy is simple: isolate and change only one variable at a time. While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your grind size is your most powerful lever."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso"><meta name=twitter:description content="Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.
|
||||||
|
Our overarching philosophy is simple: isolate and change only one variable at a time. While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your grind size is your most powerful lever."><meta property="og:url" content="https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso"><meta property="og:description" content="Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.
|
||||||
|
Our overarching philosophy is simple: isolate and change only one variable at a time. While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your grind size is your most powerful lever."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-05-01T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-03T04:20:20+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso","genre":"Blog","wordcount":"1125","url":"https:\/\/ericxliu.me\/posts\/espresso-theory-application-a-guide-for-the-breville-barista-pro\/","datePublished":"2025-05-01T00:00:00\u002b00:00","dateModified":"2025-08-03T04:20:20\u002b00:00","description":"\u003cp\u003eAre you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.\u003c\/p\u003e\n\u003cp\u003eOur overarching philosophy is simple: \u003cstrong\u003eisolate and change only one variable at a time.\u003c\/strong\u003e While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your \u003cstrong\u003egrind size\u003c\/strong\u003e is your most powerful lever.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-05-01T00:00:00Z>May 1, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
6-minute read</span></div></div></header><div class=post-content><p>Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.</p><p>Our overarching philosophy is simple: <strong>isolate and change only one variable at a time.</strong> While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your <strong>grind size</strong> is your most powerful lever.</p><p>Let’s dive in!</p><hr><h3 id=part-1-the-foundation--dose-the-weight-of-dry-coffee><strong>Part 1: The Foundation — Dose (The Weight of Dry Coffee)</strong>
|
||||||
|
<a class=heading-link href=#part-1-the-foundation--dose-the-weight-of-dry-coffee><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Your dose is the bedrock of your espresso. It’s the weight of your ground coffee, and it should be the first variable you set and then keep <strong>constant</strong> during the initial dialing-in process.</p><p><strong>Why Dose Matters:</strong></p><ul><li><strong>Basket Size is Key:</strong> Your portafilter basket dictates your ideal dose. Too little coffee (under-dosing) creates excessive “headspace,” leading to soupy extractions. Too much (over-dosing) causes the coffee puck to touch the shower screen, preventing even water flow and causing channeling.</li><li><strong>Extraction “Work”:</strong> A higher dose means more coffee mass, requiring more “work” (a finer grind, more water) to extract properly.</li><li><strong>Coffee Type:</strong><ul><li><strong>Light Roasts:</strong> Denser and harder to extract. Consider a <strong>slightly lower dose</strong>.</li><li><strong>Dark Roasts:</strong> More brittle and soluble. You can often use a <strong>slightly higher dose</strong>.</li></ul></li></ul><p><strong>Application for Your Breville Barista Pro (54mm Portafilter):</strong></p><ul><li><strong>Your Starting Point:</strong> Always begin with <strong>18 grams</strong>. Use a scale for accuracy!</li><li><strong>Adjusting for Roast:</strong> For light roasts, if you’re struggling, drop to 17g. For dark roasts, you can try 19g.</li><li><strong>Golden Rule:</strong> Once you choose your starting dose (e.g., 18g), <strong>do not change it</strong> until you’ve dialed in your grind size.</li></ul><hr><h3 id=part-2-defining-the-drink--brew-ratio-dose-vs-yield><strong>Part 2: Defining the Drink — Brew Ratio (Dose vs. Yield)</strong>
|
||||||
|
<a class=heading-link href=#part-2-defining-the-drink--brew-ratio-dose-vs-yield><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The brew ratio defines the relationship between your dry coffee dose and the weight of your liquid espresso yield. Always measure by <strong>weight (grams)</strong>, not volume (mL), as crema can be inconsistent.</p><p><strong>Understanding Ratios:</strong></p><ul><li><strong>Ristretto (1:1 – 1:1.5):</strong> E.g., 18g in → 18g to 27g out. Strong, textured, less extracted.</li><li><strong>Espresso (Normale) (1:1.5 – 1:2.5):</strong> E.g., 18g in → 27g to 45g out. The standard, balanced shot.</li><li><strong>Lungo (1:2.5+):</strong> E.g., 18g in → 45g+ out. Weaker, less textured, more extracted.</li></ul><p><strong>The Fundamental Trade-Off:</strong></p><ul><li><strong>Longer Ratio (more water):</strong> Higher extraction, but lower strength (more diluted).</li><li><strong>Shorter Ratio (less water):</strong> Lower extraction, but higher strength (more concentrated).</li></ul><p><strong>Application for Your Breville Barista Pro:</strong></p><ul><li><strong>Recommended Starting Ratio:</strong> A <strong>1:2 ratio</strong> is the perfect place to begin.</li><li><strong>Practical Numbers:</strong> With your 18g dose, your target yield is <strong>36 grams</strong> of liquid espresso.</li><li><strong>Execution:</strong> Place your cup on a scale and use the manual brew function to stop the shot precisely when the scale reads 36g.</li></ul><hr><h3 id=part-3-the-diagnostic-tool--brew-time><strong>Part 3: The Diagnostic Tool — Brew Time</strong>
|
||||||
|
<a class=heading-link href=#part-3-the-diagnostic-tool--brew-time><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Brew time is not something you set directly; it’s the <strong>result</strong> of how much resistance your coffee puck provides against the machine’s water pressure. Think of it as a <strong>diagnostic tool</strong>.</p><p><strong>The 25-30 Second Guideline:</strong></p><p>This is a benchmark. If your 1:2 ratio shot falls within this time, your grind size is likely in the correct range for a balanced extraction.</p><ul><li><strong>Too Fast (<25s):</strong> Indicates under-extraction (often tastes sour).</li><li><strong>Too Slow (>30s):</strong> Indicates over-extraction (often tastes bitter).</li></ul><p><strong>Taste is King:</strong> Remember, if a shot tastes fantastic at 32 seconds, it’s a great shot! The time simply becomes part of your successful recipe for that specific coffee.</p><p><strong>Application for Your Breville Barista Pro:</strong></p><ul><li><strong>Pre-infusion:</strong> The Barista Pro’s low-pressure pre-infusion is <strong>part of your total brew time</strong>. Its purpose is to saturate the puck evenly to prevent channeling. Keep it consistent for every shot while dialing in.</li></ul><hr><h3 id=part-4-the-primary-control--grind-setting><strong>Part 4: The Primary Control — Grind Setting</strong>
|
||||||
|
<a class=heading-link href=#part-4-the-primary-control--grind-setting><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>This is where the magic (and sometimes frustration) happens. Grind size is your main tool for controlling the resistance of the coffee puck, which directly dictates your brew time.</p><p><strong>The Dual Impact of Grinding Finer:</strong></p><ol><li><strong>Increases surface area:</strong> Allows for more efficient flavor extraction.</li><li><strong>Increases resistance:</strong> Slows down water flow and increases contact time.</li></ol><p><strong>The Risk of Grinding Too Fine (Channeling):</strong></p><p>If the grind is too fine, the puck becomes so dense that high-pressure water can’t flow evenly. Instead, it “breaks” the puck and punches an easy path (a channel) through a weak spot. This results in a disastrous shot that is simultaneously:</p><ul><li><strong>Under-extracted:</strong> Most of the coffee is bypassed.</li><li><strong>Over-extracted:</strong> The water that does flow blasts through the channel, extracting harsh, bitter compounds.</li><li><strong>The Taste:</strong> A channeled shot tastes hollow, weak, sour, <em>and</em> bitter all at once.</li></ul><p><strong>The Goal:</strong> You want to <strong>grind as fine as you possibly can <em>without</em> causing significant channeling</strong>. This is the sweet spot for maximizing surface area and resistance for high, even extraction.</p><p><strong>Grind Retention (Purging):</strong> Most grinders retain some old grounds. When you change your grind setting, always purge a few grams of coffee to ensure your dose is entirely at the new setting.</p><p><strong>Application for Your Breville Barista Pro:</strong></p><ul><li><strong>Grinder Mechanism:</strong> The “Grind Amount” dial controls the <strong>TIME</strong> the grinder runs, not the weight. 
When you adjust the fineness, you <strong>must</strong> re-adjust the grind time to ensure you are still getting your target 18g dose.</li><li><strong>Tackling Channeling:</strong> The Barista Pro is prone to channeling. To fight this, focus on excellent <strong>puck prep</strong>: use a WDT (Weiss Distribution Technique) tool to break up clumps and evenly distribute the grounds before tamping levelly.</li></ul><hr><h3 id=the-complete-dialing-in-workflow><strong>The Complete Dialing-In Workflow</strong>
|
||||||
|
<a class=heading-link href=#the-complete-dialing-in-workflow><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>This systematic process will get you to a delicious shot from your Breville Barista Pro efficiently:</p><ol><li><strong>Set Your Constants:</strong><ul><li><strong>Dose:</strong> <strong>18g</strong>.</li><li><strong>Ratio:</strong> <strong>1:2</strong> (meaning a <strong>Yield</strong> of <strong>36g</strong>).</li><li><strong>Pre-infusion:</strong> Use a consistent method (e.g., manual 8-second hold).</li></ul></li><li><strong>Make an Initial Grind:</strong><ul><li>Set the grinder to a starting point of <strong>15</strong>.</li><li>Adjust the grind <strong>time</strong> until the grinder dispenses exactly 18g.</li></ul></li><li><strong>Pull the First Shot:</strong><ul><li>Brew manually, stopping at <strong>36g</strong> of liquid in the cup. Note the <strong>total brew time</strong>.</li></ul></li><li><strong>Taste and Diagnose:</strong><ul><li><strong>Fast & Sour? (<25s):</strong> Grind is too coarse.</li><li><strong>Slow & Bitter? (>32s):</strong> Grind is too fine.</li></ul></li><li><strong>Make ONE Adjustment - THE GRIND SIZE:</strong><ul><li>If fast/sour, adjust the grind <strong>finer</strong> (e.g., from 15 down to 13).</li><li>If slow/bitter, adjust the grind <strong>coarser</strong> (e.g., from 15 up to 17).</li></ul></li><li><strong>Re-adjust and Repeat:</strong><ul><li>After changing the grind setting, <strong>purge</strong> a small amount of coffee.</li><li>Re-weigh your next dose and <strong>adjust the grind time</strong> to get back to exactly 18g.</li><li>Pull another 36g shot. Repeat this process until your shot tastes balanced and the time falls roughly between <strong>25-32 seconds</strong>.</li></ul></li></ol><p>Happy brewing! 
With patience and this systematic approach, you’ll be pulling consistently delicious espresso shots from your Breville Barista Pro in no time.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
21
posts/how-rvq-teaches-llms-to-see-and-hear/index.html
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Beyond Words: How RVQ Teaches LLMs to See and Hear · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?
|
||||||
|
The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is Residual Vector Quantization (RVQ)."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Beyond Words: How RVQ Teaches LLMs to See and Hear"><meta name=twitter:description content="Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?
|
||||||
|
The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is Residual Vector Quantization (RVQ)."><meta property="og:url" content="https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Beyond Words: How RVQ Teaches LLMs to See and Hear"><meta property="og:description" content="Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?
|
||||||
|
The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is Residual Vector Quantization (RVQ)."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-08-07T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-08T17:36:52+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Beyond Words: How RVQ Teaches LLMs to See and Hear","genre":"Blog","wordcount":"1150","url":"https:\/\/ericxliu.me\/posts\/how-rvq-teaches-llms-to-see-and-hear\/","datePublished":"2025-08-07T00:00:00\u002b00:00","dateModified":"2025-08-08T17:36:52\u002b00:00","description":"\u003cp\u003eLarge Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?\u003c\/p\u003e\n\u003cp\u003eThe answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is \u003cstrong\u003eResidual Vector Quantization (RVQ)\u003c\/strong\u003e.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/>Beyond Words: How RVQ Teaches LLMs to See and Hear</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-08-07T00:00:00Z>August 7, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
6-minute read</span></div></div></header><div class=post-content><p>Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?</p><p>The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is <strong>Residual Vector Quantization (RVQ)</strong>.</p><p>This article dives deep into RVQ, exploring how it turns raw data into meaningful semantic IDs and how these IDs, in turn, unlock multi-modal understanding in LLMs.</p><h4 id=what-is-residual-vector-quantization-the-art-of-smart-compression><strong>What is Residual Vector Quantization? The Art of Smart Compression</strong>
|
||||||
|
<a class=heading-link href=#what-is-residual-vector-quantization-the-art-of-smart-compression><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>At its core, Vector Quantization (VQ) is a compression technique. It maps a high-dimensional vector (like an data embedding) to the single closest vector in a predefined dictionary, called a <strong>codebook</strong>. You then only need to store the index of that chosen vector. The problem? To represent complex data accurately, you’d need a codebook with an astronomical number of entries, which is computationally impossible.</p><p>This is where <strong>Residual</strong> Vector Quantization shines. Instead of one giant codebook, RVQ uses a series of smaller codebooks in stages.</p><ol><li><strong>Stage 1 (Coarse Quantization):</strong> The input vector is quantized by the first codebook. This finds the broadest, most general category for the data.</li><li><strong>Calculate the Residual:</strong> The system calculates the error, or “residual,” between the original vector and its quantized version from Stage 1. This residual vector represents the information that was lost in the first coarse approximation.</li><li><strong>Stage 2 (Refinement):</strong> This residual vector is then quantized by the <em>second</em> codebook. This stage doesn’t re-evaluate the whole vector, but only focuses on correcting the error from the previous stage.</li><li><strong>Iterate:</strong> This process repeats for several stages, with each subsequent codebook quantizing the residual error from the previous one, adding a finer and finer layer of detail.</li></ol><p>The final compressed representation is simply the sequence of indices from each codebook. For example, an ID like <code>[8, 5, 4, 1]</code> is produced. The magic of this approach is that it creates a <strong>hierarchical ID</strong>. The first digit <code>[8]</code> might represent “Sports,” the next <code>[5]</code> refines it to “Court Sports,” <code>[4]</code> to “Beach Volleyball,” and the final <code>[1]</code> distinguishes a specific match. 
Videos with similar content will naturally share a longer prefix in their Semantic ID.</p><h4 id=learning-what-matters-the-trainable-vq-autoencoder><strong>Learning What Matters: The Trainable VQ-Autoencoder</strong>
|
||||||
|
<a class=heading-link href=#learning-what-matters-the-trainable-vq-autoencoder><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>A key insight is that RVQ is not a fixed algorithm but a <strong>trainable neural network component</strong>. Its codebooks are not predefined; they are learned. This learning happens within a <strong>Vector-Quantized Autoencoder (VQ-AE)</strong> architecture.</p><ol><li><strong>Encoder:</strong> A powerful neural network (e.g., a Transformer or CNN) takes the raw data (like video frames and audio) and converts it into a continuous semantic embedding.</li><li><strong>RVQ Bottleneck:</strong> This embedding is fed into the RVQ module, which quantizes it into the sequence of discrete IDs.</li><li><strong>Decoder:</strong> The decoder takes these discrete IDs, looks up the corresponding codebook vectors, sums them up to get a reconstructed embedding, and attempts to rebuild the original video/audio.</li></ol><p>The entire system is trained end-to-end. The <strong>reconstruction loss</strong> (the difference between the original and reconstructed data) is used to update the parameters of the Encoder, the Decoder, and, most importantly, <strong>the codebook vectors within the RVQ module</strong>. Initially random, the codebook vectors are gradually pushed to become meaningful “anchors” for the core concepts present in the training data.</p><h4 id=from-implicit-to-explicit-controlling-semantics-with-contrastive-learning><strong>From Implicit to Explicit: Controlling Semantics with Contrastive Learning</strong>
|
||||||
|
<a class=heading-link href=#from-implicit-to-explicit-controlling-semantics-with-contrastive-learning><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>A standard VQ-AE learns implicit semantics. It gets good at reconstruction, but we can’t control <em>what</em> concepts it learns. To make the Semantic IDs truly meaningful and aligned with human language, we introduce <strong>contrastive learning</strong>.</p><p>The architecture is enhanced with a parallel text encoder (like BERT or CLIP’s). The model is then trained with a joint loss function:</p><p><code>L_total = L_reconstruction + λ * L_contrastive</code></p><ul><li><strong>Reconstruction Loss</strong> ensures the RVQ codes contain enough information to rebuild the input.</li><li><strong>Contrastive Loss</strong> forces the media embedding (from the video/audio encoder) to be mathematically “close” to the text embedding of its description, and “far” from the embeddings of unrelated text descriptions.</li></ul><p>This dual goal forces the model to organize its embedding space according to the semantics of human language. The codebook vectors now learn to represent concepts that are not just useful for reconstruction, but are also tied to explicit textual descriptions.</p><h4 id=integrating-with-llms-two-powerful-paths-to-multi-modality><strong>Integrating with LLMs: Two Powerful Paths to Multi-Modality</strong>
|
||||||
|
<a class=heading-link href=#integrating-with-llms-two-powerful-paths-to-multi-modality><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Once we have a contrastively-trained VQ-AE, we can use its output to give LLMs the ability to see and hear. There are two primary strategies for this.</p><p><strong>Path 1: The Tokenizer Approach - Teaching the LLM a New Language</strong></p><p>This path treats the RVQ IDs as a new vocabulary. It’s a two-stage process ideal for high-fidelity content generation.</p><ol><li><strong>Create a Neural Codec:</strong> The trained VQ-AE serves as a powerful “codec.” You can take any piece of media (e.g., a song) and use the codec to compress it into a sequence of discrete RVQ tokens (e.g., <code>[8, 5, 4, 1, 8, 5, 9, 2, ...]</code>).</li><li><strong>Train a Generative LLM:</strong> A new Transformer model is trained auto-regressively on a massive dataset of these media-derived tokens. Its sole purpose is to learn the patterns and predict the next token in a sequence.</li></ol><p><strong>Use Case:</strong> This is the architecture behind models like Meta’s MusicGen. A user provides a text prompt, which conditions the Transformer to generate a new sequence of RVQ tokens. These tokens are then fed to the VQ-AE’s decoder to synthesize the final audio waveform.</p><p><strong>Path 2: The Adapter Approach - Translating for a Language Expert</strong></p><p>This path is used to augment a powerful, pre-trained, text-only LLM without the astronomical cost of retraining it.</p><ol><li><strong>Freeze the LLM:</strong> A massive, pre-trained LLM (like LLaMA) is frozen. Its deep language understanding is preserved.</li><li><strong>Use the Pre-Quantized Embedding:</strong> Instead of using the discrete RVQ tokens, we take the rich, continuous embedding vector produced by our media encoder <em>just before</em> it enters the RVQ module.</li><li><strong>Train a Small Adapter:</strong> A small, lightweight projection layer (or “adapter”) is trained. 
Its only job is to translate the media embedding into a vector that has the same format and structure as the LLM’s own word embeddings. It learns to map visual concepts to their corresponding “word” concepts in the LLM’s latent space.</li></ol><p><strong>Use Case:</strong> This is the principle behind models like Google’s Flamingo. To answer a question about an image, the image is passed through the media encoder and adapter. The resulting “vision-as-a-word” vector is inserted into the prompt sequence alongside the text tokens. The frozen LLM can now “reason” about the visual input because it has been translated into a format it already understands.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous 
onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
17
posts/index.html
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link 
rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/posts/>Posts</a></h1></header><ul><li><span class=date>January 21, 2026</span>
|
||||||
|
<a class=title href=/posts/vibe-coding-from-the-jeep/>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</a></li><li><span class=date>January 16, 2026</span>
|
||||||
|
<a class=title href=/posts/reverse-engineering-antigravity-ide/>How I Built a Blog Agent that Writes About Itself</a></li><li><span class=date>January 7, 2026</span>
|
||||||
|
<a class=title href=/posts/rooting-pixel-2-xl-for-reverse-engineering/>Why I Downgraded Magisk to Root My Pixel 2 XL</a></li><li><span class=date>January 2, 2026</span>
|
||||||
|
<a class=title href=/posts/debugging-authentik-performance/>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</a></li><li><span class=date>December 29, 2025</span>
|
||||||
|
<a class=title href=/posts/open-webui-openai-websearch/>How I Got Open WebUI Talking to OpenAI Web Search</a></li><li><span class=date>December 27, 2025</span>
|
||||||
|
<a class=title href=/posts/technical-deep-dive-llm-categorization/>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</a></li><li><span class=date>December 19, 2025</span>
|
||||||
|
<a class=title href=/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/>The Convergence of Fast Weights, Linear Attention, and State Space Models</a></li><li><span class=date>December 8, 2025</span>
|
||||||
|
<a class=title href=/posts/vattention/>vAttention</a></li><li><span class=date>November 15, 2025</span>
|
||||||
|
<a class=title href=/posts/jellyfin-sso-with-authentik/>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</a></li><li><span class=date>October 4, 2025</span>
|
||||||
|
<a class=title href=/posts/benchmarking-llms-on-jetson-orin-nano/>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</a></li></ul><ul class=pagination><li>1</li><li><a href=/posts/page/2/>2</a></li><li><a href=/posts/page/3/>3</a></li><li class=hidden><a href=/posts/page/2/>›</a></li><li><a href=/posts/page/3/>»</a></li></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
92
posts/index.xml
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Posts on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/posts/</link><description>Recent content in Posts on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Thu, 22 Jan 2026 06:48:07 +0000</lastBuildDate><atom:link href="https://ericxliu.me/posts/index.xml" rel="self" type="application/rss+xml"/><item><title>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</title><link>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</link><pubDate>Wed, 21 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vibe-coding-from-the-jeep/</guid><description><p>&ldquo;Vibe coding&rdquo; has become my latest obsession. It&rsquo;s that flow state where the tools disappear, and you&rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I&rsquo;ve been trying to answer a childhood dream.</p>
|
||||||
|
<p>Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with <em>Baywatch</em>, one of the first American TV shows I ever watched was <em>Knight Rider</em>. I don&rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.</p></description></item><item><title>How I Built a Blog Agent that Writes About Itself</title><link>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</link><pubDate>Fri, 16 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/reverse-engineering-antigravity-ide/</guid><description><p>I&rsquo;ve been spending a lot of time &ldquo;vibe coding&rdquo; in the Antigravity IDE lately. It&rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and &ldquo;aha!&rdquo; moments is locked away in an opaque, internal format.</p>
|
||||||
|
<p>I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you&rsquo;re reading right now).</p></description></item><item><title>Why I Downgraded Magisk to Root My Pixel 2 XL</title><link>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</link><pubDate>Wed, 07 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/</guid><description><p>For the past few weeks, I&rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn&rsquo;t proof.</p>
|
||||||
|
<p>Static analysis has its limits. I found the &ldquo;smoking gun&rdquo; function—a native method responsible for encrypting the login payload—but understanding <em>how</em> it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.</p></description></item><item><title>Why Your "Resilient" Homelab is Slower Than a Raspberry Pi</title><link>https://ericxliu.me/posts/debugging-authentik-performance/</link><pubDate>Fri, 02 Jan 2026 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/debugging-authentik-performance/</guid><description><p>In the world of self-hosting, there are many metrics for success: 99.9% uptime, sub-second latency, or a perfect GitOps pipeline. But for those of us running &ldquo;production&rdquo; at home, there is only one metric that truly matters: <strong>The Wife Acceptance Factor (WAF)</strong>.</p>
|
||||||
|
<p>My detailed Grafana dashboards said everything was fine. But my wife said the SSO login was &ldquo;slow sometimes.&rdquo; She was right. Debugging it took me down a rabbit hole of connection pooling, misplaced assumptions, and the harsh reality of running databases on distributed storage.</p></description></item><item><title>How I Got Open WebUI Talking to OpenAI Web Search</title><link>https://ericxliu.me/posts/open-webui-openai-websearch/</link><pubDate>Mon, 29 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/open-webui-openai-websearch/</guid><description><p>OpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue <a href="https://github.com/BerriAI/litellm/issues/13042" class="external-link" target="_blank" rel="noopener">#13042</a> tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every <code>web_search_call</code> before the UI ever sees it.</p>
|
||||||
|
<p>This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.</p></description></item><item><title>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</title><link>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</link><pubDate>Sat, 27 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/technical-deep-dive-llm-categorization/</guid><description><p>Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and &ldquo;wait, was this dinner or <em>vacation</em> dinner?&rdquo; questions.</p>
|
||||||
|
<p>For years, I relied on a rule-based system to categorize our credit card transactions. It worked&hellip; mostly. But maintaining <code>if &quot;UBER&quot; in description and amount &gt; 50</code> style rules is a never-ending battle against the entropy of merchant names and changing habits.</p></description></item><item><title>The Convergence of Fast Weights, Linear Attention, and State Space Models</title><link>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</link><pubDate>Fri, 19 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</guid><description><p>Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (&ldquo;Fast Weights&rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).</p>
|
||||||
|
<p>This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.</p></description></item><item><title>vAttention</title><link>https://ericxliu.me/posts/vattention/</link><pubDate>Mon, 08 Dec 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/vattention/</guid><description><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p>
|
||||||
|
<h4 id="the-status-quo-pagedattention-and-software-tables">
|
||||||
|
The Status Quo: PagedAttention and Software Tables
|
||||||
|
<a class="heading-link" href="#the-status-quo-pagedattention-and-software-tables">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h4>
|
||||||
|
<p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &ldquo;page table&rdquo; (the Block Table) [1].</p></description></item><item><title>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</title><link>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</link><pubDate>Sat, 15 Nov 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/jellyfin-sso-with-authentik/</guid><description><p>I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren&rsquo;t immediately obvious.</p>
|
||||||
|
<h2 id="the-setup">
|
||||||
|
The Setup
|
||||||
|
<a class="heading-link" href="#the-setup">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
<p>The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.</p></description></item><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description><h2 id="introduction">
|
||||||
|
Introduction
|
||||||
|
<a class="heading-link" href="#introduction">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
<p>NVIDIA&rsquo;s Jetson Orin Nano promises impressive specs: 1024 CUDA cores, 32 Tensor Cores, and 40 TOPS of INT8 compute performance packed into a compact, power-efficient edge device. On paper, it looks like a capable platform for running Large Language Models locally. But there&rsquo;s a catch—one that reveals a fundamental tension in modern edge AI hardware design.</p>
|
||||||
|
<p>After running 66 inference tests across seven different language models ranging from 0.5B to 5.4B parameters, I discovered something counterintuitive: the device&rsquo;s computational muscle sits largely idle during single-stream LLM inference. The bottleneck isn&rsquo;t computation—it&rsquo;s memory bandwidth. This isn&rsquo;t just a quirk of one device; it&rsquo;s a fundamental characteristic of single-user, autoregressive token generation on edge hardware—a reality that shapes how we should approach local LLM deployment.</p></description></item><item><title>Flashing Jetson Orin Nano in Virtualized Environments</title><link>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</link><pubDate>Thu, 02 Oct 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/flashing-jetson-orin-nano-in-virtualized-environments/</guid><description><h1 id="flashing-jetson-orin-nano-in-virtualized-environments">
|
||||||
|
Flashing Jetson Orin Nano in Virtualized Environments
|
||||||
|
<a class="heading-link" href="#flashing-jetson-orin-nano-in-virtualized-environments">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h1>
|
||||||
|
<h2 id="introduction">
|
||||||
|
Introduction
|
||||||
|
<a class="heading-link" href="#introduction">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h2>
|
||||||
|
<p>Flashing NVIDIA Jetson devices remotely presents unique challenges when the host machine is virtualized. This article documents the technical challenges, failures, and eventual success of flashing a Jetson Orin Nano Super developer kit using NVIDIA SDK Manager in various virtualized environments, specifically focusing on QEMU/KVM virtual machines and LXC containers on Proxmox VE.</p></description></item><item><title>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</title><link>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</link><pubDate>Sun, 28 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</guid><description><h3 id="overview">
|
||||||
|
Overview
|
||||||
|
<a class="heading-link" href="#overview">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<p>When using WireGuard together with MWAN3 on OpenWrt, the tunnel can fail to establish or flap when the peer&rsquo;s IP is routed into the tunnel itself. This is a classic routing bootstrap problem: WireGuard wants to route 0.0.0.0/0 into the tunnel, but the UDP packets to the peer&rsquo;s public endpoint also get captured, so they never reach the Internet to bring the tunnel up.</p></description></item><item><title>UniFi VLAN Migration to Zone-Based Architecture</title><link>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</link><pubDate>Mon, 22 Sep 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/</guid><description><p>Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.</p>
|
||||||
|
<p>This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.</p></description></item><item><title>Quantization in LLMs</title><link>https://ericxliu.me/posts/quantization-in-llms/</link><pubDate>Tue, 19 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/quantization-in-llms/</guid><description><p>The burgeoning scale of Large Language Models (LLMs) has necessitated a paradigm shift in their deployment, moving beyond full-precision floating-point arithmetic towards lower-precision representations. Quantization, the process of mapping a wide range of continuous values to a smaller, discrete set, has emerged as a critical technique to reduce model size, accelerate inference, and lower energy consumption. This article provides a technical overview of quantization theories, their application in modern LLMs, and highlights the ongoing innovations in this domain.</p></description></item><item><title>Breville Barista Pro Maintenance</title><link>https://ericxliu.me/posts/breville-barista-pro-maintenance/</link><pubDate>Sat, 16 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/breville-barista-pro-maintenance/</guid><description><p>Proper maintenance is critical for the longevity and performance of a Breville Barista Pro espresso machine. Consistent cleaning not only ensures the machine functions correctly but also directly impacts the quality of the espresso produced. This guide provides a detailed, technical breakdown of the essential maintenance routines, from automated cycles to daily upkeep.</p>
|
||||||
|
<h4 id="understanding-the-two-primary-maintenance-cycles">
|
||||||
|
<strong>Understanding the Two Primary Maintenance Cycles</strong>
|
||||||
|
<a class="heading-link" href="#understanding-the-two-primary-maintenance-cycles">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h4>
|
||||||
|
<p>The Breville Barista Pro has two distinct, automated maintenance procedures: the <strong>Cleaning (Flush) Cycle</strong> and the <strong>Descale Cycle</strong>. It is important to understand that these are not interchangeable, as they address different types of buildup within the machine.</p></description></item><item><title>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</title><link>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</link><pubDate>Sat, 09 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</guid><description><p>I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:</p>
|
||||||
|
<ul>
|
||||||
|
<li>nvidia-smi failed to communicate with the NVIDIA driver</li>
|
||||||
|
<li>modprobe nvidia → “Key was rejected by service”</li>
|
||||||
|
</ul>
|
||||||
|
<p>That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.</p></description></item><item><title>Beyond Words: How RVQ Teaches LLMs to See and Hear</title><link>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</link><pubDate>Thu, 07 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/how-rvq-teaches-llms-to-see-and-hear/</guid><description><p>Large Language Models (LLMs) are masters of text, but the world is not made of text alone. It’s a symphony of sights, sounds, and experiences. The ultimate goal for AI is to understand this rich, multi-modal world as we do. But how do you teach a model that thinks in words to understand a picture of a sunset or the melody of a song?</p>
|
||||||
|
<p>The answer lies in creating a universal language—a bridge between the continuous, messy world of pixels and audio waves and the discrete, structured world of language tokens. One of the most elegant and powerful tools for building this bridge is <strong>Residual Vector Quantization (RVQ)</strong>.</p></description></item><item><title>Supabase Deep Dive: It's Not Magic, It's Just Postgres</title><link>https://ericxliu.me/posts/supabase-deep-dive/</link><pubDate>Sun, 03 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/supabase-deep-dive/</guid><description><p>In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what&rsquo;s really going on.</p>
|
||||||
|
<p>Supabase enters this space with a radically different philosophy: <strong>transparency</strong>. It provides the convenience of a BaaS, but it’s built on the world&rsquo;s most trusted relational database: PostgreSQL. The &ldquo;magic&rdquo; isn&rsquo;t a proprietary black box; it&rsquo;s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.</p></description></item><item><title>A Deep Dive into PPO for Language Models</title><link>https://ericxliu.me/posts/ppo-for-language-models/</link><pubDate>Sat, 02 Aug 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/ppo-for-language-models/</guid><description><p>Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don&rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).</p>
|
||||||
|
<p>You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
|
||||||
|
<img src="https://ericxliu.me/images/ppo-for-language-models/7713bd3ecf27442e939b9190fa08165d.png" alt="S3 File"></p></description></item><item><title>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice</title><link>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</link><pubDate>Wed, 02 Jul 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</guid><description><p>Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called &ldquo;experts&rdquo;) to specialize in different types of inputs. A &ldquo;gating network&rdquo; or &ldquo;router&rdquo; learns to dispatch each input (or &ldquo;token&rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.</p>
|
||||||
|
<h3 id="1-challenge-non-differentiability-of-routing-functions">
|
||||||
|
1. Challenge: Non-Differentiability of Routing Functions
|
||||||
|
<a class="heading-link" href="#1-challenge-non-differentiability-of-routing-functions">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<p><strong>The Problem:</strong>
|
||||||
|
Many routing mechanisms, especially &ldquo;Top-K routing,&rdquo; involve a discrete, hard selection process. A common function is <code>KeepTopK(v, k)</code>, which selects the top <code>k</code> scoring elements from a vector <code>v</code> and sets others to $-\infty$ or $0$.</p></description></item><item><title>An Architectural Deep Dive of T5</title><link>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</link><pubDate>Sun, 01 Jun 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</guid><description><p>In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the &ldquo;decoder-only&rdquo; model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.</p>
|
||||||
|
<p>But to truly understand the field, we must look at the pivotal models that explored different paths. Google&rsquo;s T5, or <strong>Text-to-Text Transfer Transformer</strong>, stands out as one of the most influential. It didn&rsquo;t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.</p></description></item><item><title>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</title><link>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</link><pubDate>Thu, 01 May 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</guid><description><p>Are you ready to transform your home espresso game from good to genuinely great? The Breville Barista Pro is a fantastic machine, but unlocking its full potential requires understanding a few key principles. This guide will walk you through the systematic process of dialing in your espresso, ensuring every shot is delicious and repeatable.</p>
|
||||||
|
<p>Our overarching philosophy is simple: <strong>isolate and change only one variable at a time.</strong> While numbers are crucial, your palate is the ultimate judge. Dose, ratio, and time are interconnected, but your <strong>grind size</strong> is your most powerful lever.</p></description></item><item><title>Transformer's Core Mechanics</title><link>https://ericxliu.me/posts/transformer-s-core-mechanics/</link><pubDate>Tue, 01 Apr 2025 00:00:00 +0000</pubDate><guid>https://ericxliu.me/posts/transformer-s-core-mechanics/</guid><description><p>The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of &ldquo;channels&rdquo; to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.</p>
|
||||||
|
<h3 id="1-the-channel-a-foundational-view-of-d_model">
|
||||||
|
1. The &ldquo;Channel&rdquo;: A Foundational View of <code>d_model</code>
|
||||||
|
<a class="heading-link" href="#1-the-channel-a-foundational-view-of-d_model">
|
||||||
|
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||||
|
<span class="sr-only">Link to heading</span>
|
||||||
|
</a>
|
||||||
|
</h3>
|
||||||
|
<p>In deep learning, a &ldquo;channel&rdquo; can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model&rsquo;s primary embedding dimension, commonly referred to as <code>d_model</code>.</p></description></item><item><title>Some useful files</title><link>https://ericxliu.me/posts/useful/</link><pubDate>Mon, 26 Oct 2020 04:14:43 +0000</pubDate><guid>https://ericxliu.me/posts/useful/</guid><description><ul>
|
||||||
|
<li><a href="https://ericxliu.me/rootCA.crt" >rootCA.pem</a></li>
|
||||||
|
</ul></description></item></channel></rss>
|
||||||
74
posts/jellyfin-sso-with-authentik/index.html
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Setting Up Jellyfin SSO with Authentik: Surviving the Beta · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren’t immediately obvious.
|
||||||
|
|
||||||
|
The Setup
|
||||||
|
|
||||||
|
|
||||||
|
Link to heading
|
||||||
|
|
||||||
|
|
||||||
|
The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Setting Up Jellyfin SSO with Authentik: Surviving the Beta"><meta name=twitter:description content="I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren’t immediately obvious.
|
||||||
|
The Setup Link to heading The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent."><meta property="og:url" content="https://ericxliu.me/posts/jellyfin-sso-with-authentik/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Setting Up Jellyfin SSO with Authentik: Surviving the Beta"><meta property="og:description" content="I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren’t immediately obvious.
|
||||||
|
The Setup Link to heading The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-11-15T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-28T21:21:42+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/jellyfin-sso-with-authentik/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script 
type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Setting Up Jellyfin SSO with Authentik: Surviving the Beta","genre":"Blog","wordcount":"516","url":"https:\/\/ericxliu.me\/posts\/jellyfin-sso-with-authentik\/","datePublished":"2025-11-15T00:00:00\u002b00:00","dateModified":"2025-12-28T21:21:42\u002b00:00","description":"\u003cp\u003eI recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren\u0026rsquo;t immediately obvious.\u003c\/p\u003e\n\u003ch2 id=\u0022the-setup\u0022\u003e\n The Setup\n \u003ca class=\u0022heading-link\u0022 href=\u0022#the-setup\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h2\u003e\n\u003cp\u003eThe configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/jellyfin-sso-with-authentik/>Setting Up Jellyfin SSO with Authentik: Surviving the Beta</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-11-15T00:00:00Z>November 15, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
3-minute read</span></div></div></header><div class=post-content><p>I recently integrated Jellyfin with Authentik for Single Sign-On (SSO). While the plugin works, it is still very much in an early development phase. The logging is often sparse or cryptic, and the feedback loop can be frustrating. Here is a guide focused on the obscure errors you might encounter and the simple fixes that aren’t immediately obvious.</p><h2 id=the-setup>The Setup
|
||||||
|
<a class=heading-link href=#the-setup><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The configuration is best handled via API (curl) rather than the UI, as it ensures all fields are correctly typed and persistent.</p><h3 id=1-authentik-terraform>1. Authentik (Terraform)
|
||||||
|
<a class=heading-link href=#1-authentik-terraform><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Let Authentik manage the secrets. Don’t hardcode them.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-hcl data-lang=hcl><span style=display:flex><span><span style=color:#ff7b72>resource</span> <span style=color:#a5d6ff>"authentik_provider_oauth2" "jellyfin"</span> {
|
||||||
|
</span></span><span style=display:flex><span> name <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#a5d6ff>"Jellyfin"</span>
|
||||||
|
</span></span><span style=display:flex><span> client_id <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#a5d6ff>"jellyfin-ericxliu-me"</span><span style=color:#8b949e;font-style:italic>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic> # client_secret omitted -> auto-generated
|
||||||
|
</span></span></span><span style=display:flex><span> property_mappings <span style=color:#ff7b72;font-weight:700>=</span> [
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>openid</span>.<span style=color:#ff7b72>id</span>,
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>profile</span>.<span style=color:#ff7b72>id</span>,
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>email</span>.<span style=color:#ff7b72>id</span>,
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#ff7b72>authentik_scope_mapping</span>.<span style=color:#ff7b72>groups</span>.<span style=color:#ff7b72>id</span>
|
||||||
|
</span></span><span style=display:flex><span> ]<span style=color:#8b949e;font-style:italic>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic> # ...
|
||||||
|
</span></span></span><span style=display:flex><span>}
|
||||||
|
</span></span></code></pre></div><h3 id=2-jellyfin-plugin-bashcurl>2. Jellyfin Plugin (Bash/Curl)
|
||||||
|
<a class=heading-link href=#2-jellyfin-plugin-bashcurl><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># ... (retrieve secret from terraform) ...</span>
|
||||||
|
</span></span><span style=display:flex><span>curl -X POST <span style=color:#a5d6ff>"https://jellyfin.ericxliu.me/SSO/OID/Add/authentik"</span> ... -d <span style=color:#a5d6ff>'{
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "OidClientId": "jellyfin-ericxliu-me",
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "OidSecret": "'</span><span style=color:#a5d6ff>"</span><span style=color:#a5d6ff>${</span><span style=color:#79c0ff>SECRET</span><span style=color:#a5d6ff>}</span><span style=color:#a5d6ff>"</span><span style=color:#a5d6ff>'",
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "OidScopes": ["openid", "profile", "email", "groups"],
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "SchemeOverride": "https",
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "RoleClaim": "groups"
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> }'</span>
|
||||||
|
</span></span></code></pre></div><h2 id=obscure-errors--fixes>Obscure Errors & Fixes
|
||||||
|
<a class=heading-link href=#obscure-errors--fixes><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Because the plugin is still maturing, it doesn’t always handle configuration errors gracefully. Here are the two main “cryptic” failures I encountered.</p><h3 id=1-the-value-cannot-be-null-crash>1. The “Value cannot be null” Crash
|
||||||
|
<a class=heading-link href=#1-the-value-cannot-be-null-crash><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Symptom</strong>:
|
||||||
|
You attempt to start the SSO flow and get a generic 500 error. The Jellyfin logs show a C# exception:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-fallback data-lang=fallback><span style=display:flex><span>System.ArgumentNullException: Value cannot be null. (Parameter 'source')
|
||||||
|
</span></span><span style=display:flex><span> at System.Linq.Enumerable.Prepend[TSource](IEnumerable`1 source, TSource element)
|
||||||
|
</span></span><span style=display:flex><span> at Jellyfin.Plugin.SSO.Api.SSOController.OidChallenge(...)
|
||||||
|
</span></span></code></pre></div><p><strong>The Reality</strong>:
|
||||||
|
This looks like a deep internal failure, but it’s actually a simple configuration miss. The plugin code attempts to prepend “openid profile” to your configured scopes without checking if your scopes array exists first.
|
||||||
|
<strong>The Fix</strong>:
|
||||||
|
You <strong>must</strong> explicitly provide <code>"OidScopes"</code> in your JSON configuration. It cannot be null or omitted.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#a5d6ff>"OidScopes"</span><span style=color:#f85149>:</span> [<span style=color:#a5d6ff>"openid"</span>, <span style=color:#a5d6ff>"profile"</span>, <span style=color:#a5d6ff>"email"</span>, <span style=color:#a5d6ff>"groups"</span>]
|
||||||
|
</span></span></code></pre></div><h3 id=2-the-httphttps-mismatch-redirect-loop>2. The HTTP/HTTPS Mismatch (Redirect Loop)
|
||||||
|
<a class=heading-link href=#2-the-httphttps-mismatch-redirect-loop><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Symptom</strong>:
|
||||||
|
Authentik rejects the authorization request with “Redirect URI mismatch”, or the browser enters a redirect loop.
|
||||||
|
<strong>The Reality</strong>:
|
||||||
|
Jellyfin often sits behind a reverse proxy (Ingress/Traefik) terminating TLS. Use <code>Browser Developer Tools</code> to inspect the network requests. You will likely see the <code>redirect_uri</code> parameter encoded as <code>http://jellyfin...</code> instead of <code>https://</code>.
|
||||||
|
<strong>The Fix</strong>:
|
||||||
|
Do not rely on header forwarding magic. Force the scheme in the plugin configuration:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span><span style=color:#a5d6ff>"SchemeOverride"</span><span style=color:#f85149>:</span> <span style=color:#a5d6ff>"https"</span>
|
||||||
|
</span></span></code></pre></div><h3 id=3-case-sensitivity-in-json>3. Case Sensitivity in JSON
|
||||||
|
<a class=heading-link href=#3-case-sensitivity-in-json><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Symptom</strong>: Configuration seems to be ignored or fields remain empty after a POST.
|
||||||
|
<strong>The Reality</strong>: The plugin’s API controller keys are Case Sensitive in some versions/contexts.
|
||||||
|
<strong>The Fix</strong>: Stick to PascalCase for the keys (<code>OidEndpoint</code>, <code>AdminRoles</code>) as seen in the C# DTOs, rather than camelCase (<code>oidEndpoint</code>), unless the specific version documentation explicitly states otherwise. When in doubt, checking the source code (<code>SSOController.cs</code>) is often faster than trusting the README.</p><h2 id=summary>Summary
|
||||||
|
<a class=heading-link href=#summary><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>When debugging Jellyfin SSO, don’t trust the UI to tell you what’s wrong.</p><ol><li><strong>Check the logs</strong> (<code>kubectl logs</code>) for C# stack traces.</li><li><strong>Sanitize your JSON</strong> inputs (arrays can’t be null).</li><li><strong>Inspect the URL parameters</strong> in your browser to see what Redirect URI is actually being generated.</li></ol><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>Jellyfin SSO Plugin Repository: <code>https://github.com/9p4/jellyfin-plugin-sso</code></li><li>Authentik Documentation: <code>https://goauthentik.io/docs/providers/oauth2/</code></li><li>Jellyfin API Documentation: <code>https://api.jellyfin.org/</code></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
@@ -0,0 +1,47 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called “experts”) to specialize in different types of inputs. A “gating network” or “router” learns to dispatch each input (or “token”) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.
|
||||||
|
|
||||||
|
1. Challenge: Non-Differentiability of Routing Functions
|
||||||
|
|
||||||
|
|
||||||
|
Link to heading
|
||||||
|
|
||||||
|
|
||||||
|
The Problem:
|
||||||
|
Many routing mechanisms, especially “Top-K routing,” involve a discrete, hard selection process. A common function is KeepTopK(v, k), which selects the top k scoring elements from a vector v and sets others to $-\infty$ or $0$."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice"><meta name=twitter:description content="Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called “experts”) to specialize in different types of inputs. A “gating network” or “router” learns to dispatch each input (or “token”) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.
|
||||||
|
1. Challenge: Non-Differentiability of Routing Functions Link to heading The Problem: Many routing mechanisms, especially “Top-K routing,” involve a discrete, hard selection process. A common function is KeepTopK(v, k), which selects the top k scoring elements from a vector v and sets others to $-\infty$ or $0$."><meta property="og:url" content="https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice"><meta property="og:description" content="Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called “experts”) to specialize in different types of inputs. A “gating network” or “router” learns to dispatch each input (or “token”) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.
|
||||||
|
1. Challenge: Non-Differentiability of Routing Functions Link to heading The Problem: Many routing mechanisms, especially “Top-K routing,” involve a discrete, hard selection process. A common function is KeepTopK(v, k), which selects the top k scoring elements from a vector v and sets others to $-\infty$ or $0$."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-07-02T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-03T06:02:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon 
href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Mixture-of-Experts (MoE) Models Challenges \u0026 Solutions in Practice","genre":"Blog","wordcount":"1381","url":"https:\/\/ericxliu.me\/posts\/mixture-of-experts-moe-models-challenges-solutions-in-practice\/","datePublished":"2025-07-02T00:00:00\u002b00:00","dateModified":"2025-08-03T06:02:48\u002b00:00","description":"\u003cp\u003eMixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called \u0026ldquo;experts\u0026rdquo;) to specialize in different types of inputs. A \u0026ldquo;gating network\u0026rdquo; or \u0026ldquo;router\u0026rdquo; learns to dispatch each input (or \u0026ldquo;token\u0026rdquo;) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.\u003c\/p\u003e\n\u003ch3 id=\u00221-challenge-non-differentiability-of-routing-functions\u0022\u003e\n 1. 
Challenge: Non-Differentiability of Routing Functions\n \u003ca class=\u0022heading-link\u0022 href=\u0022#1-challenge-non-differentiability-of-routing-functions\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h3\u003e\n\u003cp\u003e\u003cstrong\u003eThe Problem:\u003c\/strong\u003e\nMany routing mechanisms, especially \u0026ldquo;Top-K routing,\u0026rdquo; involve a discrete, hard selection process. A common function is \u003ccode\u003eKeepTopK(v, k)\u003c\/code\u003e, which selects the top \u003ccode\u003ek\u003c\/code\u003e scoring elements from a vector \u003ccode\u003ev\u003c\/code\u003e and sets others to $-\\infty$ or $0$.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-07-02T00:00:00Z>July 2, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
7-minute read</span></div></div></header><div class=post-content><p>Mixture-of-Experts (MoEs) are neural network architectures that allow different parts of the model (called “experts”) to specialize in different types of inputs. A “gating network” or “router” learns to dispatch each input (or “token”) to a subset of these experts. While powerful for scaling models, MoEs introduce several practical challenges.</p><h3 id=1-challenge-non-differentiability-of-routing-functions>1. Challenge: Non-Differentiability of Routing Functions
|
||||||
|
<a class=heading-link href=#1-challenge-non-differentiability-of-routing-functions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Problem:</strong>
|
||||||
|
Many routing mechanisms, especially “Top-K routing,” involve a discrete, hard selection process. A common function is <code>KeepTopK(v, k)</code>, which selects the top <code>k</code> scoring elements from a vector <code>v</code> and sets others to $-\infty$ or $0$.</p>$$
|
||||||
|
KeepTopK(v, k)_i = \begin{cases} v_i & \text{if } v_i \text{ is in the top } k \text{ elements of } v \\ -\infty & \text{otherwise.} \end{cases}
|
||||||
|
$$<p>This function is <strong>not differentiable</strong>. Its gradient is zero almost everywhere and undefined at the threshold points, making it impossible to directly train the gating network’s parameters (e.g., $W_g$) using standard gradient descent.</p><p><strong>Solutions (Stochastic Approximations):</strong>
|
||||||
|
To enable end-to-end training, non-differentiable routing decisions must be approximated with differentiable or stochastic methods.</p><ul><li><p><strong>Stochastic Scoring (e.g., Shazeer et al. 2017):</strong>
|
||||||
|
The expert score $H(x)_i = (x \cdot W_g)_i + \text{StandardNormal}() \cdot \text{Softplus}((x \cdot W_{noise})_i)$ introduces Gaussian noise. This makes the scores themselves stochastic, which can be leveraged with other methods.</p></li><li><p><strong>Gumbel-Softmax Trick (or Concrete Distribution):</strong>
|
||||||
|
This method allows for differentiable sampling from categorical distributions. Instead of directly picking the top-k, Gumbel noise is added to the scores, and a Softmax (with a temperature parameter) is applied. This provides a continuous, differentiable approximation of a discrete choice, allowing gradients to flow back.</p></li><li><p><strong>REINFORCE (Score Function Estimator):</strong>
|
||||||
|
This is a policy gradient method from reinforcement learning. The routing decision is treated as an action, and the gating network’s parameters are updated based on the “reward” (e.g., the model’s performance). Gradients are estimated by sampling routing choices and weighting them by their outcomes.</p></li><li><p><strong>Straight-Through Estimator (STE):</strong>
|
||||||
|
A simpler approximation where, during the backward pass, gradients are treated as if the non-differentiable operation was an identity function or a simple smooth function.</p></li><li><p><strong>Softmax after TopK (e.g., Mixtral, DBRX, DeepSeek v3):</strong>
|
||||||
|
Instead of <code>Softmax(KeepTopK(...))</code>, some models apply a Softmax <em>only to the scores of the selected TopK experts</em>, and then assign $0$ to the rest. This provides differentiable weights for the selected experts while still enforcing sparsity.</p></li></ul><h3 id=2-challenge-uneven-expert-utilization-balancing-loss>2. Challenge: Uneven Expert Utilization (Balancing Loss)
|
||||||
|
<a class=heading-link href=#2-challenge-uneven-expert-utilization-balancing-loss><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Problem:</strong>
|
||||||
|
Left unchecked, the gating network might learn to heavily favor a few experts, leaving others underutilized. This leads to:</p><ul><li><strong>System Inefficiency:</strong> Overloaded experts become bottlenecks, while underutilized experts waste computational resources.</li><li><strong>Suboptimal Learning:</strong> Experts might not specialize effectively if they don’t receive diverse data.</li></ul><p><strong>Solution: Heuristic Balancing Losses (e.g., from Switch Transformer, Fedus et al. 2022)</strong>
|
||||||
|
An auxiliary loss is added to the total model loss during training to encourage more even expert usage.</p>$$ \text{loss}_{\text{auxiliary}} = \alpha \cdot N \cdot \sum_{i=1}^{N} f_i \cdot P_i $$<p>Where:</p><ul><li>$\alpha$: A hyperparameter controlling the strength of the auxiliary loss.</li><li>$N$: Total number of experts.</li><li>$f_i$: The <strong>fraction of tokens <em>actually dispatched</em> to expert $i$</strong> in the current batch $B$.
|
||||||
|
$$ f_i = \frac{1}{T} \sum_{x \in B} \mathbf{1}\{\text{argmax } p(x) = i\} $$
|
||||||
|
($p(x)$ here refers to the output of the gating network, which could be $s_{i,t}$ in the DeepSeek/classic router. The $\text{argmax}$ means it counts hard assignments to expert $i$.)</li><li>$P_i$: The <strong>fraction of the router <em>probability mass</em> allocated to expert $i$</strong> in the current batch $B$.
|
||||||
|
$$ P_i = \frac{1}{T} \sum_{x \in B} p_i(x) $$
|
||||||
|
($p_i(x)$ is the learned probability (or soft score) from the gating network for token $x$ and expert $i$.)</li></ul><p><strong>How it works:</strong>
|
||||||
|
The loss aims to minimize the product $f_i \cdot P_i$ when $f_i$ and $P_i$ are small, effectively pushing them to be larger (closer to $1/N$). If an expert $i$ is overused (high $f_i$ and $P_i$), its term in the sum contributes significantly to the loss. The derivative with respect to $p_i(x)$ reveals that “more frequent use = stronger downweighting,” meaning the gating network is penalized for sending too much traffic to an already busy expert.</p><p><strong>Relationship to Gating Network:</strong></p><ul><li><strong>$p_i(x)$ (or $s_{i,t}$):</strong> This is the output of the <strong>learned gating network</strong> (e.g., from a linear layer followed by Softmax). The gating network’s parameters are updated via gradient descent, influenced by this auxiliary loss.</li><li><strong>$P_i$:</strong> This is <em>calculated</em> from the outputs of the learned gating network for the current batch. It’s not a pre-defined value.</li></ul><p><strong>Limitation (“Second Best” Scenario):</strong>
|
||||||
|
Even with this loss, an expert can remain imbalanced if it’s consistently the “second best” option (high $P_i$) but never the <em>absolute top choice</em> that gets counted in $f_i$ (especially if $K=1$). This is because $f_i$ strictly counts hard assignments based on <code>argmax</code>. This limitation highlights why “soft” routing or “softmax after TopK” approaches can be more effective for truly even distribution.</p><h3 id=3-challenge-overfitting-during-fine-tuning>3. Challenge: Overfitting during Fine-tuning
|
||||||
|
<a class=heading-link href=#3-challenge-overfitting-during-fine-tuning><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p><strong>The Problem:</strong>
|
||||||
|
Sparse MoE models, despite only activating a few experts per token, possess a very large total number of parameters. When fine-tuning these models on <strong>smaller datasets</strong>, they are highly prone to <strong>overfitting</strong>. The model’s vast capacity allows it to memorize the limited fine-tuning data, leading to poor generalization performance on unseen validation data. This is evident when training loss continues to decrease, but validation loss stagnates or increases.</p><p><strong>Solutions:</strong></p><ul><li><p><strong>Zoph et al. Solution – Fine-tune non-MoE MLPs:</strong></p><ul><li>This strategy involves freezing a portion of the MoE model’s parameters during fine-tuning, specifically the large expert weights.</li><li>Instead, only the “non-MoE” parameters (e.g., attention layers, adapter layers, or the gating network itself) are updated.</li><li>This reduces the effective number of trainable parameters during fine-tuning, thereby mitigating the risk of overfitting on small datasets. It assumes the experts are already well-pre-trained for general tasks.</li></ul></li><li><p><strong>DeepSeek Solution – Use Lots of Data (1.4M SFT):</strong></p><ul><li>This approach tackles the problem by providing the model with a very large and diverse dataset for Supervised Fine-Tuning (SFT).</li><li>With abundant data (e.g., 1.4 million examples covering a wide range of tasks and languages), the model’s large capacity can be effectively utilized for specialized learning rather than memorization. The diversity and volume of data prevent individual experts from overfitting to specific examples.</li></ul></li></ul><p><strong>Conclusion:</strong>
|
||||||
|
MoE models offer significant advantages in terms of model capacity and computational efficiency, but their unique sparse activation pattern introduces challenges in training and fine-tuning. Overcoming non-differentiability in routing and ensuring balanced expert utilization are crucial for effective pre-training. During fine-tuning, managing the model’s vast parameter count to prevent overfitting on smaller datasets requires either strategic parameter freezing or access to very large and diverse fine-tuning data.
|
||||||
|
The <strong>Top-K routing</strong> mechanism, as illustrated in the provided image, is a core component in many modern Mixture-of-Experts (MoE) models. It involves selecting a fixed number (<code>K</code>) of experts for each input based on relevance scores.</p><hr><p><strong>Traditional Top-K (Deterministic Selection):</strong></p><ul><li><strong>How it works:</strong><ol><li>Calculate relevance scores (<code>s_{i,t}</code>) for each expert <code>i</code> and input <code>t</code>.</li><li>Identify the <code>K</code> experts with the highest scores.</li><li>Experts <em>within</em> the Top-K are assigned their scores (<code>g_{i,t} = s_{i,t}</code>).</li><li>Experts <em>outside</em> the Top-K are assigned a score of <code>0</code> (<code>g_{i,t} = 0</code>).</li><li>The output is a weighted sum of the selected experts’ outputs.</li></ol></li><li><strong>Pros:</strong> Predictable, deterministic, selects the “best” experts based on current scores.</li><li><strong>Cons:</strong> Can lead to expert imbalance, where a few popular experts are always chosen, starving others of training.</li></ul><p><strong>Alternative: Sampling from Softmax (Probabilistic Selection):</strong></p><ul><li><strong>How it works:</strong><ol><li>Calculate relevance scores (<code>s_{i,t}</code>) which are treated as probabilities (after softmax).</li><li><strong>Randomly sample</strong> <code>K</code> unique expert indices from the distribution defined by these probabilities.</li><li>Selected experts contribute; unselected experts do not.</li></ol></li><li><strong>Why it’s suggested:</strong><ul><li><strong>Load Balancing:</strong> Prevents expert collapse by ensuring all experts get a chance to be selected, even those with slightly lower scores. 
This promotes more even training across the entire expert pool.</li><li><strong>Diversity & Exploration:</strong> Introduces randomness, potentially leading to better generalization and robustness by exploring different expert combinations.</li></ul></li><li><strong>Pros:</strong> Better load balancing, prevents expert starvation, encourages exploration.</li><li><strong>Cons:</strong> Stochastic (non-deterministic routing), can make debugging harder, might not pick the absolute “best” expert in a single instance (but better for long-term training).</li></ul><p><strong>Key Takeaway:</strong> While deterministic Top-K is simpler and directly picks the “highest-scoring” experts, sampling from the softmax offers a more robust training dynamic by ensuring that all experts receive training data, thereby preventing some experts from becoming unused (“dead experts”).</p><hr></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js 
integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
89
posts/open-webui-openai-websearch/index.html
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>How I Got Open WebUI Talking to OpenAI Web Search · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="OpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue #13042 tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every web_search_call before the UI ever sees it.
|
||||||
|
This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="How I Got Open WebUI Talking to OpenAI Web Search"><meta name=twitter:description content="OpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue #13042 tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every web_search_call before the UI ever sees it.
|
||||||
|
This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself."><meta property="og:url" content="https://ericxliu.me/posts/open-webui-openai-websearch/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="How I Got Open WebUI Talking to OpenAI Web Search"><meta property="og:description" content="OpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue #13042 tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every web_search_call before the UI ever sees it.
|
||||||
|
This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-29T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-29T07:15:58+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/open-webui-openai-websearch/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" 
crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"How I Got Open WebUI Talking to OpenAI Web Search","genre":"Blog","wordcount":"1087","url":"https:\/\/ericxliu.me\/posts\/open-webui-openai-websearch\/","datePublished":"2025-12-29T00:00:00\u002b00:00","dateModified":"2025-12-29T07:15:58\u002b00:00","description":"\u003cp\u003eOpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue \u003ca href=\u0022https:\/\/github.com\/BerriAI\/litellm\/issues\/13042\u0022 class=\u0022external-link\u0022 target=\u0022_blank\u0022 rel=\u0022noopener\u0022\u003e#13042\u003c\/a\u003e tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every \u003ccode\u003eweb_search_call\u003c\/code\u003e before the UI ever sees it.\u003c\/p\u003e\n\u003cp\u003eThis post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/open-webui-openai-websearch/>How I Got Open WebUI Talking to OpenAI Web Search</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-12-29T00:00:00Z>December 29, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
6-minute read</span></div></div></header><div class=post-content><p>OpenAI promised native web search in GPT‑5, but LiteLLM proxy deployments (and by extension Open WebUI) still choke on it—issue <a href=https://github.com/BerriAI/litellm/issues/13042 class=external-link target=_blank rel=noopener>#13042</a> tracks the fallout. I needed grounded answers inside Open WebUI anyway, so I built a workaround: route GPT‑5 traffic through the Responses API and mask every <code>web_search_call</code> before the UI ever sees it.</p><p>This post documents the final setup, the hotfix script that keeps LiteLLM honest, and the tests that prove Open WebUI now streams cited answers without trying to execute the tool itself.</p><h2 id=why-open-webui-broke>Why Open WebUI Broke
|
||||||
|
<a class=heading-link href=#why-open-webui-broke><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ol><li><strong>Wrong API surface.</strong> <code>/v1/chat/completions</code> still rejects <code>type: "web_search"</code> with <code>Invalid value: 'web_search'. Supported values are: 'function' and 'custom'.</code></li><li><strong>LiteLLM tooling gap.</strong> The OpenAI TypedDicts in <code>litellm/types/llms/openai.py</code> only allow <code>Literal["function"]</code>. Even if the backend call succeeded, streaming would crash when it saw a new tool type.</li><li><strong>Open WebUI assumptions.</strong> The UI eagerly parses every tool delta, so when LiteLLM streamed the raw <code>web_search_call</code> chunk, the UI tried to execute it, failed to parse the arguments, and aborted the chat.</li></ol><p>Fixing all three required touching both the proxy configuration and the LiteLLM transformation path.</p><h2 id=step-1--route-gpt5-through-the-responses-api>Step 1 – Route GPT‑5 Through the Responses API
|
||||||
|
<a class=heading-link href=#step-1--route-gpt5-through-the-responses-api><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>LiteLLM’s Responses bridge activates whenever the backend model name starts with <code>openai/responses/</code>. I added a dedicated alias, <code>gpt-5.2-search</code>, that hardcodes the Responses API plus web search metadata. Existing models (reasoning, embeddings, TTS) stay untouched.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-yaml data-lang=yaml><span style=display:flex><span><span style=color:#8b949e;font-style:italic># proxy-config.yaml (sanitized)</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#7ee787>model_list</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>- <span style=color:#7ee787>model_name</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>gpt-5.2-search</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>litellm_params</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>model</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>openai/responses/openai/gpt-5.2</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>api_key</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff><OPENAI_API_KEY></span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>reasoning_effort</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>high</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>merge_reasoning_content_in_choices</span>:<span style=color:#6e7681> </span><span style=color:#79c0ff>true</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>tools</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>- <span style=color:#7ee787>type</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>web_search</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>user_location</span>:<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>type</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>approximate</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#7ee787>country</span>:<span style=color:#6e7681> </span><span style=color:#a5d6ff>US</span><span style=color:#6e7681>
|
||||||
|
</span></span></span></code></pre></div><p>Any client (Open WebUI included) can now request <code>model: "gpt-5.2-search"</code> over the standard <code>/v1/chat/completions</code> endpoint, and LiteLLM handles the Responses API hop transparently.</p><h2 id=step-2--mask-web_search_call-chunks-inside-litellm>Step 2 – Mask <code>web_search_call</code> Chunks Inside LiteLLM
|
||||||
|
<a class=heading-link href=#step-2--mask-web_search_call-chunks-inside-litellm><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Even with the right API, LiteLLM still needs to stream deltas Open WebUI can digest. My <a href=https://ericxliu.me/hotfix.py class=external-link target=_blank rel=noopener>hotfix.py</a> script copies the LiteLLM source into <code>/tmp/patch/litellm</code>, then rewrites two files. This script runs as part of the Helm release’s init hook so I can inject fixes directly into the container filesystem at pod start. That saves me from rebuilding and pushing new images every time LiteLLM upstream changes (or refuses a patch), which is critical while waiting for issue #13042 to land. I’ll try to upstream the fix, but this is admittedly hacky, so timelines are uncertain.</p><ol><li><strong><code>openai.py</code> TypedDicts</strong>: extend the tool chunk definitions to accept <code>Literal["web_search"]</code>.</li><li><strong><code>litellm_responses_transformation/transformation.py</code></strong>: intercept every streaming item and short-circuit anything with <code>type == "web_search_call"</code>, returning an empty assistant delta instead of a tool call.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Excerpt from hotfix.py</span>
|
||||||
|
</span></span><span style=display:flex><span>tool_call_chunk_original <span style=color:#ff7b72;font-weight:700>=</span> (
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>'class ChatCompletionToolCallChunk(TypedDict): # result of /chat/completions call</span><span style=color:#79c0ff>\n</span><span style=color:#a5d6ff>'</span>
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>' id: Optional[str]</span><span style=color:#79c0ff>\n</span><span style=color:#a5d6ff>'</span>
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>' type: Literal["function"]'</span>
|
||||||
|
</span></span><span style=display:flex><span>)
|
||||||
|
</span></span><span style=display:flex><span>tool_call_chunk_patch <span style=color:#ff7b72;font-weight:700>=</span> tool_call_chunk_original<span style=color:#ff7b72;font-weight:700>.</span>replace(
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>'Literal["function"]'</span>, <span style=color:#a5d6ff>'Literal["function", "web_search"]'</span>
|
||||||
|
</span></span><span style=display:flex><span>)
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#ff7b72;font-weight:700>...</span>
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#ff7b72>if</span> tool_call_chunk_original <span style=color:#ff7b72;font-weight:700>in</span> content:
|
||||||
|
</span></span><span style=display:flex><span> content <span style=color:#ff7b72;font-weight:700>=</span> content<span style=color:#ff7b72;font-weight:700>.</span>replace(tool_call_chunk_original, tool_call_chunk_patch, <span style=color:#a5d6ff>1</span>)
|
||||||
|
</span></span></code></pre></div><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span>added_block <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#a5d6ff>""" elif output_item.get("type") == "web_search_call":
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> # Mask the call: Open WebUI should never see tool metadata
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> action_payload = output_item.get("action")
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> verbose_logger.debug(
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "Chat provider: masking web_search_call (added) call_id=</span><span style=color:#a5d6ff>%s</span><span style=color:#a5d6ff> action=</span><span style=color:#a5d6ff>%s</span><span style=color:#a5d6ff>",
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> output_item.get("call_id"),
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> action_payload,
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> )
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> return ModelResponseStream(
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> choices=[
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> StreamingChoices(
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> index=0,
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> delta=Delta(content=""),
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> finish_reason=None,
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> )
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> ]
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> )
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff>"""</span>
|
||||||
|
</span></span></code></pre></div><p>These patches ensure LiteLLM never emits a <code>tool_calls</code> delta for <code>web_search</code>. Open WebUI only receives assistant text chunks, so it happily renders the model response and the inline citations the Responses API already provides.</p><h2 id=step-3--prove-it-with-curl-and-open-webui>Step 3 – Prove It with cURL (and Open WebUI)
|
||||||
|
<a class=heading-link href=#step-3--prove-it-with-curl-and-open-webui><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>I keep a simple smoke test (<code>litellm_smoke_test.sh</code>) that hits the public ingress with and without streaming. The only secrets are placeholders here, but the structure is the same.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-weight:700;font-style:italic>#!/usr/bin/env bash
|
||||||
|
</span></span></span><span style=display:flex><span>set -euo pipefail
|
||||||
|
</span></span><span style=display:flex><span>
|
||||||
|
</span></span><span style=display:flex><span>echo <span style=color:#a5d6ff>"Testing non-streaming..."</span>
|
||||||
|
</span></span><span style=display:flex><span>curl <span style=color:#a5d6ff>"https://api.ericxliu.me/v1/chat/completions"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>"Authorization: Bearer <LITELLM_MASTER_KEY>"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>"Content-Type: application/json"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -d <span style=color:#a5d6ff>'{
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "model": "gpt-5.2-search",
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "messages": [{"role": "user", "content": "Find the sunset time in Tokyo today."}]
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> }'</span>
|
||||||
|
</span></span><span style=display:flex><span>
|
||||||
|
</span></span><span style=display:flex><span>echo -e <span style=color:#a5d6ff>"\n\nTesting streaming..."</span>
|
||||||
|
</span></span><span style=display:flex><span>curl <span style=color:#a5d6ff>"https://api.ericxliu.me/v1/chat/completions"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>"Authorization: Bearer <LITELLM_MASTER_KEY>"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -H <span style=color:#a5d6ff>"Content-Type: application/json"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -d <span style=color:#a5d6ff>'{
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "model": "gpt-5.2-search",
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "stream": true,
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> "messages": [{"role": "user", "content": "What is the weather in NYC right now?"}]
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#a5d6ff> }'</span>
|
||||||
|
</span></span></code></pre></div><p>Each request now returns grounded answers with citations (<code>url_citation</code> annotations) via Open WebUI, and the SSE feed never stalls because the UI isn’t asked to interpret tool calls.</p><h2 id=lessons--pitfalls>Lessons & Pitfalls
|
||||||
|
<a class=heading-link href=#lessons--pitfalls><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>The Responses API is non-negotiable (and syntax-sensitive).</strong> <code>/v1/chat/completions</code> still rejects <code>web_search</code>. Always test against <code>/v1/responses</code> directly before wiring LiteLLM into the loop. Furthermore, the syntax for <code>reasoning</code> is different: while Chat Completions uses the top-level <code>reasoning_effort</code> parameter, the Responses API requires a nested object: <code>"reasoning": {"effort": "medium"}</code>.</li><li><strong>The Native Model Trap.</strong> Models like <code>gpt-5-search-api</code> exist and support web search via standard Chat Completions, but they are often less flexible—for instance, rejecting <code>reasoning_effort</code> entirely. Routing a standard model through LiteLLM’s Responses bridge offers more control over formatting and fallbacks.</li><li><strong>Magic strings control routing.</strong> LiteLLM has hardcoded logic (deep in <code>main.py</code>) that only triggers the Responses-to-Chat bridge if the backend model name starts with <code>openai/responses/</code>. Without that specific prefix, LiteLLM bypasses its internal transformation layer entirely, leading to cryptic 404s or “model not found” errors.</li><li><strong>Synthesized Sovereignty: The Call ID Crisis.</strong> Open WebUI is a “well-behaved” OpenAI client, yet it often omits the <code>id</code> field in <code>tool_calls</code> when sending assistant messages back to the server. LiteLLM’s Responses bridge initially exploded with a <code>KeyError: 'id'</code> because it assumed an ID would always be present. The fix: synthesizing predictable IDs like <code>auto_tool_call_N</code> on the fly to satisfy the server-side schema.</li><li><strong>The Argument Delta Void.</strong> In streaming mode, the Responses API sometimes skips sending <code>response.function_call_arguments.delta</code> entirely if the query is simple. 
If the proxy only waits for deltas, the client receives an empty <code>{}</code> for tool arguments. The solution is to fallback and synthesize the <code>arguments</code> string from the <code>action</code> payload (e.g., <code>output_item['action']['query']</code>) when deltas are missing.</li><li><strong>Streaming State Machines are Fragile.</strong> Open WebUI is highly sensitive to the exact state of a tool call. If it sees a <code>web_search_call</code> with <code>status: "in_progress"</code>, its internal parser chokes, assuming it’s an uncompleted “function” call. These intermediate state chunks must be intercepted and handled before they reach the UI.</li><li><strong>Defensive Masking is the Final Boss.</strong> To stop Open WebUI from entering an infinite client-side loop (thinking it needs to execute a tool it doesn’t have), LiteLLM must “mask” the <code>web_search_call</code> chunks. By emitting empty content deltas instead of tool chunks, we hide the server-side search mechanics from the UI, allowing it to stay focused on the final answer.</li></ul><p>With those guardrails in place, GPT‑5’s native web search works end-to-end inside Open WebUI, complete with citations, without waiting for LiteLLM upstream fixes.</p><h2 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><a href=https://docs.litellm.ai/docs/proxy/openai_responses class=external-link target=_blank rel=noopener>LiteLLM Documentation - OpenAI Responses API Bridge</a></li><li><a href=https://platform.openai.com/docs/api-reference/responses class=external-link target=_blank rel=noopener>OpenAI Documentation - Responses API</a></li><li><a href=https://github.com/BerriAI/litellm/issues/13042 class=external-link target=_blank rel=noopener>LiteLLM GitHub Issue #13042</a></li><li><a href=https://docs.openwebui.com/ class=external-link target=_blank rel=noopener>Open WebUI Documentation</a></li><li><a href=https://ericxliu.me/hotfix.py class=external-link target=_blank rel=noopener>The hotfix.py Script</a></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous 
onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
101
posts/openwrt-mwan3-wireguard-endpoint-exclusion/index.html
Normal file
1
posts/page/1/index.html
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>https://ericxliu.me/posts/</title><link rel=canonical href=https://ericxliu.me/posts/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/posts/"></head></html>
|
||||||
17
posts/page/2/index.html
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link 
rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/posts/>Posts</a></h1></header><ul><li><span class=date>October 2, 2025</span>
|
||||||
|
<a class=title href=/posts/flashing-jetson-orin-nano-in-virtualized-environments/>Flashing Jetson Orin Nano in Virtualized Environments</a></li><li><span class=date>September 28, 2025</span>
|
||||||
|
<a class=title href=/posts/openwrt-mwan3-wireguard-endpoint-exclusion/>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</a></li><li><span class=date>September 22, 2025</span>
|
||||||
|
<a class=title href=/posts/unifi-vlan-migration-to-zone-based-architecture/>UniFi VLAN Migration to Zone-Based Architecture</a></li><li><span class=date>August 19, 2025</span>
|
||||||
|
<a class=title href=/posts/quantization-in-llms/>Quantization in LLMs</a></li><li><span class=date>August 16, 2025</span>
|
||||||
|
<a class=title href=/posts/breville-barista-pro-maintenance/>Breville Barista Pro Maintenance</a></li><li><span class=date>August 9, 2025</span>
|
||||||
|
<a class=title href=/posts/secure-boot-dkms-and-mok-on-proxmox-debian/>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</a></li><li><span class=date>August 7, 2025</span>
|
||||||
|
<a class=title href=/posts/how-rvq-teaches-llms-to-see-and-hear/>Beyond Words: How RVQ Teaches LLMs to See and Hear</a></li><li><span class=date>August 3, 2025</span>
|
||||||
|
<a class=title href=/posts/supabase-deep-dive/>Supabase Deep Dive: It's Not Magic, It's Just Postgres</a></li><li><span class=date>August 2, 2025</span>
|
||||||
|
<a class=title href=/posts/ppo-for-language-models/>A Deep Dive into PPO for Language Models</a></li><li><span class=date>July 2, 2025</span>
|
||||||
|
<a class=title href=/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice</a></li></ul><ul class=pagination><li><a href=/posts/>«</a></li><li class=hidden><a href=/posts/>‹</a></li><li><a href=/posts/>1</a></li><li>2</li><li><a href=/posts/page/3/>3</a></li><li class=hidden><a href=/posts/page/3/>›</a></li><li><a href=/posts/page/3/>»</a></li></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
11
posts/page/3/index.html
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link 
rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/posts/>Posts</a></h1></header><ul><li><span class=date>June 1, 2025</span>
|
||||||
|
<a class=title href=/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/>An Architectural Deep Dive of T5</a></li><li><span class=date>May 1, 2025</span>
|
||||||
|
<a class=title href=/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</a></li><li><span class=date>April 1, 2025</span>
|
||||||
|
<a class=title href=/posts/transformer-s-core-mechanics/>Transformer's Core Mechanics</a></li><li><span class=date>October 26, 2020</span>
|
||||||
|
<a class=title href=/posts/useful/>Some useful files</a></li></ul><ul class=pagination><li><a href=/posts/>«</a></li><li class=hidden><a href=/posts/page/2/>‹</a></li><li><a href=/posts/>1</a></li><li><a href=/posts/page/2/>2</a></li><li>3</li></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
28
posts/ppo-for-language-models/index.html
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>A Deep Dive into PPO for Language Models · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don’t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).
|
||||||
|
You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
|
||||||
|
"><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="A Deep Dive into PPO for Language Models"><meta name=twitter:description content="Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don’t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).
|
||||||
|
You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows."><meta property="og:url" content="https://ericxliu.me/posts/ppo-for-language-models/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="A Deep Dive into PPO for Language Models"><meta property="og:description" content="Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don’t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).
|
||||||
|
You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-08-02T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/ppo-for-language-models/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" 
crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"A Deep Dive into PPO for Language Models","genre":"Blog","wordcount":"1393","url":"https:\/\/ericxliu.me\/posts\/ppo-for-language-models\/","datePublished":"2025-08-02T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003cp\u003eLarge Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don\u0026rsquo;t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).\u003c\/p\u003e\n\u003cp\u003eYou may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.\n\u003cimg src=\u0022\/images\/ppo-for-language-models\/7713bd3ecf27442e939b9190fa08165d.png\u0022 alt=\u0022S3 File\u0022\u003e\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/ppo-for-language-models/>A Deep Dive into PPO for Language Models</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-08-02T00:00:00Z>August 2, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
7-minute read</span></div></div></header><div class=post-content><p>Large Language Models (LLMs) have demonstrated astonishing capabilities, but out-of-the-box, they are simply powerful text predictors. They don’t inherently understand what makes a response helpful, harmless, or aligned with human values. The technique that has proven most effective at bridging this gap is Reinforcement Learning from Human Feedback (RLHF), and at its heart lies a powerful algorithm: Proximal Policy Optimization (PPO).</p><p>You may have seen diagrams like the one below, which outlines the RLHF training process. It can look intimidating, with a web of interconnected models, losses, and data flows.
|
||||||
|
<img src=/images/ppo-for-language-models/7713bd3ecf27442e939b9190fa08165d.png alt="S3 File"></p><p>This post will decode that diagram, piece by piece. We’ll explore the “why” behind each component, moving from high-level concepts to the deep technical reasoning that makes this process work.</p><h3 id=translating-rl-to-a-conversation>Translating RL to a Conversation
|
||||||
|
<a class=heading-link href=#translating-rl-to-a-conversation><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The first step is to understand how the traditional language of reinforcement learning maps to the world of text generation.</p><ul><li><strong>State (<code>s_t</code>)</strong>: In a chat setting, the “state” is the context of the conversation so far. It’s the initial prompt (<code>x</code>) plus all the text the model has generated up to the current moment (<code>y₁, ..., y_{t-1}</code>).</li><li><strong>Action (<code>a_t</code>)</strong>: The “action” is the model’s decision at each step. For an LLM, this means generating the very next token (<code>y_t</code>). A full response is a sequence of these actions.blob:https://aistudio.google.com/872e746f-88c1-40ec-8e45-fa0efce97299</li><li><strong>Reward (<code>r</code>)</strong>: The “reward” is a numeric score that tells the model how good its full response (<code>y</code>) was. This score comes from a separate <strong>Reward Model</strong>, which has been trained on a large dataset of human preference comparisons (e.g., humans rating which of two responses is better). This reward is often only awarded at the end of the entire generated sequence.</li></ul><p>Let’s make this concrete. 
If a user provides the prompt <strong>(x)</strong>: <em>“The best thing about AI is”</em>, and the model generates the response <strong>(y)</strong>: <em>“its potential to solve problems.”</em>, here is how it’s broken down for training:</p><ul><li><strong>State 1</strong>: “The best thing about AI is”<ul><li><strong>Action 1</strong>: “its”</li></ul></li><li><strong>State 2</strong>: “The best thing about AI is its”<ul><li><strong>Action 2</strong>: " potential"</li></ul></li><li><strong>State 3</strong>: “The best thing about AI is its potential”<ul><li><strong>Action 3</strong>: " to"</li></ul></li><li>…and so on for every generated token.</li></ul><p>This breakdown transforms a single prompt-response pair into a rich trajectory of state-action pairs, which becomes the raw data for our learning algorithm.</p><h3 id=the-cast-of-models-an-actor-critic-ensemble>The Cast of Models: An Actor-Critic Ensemble
|
||||||
|
<a class=heading-link href=#the-cast-of-models-an-actor-critic-ensemble><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The PPO process doesn’t rely on a single model but an ensemble where each member has a distinct role.</p><ol><li><strong>The Actor (Policy LM)</strong>: This is the star of the show—the LLM we are actively fine-tuning. Its role is to take a state (the current text) and decide on an action (the next token). We refer to its decision-making process as its “policy” (<code>π</code>).</li><li><strong>The Critic (Value Model)</strong>: This is the Actor’s coach. The Critic doesn’t generate text. Instead, it observes a state and estimates the <em>potential future reward</em> the Actor is likely to receive from that point onward. This estimate is called the “value” (<code>V(s_t)</code>). The Critic’s feedback helps the Actor understand whether it’s in a promising or a dead-end situation, which is a much more immediate learning signal than waiting for the final reward.</li><li><strong>The Reward Model</strong>: This is the ultimate judge. As mentioned, it’s a separate model trained on human preference data that provides the final score for a complete generation. Its judgment is treated as the ground truth for training both the Actor and the Critic.</li></ol><h3 id=the-challenge-of-credit-assignment-generalized-advantage-estimation-gae>The Challenge of Credit Assignment: Generalized Advantage Estimation (GAE)
|
||||||
|
<a class=heading-link href=#the-challenge-of-credit-assignment-generalized-advantage-estimation-gae><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>A key problem in RL is assigning credit. If a 20-token response gets a high reward, was it because of the first token, the last one, or all of them? The Critic helps solve this. By comparing the reward at each step with the Critic’s value estimate, we can calculate the <strong>Advantage (<code>Â</code>)</strong>.</p><p>A simple advantage calculation might be: <code>Advantage = reward + Value_of_next_state - Value_of_current_state</code>.</p><p>However, this can be noisy. PPO uses a more sophisticated technique called <strong>Generalized Advantage Estimation (GAE)</strong>. The formula looks complex, but the idea is intuitive:</p><p><code>Â(s_t, a_t) = Σ(γλ)^l * δ_{t+l}</code>
|
||||||
|
where <code>δ_t = r_t + γV(s_{t+1}) - V(s_t)</code></p><ul><li><strong>γ (gamma)</strong> is a discount factor (e.g., 0.99), which values immediate rewards slightly more than distant ones.</li><li><strong>λ (lambda)</strong> is a smoothing parameter that balances the trade-off between bias and variance. It creates a weighted average of advantages over multiple future time steps.</li></ul><p>In essence, GAE provides a more stable and accurate estimate of how much better a specific action was compared to the policy’s average behavior in that state.</p><h3 id=the-heart-of-ppo-the-quest-for-stable-updates>The Heart of PPO: The Quest for Stable Updates
|
||||||
|
<a class=heading-link href=#the-heart-of-ppo-the-quest-for-stable-updates><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Now we arrive at the core innovation of PPO. We want to update our Actor model to take actions with higher advantages. The naive way to do this is to re-weight our training objective by an <strong>importance sampling ratio</strong>: <code>(π_new / π_old)</code>. This corrects for the fact that the data we are learning from was generated by a slightly older version of our policy.</p><p>However, this ratio is incredibly dangerous. If the new policy becomes very different from the old one, the ratio can explode, leading to massive, unstable gradient updates that destroy the model.</p><p>PPO solves this with its signature <strong>Clipped Surrogate Objective</strong>. The PPO loss function is:</p><p><code>L_CLIP(θ) = Ê_t [ min( r_t(θ)Â_t, clip(r_t(θ), 1 - ε, 1 + ε)Â_t ) ]</code></p><p>Let’s translate this from math to English:</p><ul><li><code>r_t(θ)</code> is the probability ratio <code>π_new(a_t|s_t) / π_old(a_t|s_t)</code>.</li><li>The goal is to increase the objective by an amount proportional to the advantage <code>Â_t</code>.</li><li><strong>The <code>clip</code> function is the crucial safeguard.</strong> It forbids the probability ratio from moving outside a small window (e.g., <code>[0.8, 1.2]</code>).</li></ul><p>This means the algorithm says: “Let’s update our policy to favor this good action. But if the required update would change the policy too drastically from the old one, we’ll ‘clip’ the update to a more modest size.” This creates a “trust region,” ensuring stable, incremental improvements.</p><h3 id=avoiding-amnesia-the-pretraining-loss>Avoiding Amnesia: The Pretraining Loss
|
||||||
|
<a class=heading-link href=#avoiding-amnesia-the-pretraining-loss><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>There’s one final problem. If we only optimize for the PPO loss, the model might learn to “hack” the reward model by generating repetitive or nonsensical text that gets a high score. In doing so, it could suffer from <strong>catastrophic forgetting</strong>, losing its fundamental grasp of grammar and facts.</p><p>To prevent this, we introduce a second loss term. As seen in the diagram, we mix in data from the original <strong>Pretraining Data</strong> (or the dataset used for Supervised Fine-Tuning). We calculate a standard next-token prediction loss (<code>LM Loss</code>) on this high-quality data.</p><p>The final loss for the Actor is a combination of both objectives:</p><p><strong>Total Loss = Loss_PPO + <code>λ_ptx</code> * Loss_LM</strong></p><p>This brilliantly balances two goals:</p><ol><li>The <code>Loss_PPO</code> pushes the model towards behaviors that align with human preferences.</li><li>The <code>Loss_LM</code> acts as a regularizer, pulling the model back towards its core language capabilities and preventing it from drifting into gibberish.</li></ol><h3 id=the-full-training-loop>The Full Training Loop
|
||||||
|
<a class=heading-link href=#the-full-training-loop><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Now, we can assemble the entire process into a clear, iterative loop:</p><ol><li><strong>Collect</strong>: The current Actor policy <code>π_k</code> generates responses to a batch of prompts. These experiences—<code>(state, action, probability, reward, value)</code>—are stored in an <strong>Experience Buffer</strong>.</li><li><strong>Calculate</strong>: Once the buffer is full, we use the collected data to compute the advantage estimates <code>Â_t</code> for every single token-generation step.</li><li><strong>Optimize</strong>: For a few epochs, we repeatedly sample mini-batches from the buffer and update the Actor and Critic models. The Actor is updated using the combined <code>PPO-clip Loss</code> and <code>LM Loss</code>. The Critic is updated to improve its value predictions.</li><li><strong>Flush and Repeat</strong>: After the optimization phase, the entire experience buffer is discarded. The data is now “stale” because our policy has changed. The newly updated policy <code>π_{k+1}</code> becomes the new Actor, and we return to step 1 to collect fresh data.</li></ol><p>This cycle of collection and optimization allows the language model to gradually and safely steer its behavior towards human-defined goals, creating the helpful and aligned AI assistants we interact with today.</p><hr><p><strong>References:</strong></p><ol><li>Schulman, J., Wolski, F., Dhariwal, P., Radford, A., & Klimov, O. (2017). <em>Proximal Policy Optimization Algorithms</em>. arXiv preprint arXiv:1707.06347.</li><li>Schulman, J., Moritz, P., Levine, S., Jordan, M., & Abbeel, P. (2015). <em>High-Dimensional Continuous Control Using Generalized Advantage Estimation</em>. arXiv preprint arXiv:1506.02438.</li><li>Ouyang, L., et al. (2022). <em>Training language models to follow instructions with human feedback</em>. 
Advances in Neural Information Processing Systems 35.</li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
10
posts/quantization-in-llms/index.html
Normal file
27
posts/reverse-engineering-antigravity-ide/index.html
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>How I Built a Blog Agent that Writes About Itself · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="I’ve been spending a lot of time “vibe coding” in the Antigravity IDE lately. It’s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and “aha!” moments is locked away in an opaque, internal format.
|
||||||
|
I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you’re reading right now)."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="How I Built a Blog Agent that Writes About Itself"><meta name=twitter:description content="I’ve been spending a lot of time “vibe coding” in the Antigravity IDE lately. It’s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and “aha!” moments is locked away in an opaque, internal format.
|
||||||
|
I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you’re reading right now)."><meta property="og:url" content="https://ericxliu.me/posts/reverse-engineering-antigravity-ide/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="How I Built a Blog Agent that Writes About Itself"><meta property="og:description" content="I’ve been spending a lot of time “vibe coding” in the Antigravity IDE lately. It’s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and “aha!” moments is locked away in an opaque, internal format.
|
||||||
|
I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you’re reading right now)."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-16T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-22T01:49:53+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/reverse-engineering-antigravity-ide/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" 
crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"How I Built a Blog Agent that Writes About Itself","genre":"Blog","wordcount":"779","url":"https:\/\/ericxliu.me\/posts\/reverse-engineering-antigravity-ide\/","datePublished":"2026-01-16T00:00:00\u002b00:00","dateModified":"2026-01-22T01:49:53\u002b00:00","description":"\u003cp\u003eI\u0026rsquo;ve been spending a lot of time \u0026ldquo;vibe coding\u0026rdquo; in the Antigravity IDE lately. It\u0026rsquo;s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and \u0026ldquo;aha!\u0026rdquo; moments is locked away in an opaque, internal format.\u003c\/p\u003e\n\u003cp\u003eI wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you\u0026rsquo;re reading right now).\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/reverse-engineering-antigravity-ide/>How I Built a Blog Agent that Writes About Itself</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2026-01-16T00:00:00Z>January 16, 2026
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
4-minute read</span></div></div></header><div class=post-content><p>I’ve been spending a lot of time “vibe coding” in the Antigravity IDE lately. It’s an incredible flow state—intense, iterative, and fast. But it has a major flaw: the context is ephemeral. Once the session is over, that rich history of decisions, wrong turns, and “aha!” moments is locked away in an opaque, internal format.</p><p>I wanted to capture that value. I wanted a system that could take my chaotic coding sessions and distill them into structured, technical blog posts (like the one you’re reading right now).</p><p>But getting the data out turned into a much deeper rabbit hole than I expected.</p><h2 id=the-challenge-check-the-database>The Challenge: Check the Database?
|
||||||
|
<a class=heading-link href=#the-challenge-check-the-database><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>My first instinct was simple: It’s an Electron app, so there’s probably a SQLite database.</p><p>I found it easily enough at <code>~/Library/Application Support/Antigravity/User/globalStorage/state.vscdb</code>. But when I opened it up, I hit a wall. The data wasn’t plain text; it was stored in the <code>ItemTable</code> under keys like <code>antigravityUnifiedStateSync.trajectorySummaries</code> as Base64-encoded strings.</p><p>Decoding them revealed raw Protobuf wire formats, not JSON.</p><h3 id=the-wire-walking-dead-end>The “Wire-Walking” Dead End
|
||||||
|
<a class=heading-link href=#the-wire-walking-dead-end><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I spent a few hours writing a Python script to “wire-walk” the Protobuf data without a schema. I managed to extract some human-readable strings, but it was a mess:</p><ol><li><strong>Missing Context</strong>: I got fragments of text, but the user prompts and cohesive flow were gone.</li><li><strong>Encryption</strong>: The actual conversation files (ending in <code>.pb</code>) in <code>~/.gemini/antigravity/conversations/</code> were encrypted.</li></ol><p>It turns out Antigravity uses Electron’s <code>safeStorage</code> API, which interfaces directly with the macOS Keychain. Without the app’s private key (which is hardware-bound), that data is effectively random noise. I even tried using Frida to hook <code>safeStorage.decryptString()</code>, but macOS SIP (System Integrity Protection) and code signing shut that down immediately.</p><p>I was stuck. I couldn’t decrypt the local files, and I couldn’t parse the database effectively.</p><h2 id=the-breakthrough-living-off-the-land>The Breakthrough: Living Off the Land
|
||||||
|
<a class=heading-link href=#the-breakthrough-living-off-the-land><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>When you can’t break the front door, look for the side entrance. I realized I wasn’t the only one trying to read this state—the official extensions had to do it too.</p><p>I started poking around the source code of the <code>vscode-antigravity-cockpit</code> extension, specifically a file named <code>local_auth_importer.ts</code>. That’s where I found the golden ticket.</p><p>The extension <em>doesn’t</em> decrypt the local files. Instead, it reads a specific key from the SQLite database: <code>jetskiStateSync.agentManagerInitState</code>.</p><p>When I decoded field #6 of this Protobuf structure, I found an <code>OAuthTokenInfo</code> message. It contained the user’s active <code>accessToken</code> and <code>refreshToken</code>.</p><h3 id=shifting-strategy-dont-crack-it-join-it>Shifting Strategy: Don’t Crack it, Join it
|
||||||
|
<a class=heading-link href=#shifting-strategy-dont-crack-it-join-it><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>This changed everything. I didn’t need to reverse-engineer the local storage encryption; I just needed to impersonate the IDE.</p><p>By “piggybacking” on this existing auth mechanism, I could extract a valid OAuth token directly from the local state. But I still needed the endpoints.</p><p>Instead of guessing, I opened the <strong>Developer Tools</strong> inside Antigravity itself (it is Electron, after all). By enabling the Chrome network tracing tools and triggering an export manually, I caught the request in the act.</p><p>I saw the exact call to <code>exa.language_server_pb.LanguageServerService/ConvertTrajectoryToMarkdown</code>.</p><p>It was perfect. By sending a gRPC-over-HTTP request to this endpoint using the stolen token, the server—which <em>does</em> have access to the unencrypted history—returned a perfectly formatted Markdown document of my entire coding session.</p><h2 id=the-architecture-the-blog-agent>The Architecture: The Blog-Agent
|
||||||
|
<a class=heading-link href=#the-architecture-the-blog-agent><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Once I had the data extraction solved, building the rest of the “blog-agent” was straightforward. I built a <strong>Node.js</strong> stack to automate the pipeline:</p><ul><li><strong>Backend</strong>: An <strong>Express</strong> server handles the routing, session imports, and post generation.</li><li><strong>Frontend</strong>: A clean <strong>EJS</strong> interface to list sessions, view summaries, and “publish” them to the filesystem.</li><li><strong>Storage</strong>: A local SQLite database (<code>data/sessions.sqlite</code>) acts as a cache. (I learned my lesson: always cache your LLM inputs).</li><li><strong>The Brain</strong>: I use the <strong>OpenAI SDK</strong> (pointing to a LiteLLM proxy) to interface with <code>gemini-3-flash</code>. I wrote a map-reduce style prompt that first extracts technical decisions from the raw conversation log, then synthesizes them into a narrative.</li><li><strong>Persistence</strong>: The final posts are saved with YAML front matter into a <code>generated_posts/</code> directory.</li></ul><h2 id=key-insights>Key Insights
|
||||||
|
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Don’t Fight the OS</strong>: Trying to break macOS Keychain/SIP encryption is a losing battle for a weekend project.</li><li><strong>Follow the Tokens</strong>: Applications often store auth tokens in less-secure places (like plain SQLite or weaker encryption) than the user content itself.</li><li><strong>Extensions are Open Books</strong>: If an app has extensions, their source code is often the best documentation for the internal API.</li></ul><p>In a satisfyingly recursive loop, <strong>this very article was generated by the blog-agent itself</strong>, analyzing the “vibe coding” session where I built it.</p><h2 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><code>server.js</code>: The Express server and API implementation.</li><li><code>services/antigravity.js</code>: The client for the Antigravity gRPC-over-HTTP API.</li><li><a href=https://github.com/jlcodes99/vscode-antigravity-cockpit class=external-link target=_blank rel=noopener>vscode-antigravity-cockpit</a>: The extension that leaked the auth logic.</li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
38
posts/rooting-pixel-2-xl-for-reverse-engineering/index.html
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Why I Downgraded Magisk to Root My Pixel 2 XL · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="For the past few weeks, I’ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn’t proof.
|
||||||
|
Static analysis has its limits. I found the “smoking gun” function—a native method responsible for encrypting the login payload—but understanding how it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Why I Downgraded Magisk to Root My Pixel 2 XL"><meta name=twitter:description content="For the past few weeks, I’ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn’t proof.
|
||||||
|
Static analysis has its limits. I found the “smoking gun” function—a native method responsible for encrypting the login payload—but understanding how it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache."><meta property="og:url" content="https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Why I Downgraded Magisk to Root My Pixel 2 XL"><meta property="og:description" content="For the past few weeks, I’ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn’t proof.
|
||||||
|
Static analysis has its limits. I found the “smoking gun” function—a native method responsible for encrypting the login payload—but understanding how it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-07T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-08T06:02:38+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Why I Downgraded Magisk to Root My Pixel 2 XL","genre":"Blog","wordcount":"775","url":"https:\/\/ericxliu.me\/posts\/rooting-pixel-2-xl-for-reverse-engineering\/","datePublished":"2026-01-07T00:00:00\u002b00:00","dateModified":"2026-01-08T06:02:38\u002b00:00","description":"\u003cp\u003eFor the past few weeks, I\u0026rsquo;ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn\u0026rsquo;t proof.\u003c\/p\u003e\n\u003cp\u003eStatic analysis has its limits. I found the \u0026ldquo;smoking gun\u0026rdquo; function—a native method responsible for encrypting the login payload—but understanding \u003cem\u003ehow\u003c\/em\u003e it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/rooting-pixel-2-xl-for-reverse-engineering/>Why I Downgraded Magisk to Root My Pixel 2 XL</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2026-01-07T00:00:00Z>January 7, 2026
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
4-minute read</span></div></div></header><div class=post-content><p>For the past few weeks, I’ve been stuck in a stalemate with my EcoFlow Bluetooth Protocol Reverse Engineering Project. I have the hci snoop logs, I have the decompiled APK, and I have a strong suspicion about where the authentication logic is hiding. But suspicion isn’t proof.</p><p>Static analysis has its limits. I found the “smoking gun” function—a native method responsible for encrypting the login payload—but understanding <em>how</em> it constructs that payload within a strict 13-byte limit purely from assembly (ARM64) was proving to be a headache.</p><p>I needed to move from <strong>static analysis</strong> to <strong>dynamic analysis</strong>. I needed to hook the function at runtime, inspect the memory, and see the data before it gets encrypted. To do that, I needed a rooted Android device.</p><p>The only candidate in my drawer? An 8-year-old <strong>Google Pixel 2 XL (“taimen”)</strong> that hadn’t been turned on since 2017.</p><h2 id=the-objective>The Objective
|
||||||
|
<a class=heading-link href=#the-objective><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Bring this relic back to life, update it to the final official firmware, and gain <code>su</code> access to install Frida and tcpdump. It sounds simple, but 2026 tools don’t always play nice with 2017 hardware.</p><h2 id=phase-1-the-i-forgot-my-password-hurdle>Phase 1: The “I Forgot My Password” Hurdle
|
||||||
|
<a class=heading-link href=#phase-1-the-i-forgot-my-password-hurdle><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The first problem was mundane: I didn’t remember the PIN. My only way in was a physical <strong>Hard Reset</strong>, which relies on a specific sequence of hardware button inputs:</p><ol><li><strong>Fastboot Mode</strong>: Hold <code>Power</code> + <code>Vol Down</code> until the familiar bootloader screen appears.</li><li><strong>Recovery Mode</strong>: Use volume keys to select “Recovery Mode”.</li><li><strong>The “No Command” Trick</strong>: The phone reboots to a broken android logo. To get the actual menu, you have to hold <code>Power</code> and tap <code>Vol Up</code> <em>once</em>.</li><li><strong>Wipe</strong>: Select <code>Wipe data/factory reset</code>.</li></ol><p><strong>The Catch</strong>: This triggers <strong>Factory Reset Protection (FRP)</strong>. Upon boot, the device required authentication with the Google Account previously synced to the hardware. Since I verified my identity using the original credentials, I could proceed; otherwise, bypassing this security feature would have been a significant roadblock.</p><h2 id=phase-2-the-update-trap>Phase 2: The Update Trap
|
||||||
|
<a class=heading-link href=#phase-2-the-update-trap><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Once in, I checked the version: <code>Android 10 (QP1A.190711.020)</code>. This was ancient. The Pixel 2 XL officially supports Android 11, and I wanted the latest possible base for compatibility with modern tools.</p><p>I tried the easy route: <strong>Settings > System Update</strong>.
|
||||||
|
<strong>The Result</strong>: Failure. The phone refused to pull the final OTA (<code>RP1A.201005.004.A1</code>), likely due to the Google update servers no longer prioritizing this EOL device.</p><h3 id=the-fix-manual-flashing>The Fix: Manual Flashing
|
||||||
|
<a class=heading-link href=#the-fix-manual-flashing><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I had to bypass the OTA system entirely. I downloaded the <a href=https://developers.google.com/android/images class=external-link target=_blank rel=noopener>final Factory Image</a> from Google.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Don't rely on OTA. Flash the whole valid state.</span>
|
||||||
|
</span></span><span style=display:flex><span>fastboot -w update image-taimen-rp1a.201005.004.a1.zip
|
||||||
|
</span></span></code></pre></div><p><em>Note: I used the <code>-w</code> flag here since I had just wiped the device anyway. This gave me a pristine, stock Android 11 environment to break.</em></p><h2 id=phase-3-the-magisk-time-travel>Phase 3: The Magisk “Time Travel”
|
||||||
|
<a class=heading-link href=#phase-3-the-magisk-time-travel><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>This is where “modern tools meets old hardware” caused the most pain.</p><p><strong>The Hypothesis</strong>: Rooting a Pixel is standard procedure.</p><ol><li>Extract <code>boot.img</code> from the factory zip.</li><li>Patch it with the latest <strong>Magisk</strong> app.</li><li>Flash it back.</li></ol><p><strong>The Reality</strong>: Bootloop.
|
||||||
|
I used <strong>Magisk v30.6</strong> (the latest as of writing). The patch process “succeeded,” but flashing the resulting image caused the phone to immediately crash back to the bootloader with a “Cannot find valid operating system” error.</p><h3 id=debugging-the-bootloop>Debugging the Bootloop
|
||||||
|
<a class=heading-link href=#debugging-the-bootloop><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I suspected a regression in how modern Magisk handles the antiquated boot partition structure of the Pixel 2 (A/B partitions, but pre-GKI).</p><p>I decided to perform some “software archaeology” and use a version of Magisk that was contemporary with the device’s lifespan. I grabbed <strong>Magisk v25.0</strong> (released around 2022).</p><ol><li><strong>Repatch</strong>: I patched the <em>exact same</em> stock <code>boot.img</code> using the v25.0 app.</li><li><strong>Reflash</strong>:</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Flash to both slots to be safe</span>
|
||||||
|
</span></span><span style=display:flex><span>fastboot flash boot_a magisk_patched_25000.img
|
||||||
|
</span></span><span style=display:flex><span>fastboot flash boot_b magisk_patched_25000.img
|
||||||
|
</span></span></code></pre></div><p><strong>The Result</strong>: Success. The phone booted, and the Magisk app confirmed <code>Installed: 25.0</code>.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>❯ adb shell <span style=color:#a5d6ff>"su -c id"</span>
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#79c0ff>uid</span><span style=color:#ff7b72;font-weight:700>=</span>0<span style=color:#ff7b72;font-weight:700>(</span>root<span style=color:#ff7b72;font-weight:700>)</span> <span style=color:#79c0ff>gid</span><span style=color:#ff7b72;font-weight:700>=</span>0<span style=color:#ff7b72;font-weight:700>(</span>root<span style=color:#ff7b72;font-weight:700>)</span> <span style=color:#79c0ff>groups</span><span style=color:#ff7b72;font-weight:700>=</span>0<span style=color:#ff7b72;font-weight:700>(</span>root<span style=color:#ff7b72;font-weight:700>)</span> <span style=color:#79c0ff>context</span><span style=color:#ff7b72;font-weight:700>=</span>u:r:magisk:s0
|
||||||
|
</span></span></code></pre></div><h2 id=key-insights>Key Insights
|
||||||
|
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Don’t Trust OTAs on EOL Devices</strong>: If you’re reviving old hardware, the OTA mechanism is likely broken or unreliable. Go straight to the factory images.</li><li><strong>Version Matching Matters</strong>: Tools like Magisk evolve. Using a 2026 root method on a 2017 kernel is a recipe for instability. Sometimes, downgrading your tools is the only way forward.</li><li><strong>A/B Partitions</strong>: Always flash your patched boot image to <em>both</em> slots (<code>boot_a</code> and <code>boot_b</code>) to avoid active slot mismatches causing boot failures.</li></ul><p>With root access secured, the path is now clear to install Frida and finally intercept those elusive EcoFlow authentication packets.</p><h2 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ol><li><a href=https://developers.google.com/android/images class=external-link target=_blank rel=noopener>Google Pixel Factory Images</a></li><li><a href=https://topjohnwu.github.io/Magisk/install.html class=external-link target=_blank rel=noopener>Magisk Installation Guide</a></li><li><a href=https://github.com/topjohnwu/Magisk/releases class=external-link target=_blank rel=noopener>Magisk GitHub Releases</a></li><li><a href=https://xdaforums.com/t/guide-unlock-flash-root-for-the-pixel-2-xl-taimen.3702418/ class=external-link target=_blank rel=noopener>XDA Guide: Unlock/Flash/Root Pixel 2 XL</a></li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous 
onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
62
posts/secure-boot-dkms-and-mok-on-proxmox-debian/index.html
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:
|
||||||
|
|
||||||
|
nvidia-smi failed to communicate with the NVIDIA driver
|
||||||
|
modprobe nvidia → “Key was rejected by service”
|
||||||
|
|
||||||
|
That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian"><meta name=twitter:description content="I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:
|
||||||
|
nvidia-smi failed to communicate with the NVIDIA driver modprobe nvidia → “Key was rejected by service” That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key."><meta property="og:url" content="https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian"><meta property="og:description" content="I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:
|
||||||
|
nvidia-smi failed to communicate with the NVIDIA driver modprobe nvidia → “Key was rejected by service” That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-08-09T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-14T06:50:22+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox \u002b Debian","genre":"Blog","wordcount":"639","url":"https:\/\/ericxliu.me\/posts\/secure-boot-dkms-and-mok-on-proxmox-debian\/","datePublished":"2025-08-09T00:00:00\u002b00:00","dateModified":"2025-08-14T06:50:22\u002b00:00","description":"\u003cp\u003eI hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:\u003c\/p\u003e\n\u003cul\u003e\n\u003cli\u003envidia-smi failed to communicate with the NVIDIA driver\u003c\/li\u003e\n\u003cli\u003emodprobe nvidia → “Key was rejected by service”\u003c\/li\u003e\n\u003c\/ul\u003e\n\u003cp\u003eThat message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/secure-boot-dkms-and-mok-on-proxmox-debian/>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-08-09T00:00:00Z>August 9, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
3-minute read</span></div></div></header><div class=post-content><p>I hit an issue where all GPU Operator pods on one node were stuck in Init after migrating from Legacy BIOS to UEFI. The common error was NVIDIA components waiting for “toolkit-ready,” while the toolkit init container looped with:</p><ul><li>nvidia-smi failed to communicate with the NVIDIA driver</li><li>modprobe nvidia → “Key was rejected by service”</li></ul><p>That message is the tell: Secure Boot is enabled and the kernel refuses to load modules not signed by a trusted key.</p><h3 id=environment>Environment
|
||||||
|
<a class=heading-link href=#environment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>Proxmox VM (QEMU/KVM) 8.4.9</li><li>Debian 12 (bookworm), kernel 6.1</li><li>GPU: NVIDIA Tesla V100 (GV100GL)</li><li>NVIDIA driver installed via Debian packages (nvidia-driver, nvidia-kernel-dkms)</li></ul><h3 id=root-cause>Root Cause
|
||||||
|
<a class=heading-link href=#root-cause><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>Secure Boot enabled (verified with <code>mokutil --sb-state</code>)</li><li>NVIDIA DKMS modules were built, but the signing key was not trusted by the UEFI shim/firmware</li><li>VM booted via the fallback “UEFI QEMU HARDDISK” path (not shim), so MOK requests didn’t run; no MOK screen</li></ul><h3 id=strategy>Strategy
|
||||||
|
<a class=heading-link href=#strategy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Keep Secure Boot on; get modules trusted. That requires:</p><ol><li>Ensure the VM boots via shim (so MOK can work)</li><li>Make sure DKMS signs modules with a MOK key/cert</li><li>Enroll that MOK into the firmware via shim’s MokManager</li></ol><h3 id=step-1--boot-via-shim-and-persist-efi-variables>Step 1 — Boot via shim and persist EFI variables
|
||||||
|
<a class=heading-link href=#step-1--boot-via-shim-and-persist-efi-variables><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>In Proxmox (VM stopped):</p><ul><li>BIOS: OVMF (UEFI)</li><li>Add EFI Disk (stores OVMF VARS; required for MOK)</li><li>Machine: q35</li><li>Enable Secure Boot (option shows only with OVMF + EFI Disk)</li></ul><p>Inside Debian:</p><ul><li>Ensure ESP is mounted at <code>/boot/efi</code></li><li>Install signed boot stack:<div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo apt install shim-signed grub-efi-amd64-signed efibootmgr mokutil
|
||||||
|
</span></span><span style=display:flex><span>sudo grub-install --target<span style=color:#ff7b72;font-weight:700>=</span>x86_64-efi --efi-directory<span style=color:#ff7b72;font-weight:700>=</span>/boot/efi --bootloader-id<span style=color:#ff7b72;font-weight:700>=</span>debian
|
||||||
|
</span></span><span style=display:flex><span>sudo update-grub
|
||||||
|
</span></span></code></pre></div></li><li>Create/verify a boot entry that points to shim:<div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo efibootmgr -c -d /dev/sda -p <span style=color:#a5d6ff>15</span> -L <span style=color:#a5d6ff>"debian"</span> -l <span style=color:#a5d6ff>'\EFI\debian\shimx64.efi'</span>
|
||||||
|
</span></span><span style=display:flex><span>sudo efibootmgr -o 0002,0001,0000 <span style=color:#8b949e;font-style:italic># make shim (0002) first</span>
|
||||||
|
</span></span><span style=display:flex><span>sudo efibootmgr -n <span style=color:#a5d6ff>0002</span> <span style=color:#8b949e;font-style:italic># BootNext shim for the next reboot</span>
|
||||||
|
</span></span></code></pre></div></li></ul><p>Tip: If NVRAM resets or fallback path is used, copy as a fallback:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo mkdir -p /boot/efi/EFI/BOOT
|
||||||
|
</span></span><span style=display:flex><span>sudo cp /boot/efi/EFI/debian/shimx64.efi /boot/efi/EFI/BOOT/BOOTX64.EFI
|
||||||
|
</span></span><span style=display:flex><span>sudo cp /boot/efi/EFI/debian/<span style=color:#ff7b72;font-weight:700>{</span>mmx64.efi,grubx64.efi<span style=color:#ff7b72;font-weight:700>}</span> /boot/efi/EFI/BOOT/
|
||||||
|
</span></span></code></pre></div><h3 id=step-2--make-dkms-sign-nvidia-modules-with-a-mok>Step 2 — Make DKMS sign NVIDIA modules with a MOK
|
||||||
|
<a class=heading-link href=#step-2--make-dkms-sign-nvidia-modules-with-a-mok><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Debian already generated a DKMS key at <code>/var/lib/dkms/mok.key</code>. Create an X.509 cert in DER format:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo openssl req -new -x509 <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -key /var/lib/dkms/mok.key <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -out /var/lib/dkms/mok.der <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -outform DER <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -subj <span style=color:#a5d6ff>"/CN=DKMS MOK/"</span> <span style=color:#79c0ff>\
|
||||||
|
</span></span></span><span style=display:flex><span> -days <span style=color:#a5d6ff>36500</span>
|
||||||
|
</span></span></code></pre></div><p>Enable DKMS signing:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo sed -i <span style=color:#a5d6ff>'s|^mok_signing_key=.*|mok_signing_key=/var/lib/dkms/mok.key|'</span> /etc/dkms/framework.conf
|
||||||
|
</span></span><span style=display:flex><span>sudo sed -i <span style=color:#a5d6ff>'s|^mok_certificate=.*|mok_certificate=/var/lib/dkms/mok.der|'</span> /etc/dkms/framework.conf
|
||||||
|
</span></span></code></pre></div><p>Rebuild/install modules (signs them now):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo dkms build nvidia/<span style=color:#ff7b72>$(</span>modinfo -F version nvidia<span style=color:#ff7b72>)</span> -k <span style=color:#ff7b72>$(</span>uname -r<span style=color:#ff7b72>)</span> --force
|
||||||
|
</span></span><span style=display:flex><span>sudo dkms install nvidia/<span style=color:#ff7b72>$(</span>modinfo -F version nvidia<span style=color:#ff7b72>)</span> -k <span style=color:#ff7b72>$(</span>uname -r<span style=color:#ff7b72>)</span> --force
|
||||||
|
</span></span></code></pre></div><h3 id=step-3--enroll-the-mok-via-shim-mokmanager>Step 3 — Enroll the MOK via shim (MokManager)
|
||||||
|
<a class=heading-link href=#step-3--enroll-the-mok-via-shim-mokmanager><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Queue the cert and set a longer prompt timeout:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo mokutil --revoke-import
|
||||||
|
</span></span><span style=display:flex><span>sudo mokutil --import /var/lib/dkms/mok.der
|
||||||
|
</span></span><span style=display:flex><span>sudo mokutil --timeout <span style=color:#a5d6ff>30</span>
|
||||||
|
</span></span><span style=display:flex><span>sudo efibootmgr -n <span style=color:#a5d6ff>0002</span> <span style=color:#8b949e;font-style:italic># ensure next boot goes through shim</span>
|
||||||
|
</span></span></code></pre></div><p>Reboot to the VM console (not SSH). In the blue MOK UI:</p><ul><li>Enroll MOK → Continue → Yes → enter password → reboot</li></ul><p>If arrow keys don’t work in Proxmox noVNC:</p><ul><li>Use SPICE (virt-viewer), or</li><li>From the Proxmox host, send keys:<ul><li><code>qm sendkey <VMID> down</code>, <code>qm sendkey <VMID> ret</code>, <code>qm sendkey <VMID> esc</code></li></ul></li></ul><h3 id=verification>Verification
|
||||||
|
<a class=heading-link href=#verification><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo mokutil --test-key /var/lib/dkms/mok.der <span style=color:#8b949e;font-style:italic># “already enrolled”</span>
|
||||||
|
</span></span><span style=display:flex><span>sudo modprobe nvidia
|
||||||
|
</span></span><span style=display:flex><span>nvidia-smi
|
||||||
|
</span></span><span style=display:flex><span>kubectl -n gpu-operator get pods -o wide
|
||||||
|
</span></span></code></pre></div><p>Once the module loads, GPU Operator pods on that node leave Init and become Ready.</p><h3 id=key-insights>Key Insights
|
||||||
|
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>“Key was rejected by service” during <code>modprobe nvidia</code> means Secure Boot rejected an untrusted module.</li><li>Without shim in the boot path (or without a persistent EFI vars disk), <code>mokutil --import</code> won’t surface a MOK screen.</li><li>DKMS will not sign modules unless configured; set <code>mok_signing_key</code> and <code>mok_certificate</code> in <code>/etc/dkms/framework.conf</code>.</li><li>If you cannot or don’t want to use MOK, the pragmatic dev choice is to disable Secure Boot in OVMF. For production, prefer shim+MOK.</li></ul><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li>Proxmox Secure Boot setup (shim + MOK, EFI vars, DKMS): <a href=https://pve.proxmox.com/wiki/Secure_Boot_Setup#Setup_instructions_for_shim_+_MOK_variant class=external-link target=_blank rel=noopener>Proxmox docs</a></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
93
posts/supabase-deep-dive/index.html
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Supabase Deep Dive: It's Not Magic, It's Just Postgres · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what’s really going on.
|
||||||
|
Supabase enters this space with a radically different philosophy: transparency. It provides the convenience of a BaaS, but it’s built on the world’s most trusted relational database: PostgreSQL. The “magic” isn’t a proprietary black box; it’s a carefully assembled suite of open-source tools that enhance Postgres, not hide it."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Supabase Deep Dive: It's Not Magic, It's Just Postgres"><meta name=twitter:description content="In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what’s really going on.
|
||||||
|
Supabase enters this space with a radically different philosophy: transparency. It provides the convenience of a BaaS, but it’s built on the world’s most trusted relational database: PostgreSQL. The “magic” isn’t a proprietary black box; it’s a carefully assembled suite of open-source tools that enhance Postgres, not hide it."><meta property="og:url" content="https://ericxliu.me/posts/supabase-deep-dive/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Supabase Deep Dive: It's Not Magic, It's Just Postgres"><meta property="og:description" content="In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what’s really going on.
|
||||||
|
Supabase enters this space with a radically different philosophy: transparency. It provides the convenience of a BaaS, but it’s built on the world’s most trusted relational database: PostgreSQL. The “magic” isn’t a proprietary black box; it’s a carefully assembled suite of open-source tools that enhance Postgres, not hide it."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-08-03T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-04T03:59:37+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/supabase-deep-dive/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg 
color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Supabase Deep Dive: It\u0027s Not Magic, It\u0027s Just Postgres","genre":"Blog","wordcount":"1513","url":"https:\/\/ericxliu.me\/posts\/supabase-deep-dive\/","datePublished":"2025-08-03T00:00:00\u002b00:00","dateModified":"2025-08-04T03:59:37\u002b00:00","description":"\u003cp\u003eIn the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what\u0026rsquo;s really going on.\u003c\/p\u003e\n\u003cp\u003eSupabase enters this space with a radically different philosophy: \u003cstrong\u003etransparency\u003c\/strong\u003e. It provides the convenience of a BaaS, but it’s built on the world\u0026rsquo;s most trusted relational database: PostgreSQL. The \u0026ldquo;magic\u0026rdquo; isn\u0026rsquo;t a proprietary black box; it\u0026rsquo;s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/supabase-deep-dive/>Supabase Deep Dive: It's Not Magic, It's Just Postgres</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-08-03T00:00:00Z>August 3, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
8-minute read</span></div></div></header><div class=post-content><p>In the world of Backend-as-a-Service (BaaS), platforms are often treated as magic boxes. You push data in, you get data out, and you hope the magic inside scales. While this simplicity is powerful, it can obscure the underlying mechanics, leaving developers wondering what’s really going on.</p><p>Supabase enters this space with a radically different philosophy: <strong>transparency</strong>. It provides the convenience of a BaaS, but it’s built on the world’s most trusted relational database: PostgreSQL. The “magic” isn’t a proprietary black box; it’s a carefully assembled suite of open-source tools that enhance Postgres, not hide it.</p><p>This deep dive will deconstruct that suite. We will move beyond the basics to explore the architectural patterns, security models, and development workflows that allow you to build robust, scalable applications. We will cover:</p><ul><li><strong>The Supabase Blueprint:</strong> A procedural guide to designing your application.</li><li><strong>The Pillars of Supabase:</strong> A detailed look at Auth, Storage, Functions, and Realtime.</li><li><strong>Transactional Realtime:</strong> How Supabase guarantees data consistency in a live environment.</li><li><strong>Best Practices:</strong> The practical knowledge you need before writing a single line of code.</li></ul><h3 id=the-guiding-philosophy-your-database-as-the-source-of-truth>The Guiding Philosophy: Your Database as the Source of Truth
|
||||||
|
<a class=heading-link href=#the-guiding-philosophy-your-database-as-the-source-of-truth><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The most critical shift when adopting Supabase is to see your database as more than just a data store. It is your <strong>single source of truth</strong>. This means your database schema is responsible for:</p><ul><li><strong>Structure:</strong> The tables and columns that define your data.</li><li><strong>Relationships:</strong> The foreign keys that link tables together.</li><li><strong>Integrity:</strong> The constraints (<code>NOT NULL</code>, <code>UNIQUE</code>) that ensure your data is always valid.</li><li><strong>Security:</strong> The access control rules that define who can do what.</li></ul><p>By leveraging PostgreSQL’s native power, you get <strong>full ACID compliance</strong> (Atomicity, Consistency, Isolation, Durability) out of the box. You don’t need to worry about application-level code to prevent orphan records or inconsistent states; the database guarantees it for you.</p><h3 id=the-supabase-design-blueprint-a-procedural-guide>The Supabase Design Blueprint: A Procedural Guide
|
||||||
|
<a class=heading-link href=#the-supabase-design-blueprint-a-procedural-guide><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>To build a scalable application, follow a structured design process that moves from abstract ideas to concrete implementation.</p><h4 id=phase-1-conceptual-modeling-the-blueprint>Phase 1: Conceptual Modeling (The Blueprint)
|
||||||
|
<a class=heading-link href=#phase-1-conceptual-modeling-the-blueprint><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Before touching the Supabase dashboard, map out your application on paper.</p><ol><li><strong>Identify the “Nouns”:</strong> These are your core data objects, which will become your database tables. For a project management app, they are <code>projects</code>, <code>tasks</code>, <code>users</code>, <code>comments</code>.</li><li><strong>Define the “Verbs”:</strong> These are the user actions. “A user <em>creates</em> a task.” “A user <em>assigns</em> a task to another user.” These actions will inform your security policies and APIs.</li><li><strong>Map Relationships:</strong> How do the nouns connect? A <code>task</code> belongs to one <code>project</code>. A <code>user</code> can have many <code>tasks</code>. A <code>project</code> can have many <code>users</code> (a many-to-many relationship, requiring a <code>project_users</code> join table).</li></ol><h4 id=phase-2-the-foundation-schema--migrations>Phase 2: The Foundation (Schema & Migrations)
|
||||||
|
<a class=heading-link href=#phase-2-the-foundation-schema--migrations><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Translate your model into SQL. For any serious project, use the <strong>Supabase CLI</strong> to manage this process.</p><ol><li><strong>Develop Locally:</strong> Run a full Supabase stack on your machine with <code>supabase start</code>.</li><li><strong>Create Migration Files:</strong> Write your <code>CREATE TABLE</code> statements in SQL files. Define columns, data types, and foreign key <code>REFERENCES</code> to enforce your relationships.</li><li><strong>Version Control:</strong> Commit these migration files to Git. Your database schema is now version-controlled alongside your application code.</li><li><strong>Deploy:</strong> Use <code>supabase db push</code> to apply your migrations to your live production database. This workflow is safe, repeatable, and professional.</li></ol><h4 id=phase-3-the-security-layer-row-level-security>Phase 3: The Security Layer (Row Level Security)
|
||||||
|
<a class=heading-link href=#phase-3-the-security-layer-row-level-security><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>This is not an optional step. RLS is the cornerstone of Supabase security.</p><ol><li><strong>Deny by Default:</strong> For any table holding user data, immediately enable RLS. This blocks all access until you explicitly grant it.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#ff7b72>ALTER</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span>ENABLE<span style=color:#6e7681> </span><span style=color:#ff7b72>ROW</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LEVEL</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SECURITY</span>;<span style=color:#6e7681>
|
||||||
|
</span></span></span></code></pre></div><ol start=2><li><strong>Write “Allow” Policies:</strong> Create policies based on your user stories. Policies are SQL rules that the database enforces on every single query.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- Users can see tasks in projects they are a member of.
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>"Allow read access to tasks in user's projects"</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>USING</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>EXISTS</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681> </span><span style=color:#a5d6ff>1</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FROM</span><span style=color:#6e7681> </span>project_users<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>WHERE</span><span style=color:#6e7681> </span>project_users.project_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.project_id<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>AND</span><span style=color:#6e7681> </span>project_users.user_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>)<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span>);<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- Users can only insert tasks for themselves.
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>"Allow users to create their own tasks"</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>INSERT</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>WITH</span><span style=color:#6e7681> </span><span style=color:#ff7b72>CHECK</span><span style=color:#6e7681> </span>(<span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.assignee_id<span style=color:#6e7681> </span>);<span style=color:#6e7681>
|
||||||
|
</span></span></span></code></pre></div><p>The <code>auth.uid()</code> function is a special Supabase utility that securely returns the ID of the logged-in user making the request.</p><h4 id=phase-4-the-apis-data-access>Phase 4: The APIs (Data Access)
|
||||||
|
<a class=heading-link href=#phase-4-the-apis-data-access><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>With your data structured and secured, you can now build the access points.</p><ul><li><strong>For Simple CRUD:</strong> Use Supabase’s auto-generated API. It’s convenient, respects all your RLS policies, and is perfect for simple reads and writes on a single table.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.from(<span style=color:#a5d6ff>'tasks'</span>).select(<span style=color:#a5d6ff>'*'</span>);
|
||||||
|
</span></span></code></pre></div><ul><li><strong>For Complex Logic:</strong> Use PostgreSQL Functions (RPC). Encapsulate complex <code>JOIN</code>s or multi-step transactions into a single, callable function. This reduces network chattiness and keeps your business logic secure on the server.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- A function to get a task and its project name in one call
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>OR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>REPLACE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FUNCTION</span><span style=color:#6e7681> </span>get_task_with_project(task_id_input<span style=color:#6e7681> </span>int)<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>RETURNS</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>(task_title<span style=color:#6e7681> </span>text,<span style=color:#6e7681> </span>project_name<span style=color:#6e7681> </span>text)<span style=color:#6e7681> </span><span style=color:#ff7b72>AS</span><span style=color:#6e7681> </span><span style=color:#f85149>$$</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>BEGIN</span><span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>RETURN</span><span style=color:#6e7681> </span>QUERY<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681> </span>tasks.title,<span style=color:#6e7681> </span>projects.name<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>FROM</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>JOIN</span><span style=color:#6e7681> </span>projects<span style=color:#6e7681> </span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks.project_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>projects.id<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>WHERE</span><span style=color:#6e7681> </span>tasks.id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>task_id_input;<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>END</span>;<span style=color:#6e7681>
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#f85149>$$</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LANGUAGE</span><span style=color:#6e7681> </span>plpgsql;<span style=color:#6e7681>
|
||||||
|
</span></span></span></code></pre></div><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Called simply from the frontend
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.rpc(<span style=color:#a5d6ff>'get_task_with_project'</span>, { task_id_input<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>123</span> });
|
||||||
|
</span></span></code></pre></div><h3 id=a-tour-of-the-core-services>A Tour of the Core Services
|
||||||
|
<a class=heading-link href=#a-tour-of-the-core-services><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Beyond the database, Supabase provides a suite of essential tools.</p><h4 id=authentication>Authentication
|
||||||
|
<a class=heading-link href=#authentication><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>A complete user management system that integrates directly with your database. When a user signs up, a corresponding entry is created in the managed <code>auth.users</code> table, which you can then reference in your own tables.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Sign up a new user and handle social logins with ease
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signUp({ email, password });
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signInWithOAuth({ provider<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'github'</span> });
|
||||||
|
</span></span></code></pre></div><h4 id=storage>Storage
|
||||||
|
<a class=heading-link href=#storage><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>A simple, S3-compatible object store for managing files like user avatars or documents. It’s integrated with Postgres and RLS, allowing you to write fine-grained access policies on files and folders (buckets).</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Upload a user avatar to a public 'avatars' bucket
|
||||||
|
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.storage
|
||||||
|
</span></span><span style=display:flex><span> .from(<span style=color:#a5d6ff>'avatars'</span>)
|
||||||
|
</span></span><span style=display:flex><span> .upload(<span style=color:#a5d6ff>`public/</span><span style=color:#a5d6ff>${</span>userId<span style=color:#a5d6ff>}</span><span style=color:#a5d6ff>.png`</span>, file);
|
||||||
|
</span></span></code></pre></div><h4 id=edge-functions-vs-database-functions>Edge Functions vs. Database Functions
|
||||||
|
<a class=heading-link href=#edge-functions-vs-database-functions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>It’s critical to know when to use which.</p><ul><li><strong>Database Functions (SQL):</strong> For data-intensive logic <em>inside</em> your database.</li><li><strong>Edge Functions (TypeScript/Deno):</strong> For connecting to the outside world. Use them to call third-party APIs (like Stripe for payments) or run computations that are not well-suited for SQL. This is where you use your secret <code>service_role</code> key, as the function runs in a trusted server environment.</li></ul><h3 id=the-realtime-engine-a-pubsub-system-for-postgres>The Realtime Engine: A Pub/Sub System for Postgres
|
||||||
|
<a class=heading-link href=#the-realtime-engine-a-pubsub-system-for-postgres><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Supabase’s Realtime engine is a powerful feature for building live, interactive experiences.</p><h4 id=how-it-works-logical-replication>How it Works: Logical Replication
|
||||||
|
<a class=heading-link href=#how-it-works-logical-replication><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>It’s not magic; it leverages a core PostgreSQL feature.</p><ol><li>When you enable Realtime on a table, Supabase creates a <strong>Publication</strong> for it.</li><li>The Realtime server subscribes to this publication via a <strong>Logical Replication Slot</strong>.</li><li>When a transaction is <strong>successfully committed</strong> to your database, the change is written to Postgres’s Write-Ahead Log (WAL).</li><li>The WAL change is then sent to the Realtime server through the replication slot.</li><li>The server converts this database event into a JSON payload and broadcasts it over a WebSocket to all subscribed clients.</li></ol><h4 id=transactional-integrity>Transactional Integrity
|
||||||
|
<a class=heading-link href=#transactional-integrity><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The most important guarantee of this system is its relationship with database transactions. An event is <strong>only broadcast <em>after</em> a transaction is fully and successfully committed.</strong> If a transaction is rolled back due to an error, the replication slot receives nothing, and no Realtime event is ever sent. This means you can trust that every Realtime message you receive corresponds to data that is permanently and consistently stored in your database.</p><h4 id=use-cases-and-limitations>Use Cases and Limitations
|
||||||
|
<a class=heading-link href=#use-cases-and-limitations><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><ul><li><strong>Use For:</strong> Small, JSON-based messages like chat messages, live notifications, activity feeds, and presence indicators (“who’s online”). Use the <code>broadcast</code> feature for ephemeral data like cursor positions that you don’t need to save.</li><li><strong>Do NOT Use For:</strong> Large, continuous data streams. It is <strong>not</strong> a replacement for WebRTC for video/audio calls. The system is designed for small, infrequent payloads.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#ff7b72>const</span> channel <span style=color:#ff7b72;font-weight:700>=</span> supabase.channel(<span style=color:#a5d6ff>'public:messages'</span>);
|
||||||
|
</span></span><span style=display:flex><span>
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Subscribe to new rows in the 'messages' table
|
||||||
|
</span></span></span><span style=display:flex><span>channel
|
||||||
|
</span></span><span style=display:flex><span> .on(
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>'postgres_changes'</span>,
|
||||||
|
</span></span><span style=display:flex><span> { event<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'INSERT'</span>, schema<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'public'</span>, table<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'messages'</span> },
|
||||||
|
</span></span><span style=display:flex><span> (payload) => {
|
||||||
|
</span></span><span style=display:flex><span> console.log(<span style=color:#a5d6ff>'New message received!'</span>, payload.<span style=color:#ff7b72>new</span>);
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic>// Update your UI here
|
||||||
|
</span></span></span><span style=display:flex><span> }
|
||||||
|
</span></span><span style=display:flex><span> )
|
||||||
|
</span></span><span style=display:flex><span> .subscribe();
|
||||||
|
</span></span></code></pre></div><h3 id=final-words-of-advice>Final Words of Advice
|
||||||
|
<a class=heading-link href=#final-words-of-advice><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Frontend Freedom:</strong> Supabase is frontend-agnostic, but meta-frameworks like <strong>Next.js</strong> and <strong>SvelteKit</strong> offer a “golden path” with Auth Helpers that simplify server-side rendering and data fetching.</li><li><strong>Embrace the CLI:</strong> Use the Supabase CLI for a professional, safe, and repeatable development workflow. Don’t manage your production schema by clicking in the UI.</li><li><strong>Know Your Keys:</strong> Use the public <code>anon</code> key in the browser. Guard the secret <code>service_role</code> key and only use it in secure server environments like Edge Functions.</li><li><strong>Indexes Matter:</strong> For fast queries on large tables, <code>CREATE INDEX</code> on frequently queried columns. Performance is not automatic.</li></ul><p>By understanding these principles, you can leverage Supabase not as a simple BaaS, but as a powerful, transparent, and scalable platform for building next-generation applications on the solid foundation of PostgreSQL.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js 
integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>An Architectural Deep Dive of T5 · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the “decoder-only” model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.
|
||||||
|
But to truly understand the field, we must look at the pivotal models that explored different paths. Google’s T5, or Text-to-Text Transfer Transformer, stands out as one of the most influential. It didn’t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="An Architectural Deep Dive of T5"><meta name=twitter:description content="In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the “decoder-only” model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.
|
||||||
|
But to truly understand the field, we must look at the pivotal models that explored different paths. Google’s T5, or Text-to-Text Transfer Transformer, stands out as one of the most influential. It didn’t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices."><meta property="og:url" content="https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="An Architectural Deep Dive of T5"><meta property="og:description" content="In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the “decoder-only” model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.
|
||||||
|
But to truly understand the field, we must look at the pivotal models that explored different paths. Google’s T5, or Text-to-Text Transfer Transformer, stands out as one of the most influential. It didn’t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-06-01T00:00:00+00:00"><meta property="article:modified_time" content="2025-08-03T03:41:10+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 
href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"An Architectural Deep Dive of T5","genre":"Blog","wordcount":"1183","url":"https:\/\/ericxliu.me\/posts\/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive\/","datePublished":"2025-06-01T00:00:00\u002b00:00","dateModified":"2025-08-03T03:41:10\u002b00:00","description":"\u003cp\u003eIn the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the \u0026ldquo;decoder-only\u0026rdquo; model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.\u003c\/p\u003e\n\u003cp\u003eBut to truly understand the field, we must look at the pivotal models that explored different paths. Google\u0026rsquo;s T5, or \u003cstrong\u003eText-to-Text Transfer Transformer\u003c\/strong\u003e, stands out as one of the most influential. It didn\u0026rsquo;t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. 
Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/>An Architectural Deep Dive of T5</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-06-01T00:00:00Z>June 1, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
6-minute read</span></div></div></header><div class=post-content><p>In the rapidly evolving landscape of Large Language Models, a few key architectures define the dominant paradigms. Today, the “decoder-only” model, popularized by the GPT series and its successors like LLaMA and Mistral, reigns supreme. These models are scaled to incredible sizes and excel at in-context learning.</p><p>But to truly understand the field, we must look at the pivotal models that explored different paths. Google’s T5, or <strong>Text-to-Text Transfer Transformer</strong>, stands out as one of the most influential. It didn’t just introduce a new model; it proposed a new philosophy. This article dives deep into the architecture of T5, how it fundamentally differs from modern LLMs, and the lasting legacy of its unique design choices.</p><h3 id=the-core-philosophy-everything-is-a-text-to-text-problem>The Core Philosophy: Everything is a Text-to-Text Problem
|
||||||
|
<a class=heading-link href=#the-core-philosophy-everything-is-a-text-to-text-problem><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The genius of T5 lies in its unifying framework. Instead of building different models or fine-tuning procedures for various NLP tasks, T5 reframes every task as a text-to-text problem. The model takes a string as input and generates a string as output, regardless of the underlying objective.</p><p>This is accomplished by adding a <strong>task prefix</strong> to the input. These prefixes are not conversational prompts like a GPT “system prompt”; they are learned triggers that the model is explicitly fine-tuned to recognize.</p><table><thead><tr><th style=text-align:left>Task</th><th style=text-align:left>T5 Input</th><th style=text-align:left>Expected T5 Output</th></tr></thead><tbody><tr><td style=text-align:left>Translation</td><td style=text-align:left><code>translate English to German: The cat is cute.</code></td><td style=text-align:left><code>Die Katze ist süß.</code></td></tr><tr><td style=text-align:left>Summarization</td><td style=text-align:left><code>summarize: [A long news article...]</code></td><td style=text-align:left><code>[A concise summary.]</code></td></tr><tr><td style=text-align:left>Classification</td><td style=text-align:left><code>cola sentence: The boys is walking.</code></td><td style=text-align:left><code>unacceptable</code></td></tr><tr><td style=text-align:left>Similarity</td><td style=text-align:left><code>stsb sentence1: The car is red. sentence2: The auto is crimson.</code></td><td style=text-align:left><code>4.8</code></td></tr></tbody></table><p>This elegant approach turns even classification into a generation task, where the model learns to generate the text of the correct label.</p><h3 id=the-engine-a-two-window-encoder-decoder-architecture>The Engine: A Two-Window Encoder-Decoder Architecture
|
||||||
|
<a class=heading-link href=#the-engine-a-two-window-encoder-decoder-architecture><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>To execute this text-to-text mission, T5 uses the original Transformer’s <strong>encoder-decoder architecture</strong>. This is the most significant point of divergence from modern decoder-only LLMs. The inference process works in two distinct stages:</p><h4 id=stage-1-the-encoder-the-understanding-window>Stage 1: The Encoder (The “Understanding” Window)
|
||||||
|
<a class=heading-link href=#stage-1-the-encoder-the-understanding-window><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>When T5 receives an input like <code>summarize: [article text]</code>, the entire string is fed into the <strong>encoder</strong>.</p><ul><li><strong>Bidirectional Context:</strong> The encoder processes the input bidirectionally. Every token can see every other token in the input text simultaneously. This allows the model to build a deep, holistic understanding of the entire prompt and its context.</li><li><strong>Static Representation:</strong> The encoder’s final output is not text. It’s a set of numerical representations (hidden states) that encapsulates the meaning and intent of the input. This representation is generated once and remains static for the entire generation process.</li></ul><h4 id=stage-2-the-decoder-the-writing-window>Stage 2: The Decoder (The “Writing” Window)
|
||||||
|
<a class=heading-link href=#stage-2-the-decoder-the-writing-window><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The decoder is responsible for generating the output string token by token.</p><ul><li><strong>Autoregressive Generation:</strong> It begins with a <code>start-of-sequence</code> token and generates the output one word at a time.</li><li><strong>Cross-Attention:</strong> At each step, the decoder does two things: it looks at the text it has generated so far (its own “decoder context”), and crucially, it uses a mechanism called <strong>cross-attention</strong> to look back at the static representation created by the encoder. This allows the decoder’s generation to be guided by the encoder’s complete understanding of the prompt.</li><li><strong>Growing Context:</strong> The decoder’s context window grows with each token it generates until it produces an <code>end-of-sequence</code> token, signaling that the task is complete.</li></ul><p>This two-window system is a powerful design, especially for tasks that require a full understanding of a source document before generating a new one (like translation or summarization).</p><h3 id=architectural-divergence-t5-vs-the-modern-llm-playbook>Architectural Divergence: T5 vs. The Modern LLM Playbook
|
||||||
|
<a class=heading-link href=#architectural-divergence-t5-vs-the-modern-llm-playbook><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Beyond its core architecture, T5 made several specific design choices that contrast with today’s standards.</p><h4 id=1-positional-embeddings-relative-rpe-vs-rotary-rope>1. Positional Embeddings: Relative (RPE) vs. Rotary (RoPE)
|
||||||
|
<a class=heading-link href=#1-positional-embeddings-relative-rpe-vs-rotary-rope><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>How a model knows the order of words is critical.</p><ul><li><strong>T5’s Approach (RPE):</strong> T5 uses a form of <strong>Relative Positional Embedding</strong>. Instead of adding a position signal to the word embeddings, it adds a learned bias directly to the attention scores based on the relative distance between tokens. It’s a clever way to encode position that is independent of sequence length.</li><li><strong>The Modern Standard (RoPE):</strong> Most modern LLMs (LLaMA, PaLM, Mistral) use <strong>Rotary Positional Embeddings</strong>. As detailed in the CS336 slides, RoPE works by mathematically <em>rotating</em> the Query and Key vectors based on their absolute position. This method has proven exceptionally effective for long sequences and is considered the current state-of-the-art.</li></ul><h4 id=2-the-feed-forward-network-an-extreme-experiment>2. The Feed-Forward Network: An Extreme Experiment
|
||||||
|
<a class=heading-link href=#2-the-feed-forward-network-an-extreme-experiment><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The Feed-Forward Network (FFN) inside each Transformer block is typically 4 times the model’s hidden dimension (<code>d_model</code>). The original T5 11B model took a radical departure from this rule.</p><ul><li><strong>T5 11B’s Choice:</strong> It used a small hidden dimension (<code>d_model = 1024</code>) but an astoundingly large FFN dimension (<code>d_ff = 65,536</code>), a <strong>64-times multiplier</strong>. The rationale was that modern accelerators (like Google’s TPUs) are highly efficient at large, dense matrix multiplications.</li><li><strong>The Modern Standard:</strong> This experiment was not widely adopted. Later models, including T5’s own successor <strong>T5 v1.1</strong>, reverted to the standard 4x multiplier (or ~2.66x when using GLU activations) for a better balance of parameters and performance.</li></ul><h4 id=3-denoising-span-corruption-vs-iterative-diffusion>3. Denoising: Span Corruption vs. Iterative Diffusion
|
||||||
|
<a class=heading-link href=#3-denoising-span-corruption-vs-iterative-diffusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>While T5’s pre-training is called “denoising,” it’s conceptually different from the denoising in modern diffusion models.</p><ul><li><strong>T5’s Denoising:</strong> This is <strong>span corruption</strong>. The model is shown a sentence with chunks of text masked out and learns to predict exactly what was removed in a single step. It’s a fill-in-the-blanks task to learn rich language representations.</li><li><strong>Diffusion Denoising:</strong> This is a multi-step generative process. A clean text is gradually corrupted with noise, and the model learns to reverse this process step-by-step, allowing it to generate high-fidelity text from pure noise.</li></ul><h3 id=where-t5-was-ahead-of-its-time>Where T5 Was Ahead of its Time
|
||||||
|
<a class=heading-link href=#where-t5-was-ahead-of-its-time><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Despite its differences, the “T5 v1.1” variant pioneered several techniques that are now standard practice in the most advanced LLMs:</p><ul><li><strong>RMSNorm:</strong> It was one of the first major models to adopt Root Mean Square Normalization instead of LayerNorm, a choice now used by LLaMA, Mistral, and others for its efficiency and stability.</li><li><strong>Pre-Normalization:</strong> T5 applies the normalization layer <em>before</em> the attention and FFN blocks, a critical technique for enabling stable training of very deep networks.</li><li><strong>No Bias Terms:</strong> T5 v1.1 removed the bias parameters from its normalization and FFN layers, a small but important optimization for memory and stability that modern models follow.</li><li><strong>Gated Activations (GeGLU):</strong> While the original T5 used ReLU, T5 v1.1 adopted a Gated Linear Unit (GeGLU), presaging the move to GLU-family activations (like SwiGLU) that is now ubiquitous.</li></ul><h3 id=conclusion-the-lasting-legacy>Conclusion: The Lasting Legacy
|
||||||
|
<a class=heading-link href=#conclusion-the-lasting-legacy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>T5 represents a different evolutionary branch in the Transformer family tree. While the field has largely converged on the decoder-only architecture for its scalability in general-purpose models, T5’s design remains a masterclass in purpose-built engineering.</p><p>Its text-to-text framework was revolutionary, its encoder-decoder structure is still a go-to for tasks like translation, and its refined T5 v1.1 architecture laid the groundwork for many of the stability and efficiency tricks we see in today’s state-of-the-art models. T5 is more than just a model; it’s a crucial case study in the architectural trade-offs that continue to shape the future of artificial intelligence.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous 
onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
76
posts/technical-deep-dive-llm-categorization/index.html
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content='Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and “wait, was this dinner or vacation dinner?” questions.
|
||||||
|
For years, I relied on a rule-based system to categorize our credit card transactions. It worked… mostly. But maintaining if "UBER" in description and amount > 50 style rules is a never-ending battle against the entropy of merchant names and changing habits.'><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM"><meta name=twitter:description content='Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and “wait, was this dinner or vacation dinner?” questions.
|
||||||
|
For years, I relied on a rule-based system to categorize our credit card transactions. It worked… mostly. But maintaining if "UBER" in description and amount > 50 style rules is a never-ending battle against the entropy of merchant names and changing habits.'><meta property="og:url" content="https://ericxliu.me/posts/technical-deep-dive-llm-categorization/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM"><meta property="og:description" content='Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and “wait, was this dinner or vacation dinner?” questions.
|
||||||
|
For years, I relied on a rule-based system to categorize our credit card transactions. It worked… mostly. But maintaining if "UBER" in description and amount > 50 style rules is a never-ending battle against the entropy of merchant names and changing habits.'><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-27T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/technical-deep-dive-llm-categorization/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM","genre":"Blog","wordcount":"1355","url":"https:\/\/ericxliu.me\/posts\/technical-deep-dive-llm-categorization\/","datePublished":"2025-12-27T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003cp\u003eRunning a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and \u0026ldquo;wait, was this dinner or \u003cem\u003evacation\u003c\/em\u003e dinner?\u0026rdquo; questions.\u003c\/p\u003e\n\u003cp\u003eFor years, I relied on a rule-based system to categorize our credit card transactions. It worked\u0026hellip; mostly. But maintaining \u003ccode\u003eif \u0026quot;UBER\u0026quot; in description and amount \u0026gt; 50\u003c\/code\u003e style rules is a never-ending battle against the entropy of merchant names and changing habits.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/technical-deep-dive-llm-categorization/>From Gemini-3-Flash to T5-Gemma-2: A Journey in Distilling a Family Finance LLM</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-12-27T00:00:00Z>December 27, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
7-minute read</span></div></div></header><div class=post-content><p>Running a family finance system is surprisingly complex. What starts as a simple spreadsheet often evolves into a web of rules, exceptions, and “wait, was this dinner or <em>vacation</em> dinner?” questions.</p><p>For years, I relied on a rule-based system to categorize our credit card transactions. It worked… mostly. But maintaining <code>if "UBER" in description and amount > 50</code> style rules is a never-ending battle against the entropy of merchant names and changing habits.</p><p>Recently, I decided to modernize this stack using Large Language Models (LLMs). This post details the technical journey from using an off-the-shelf commercial model to distilling that knowledge into a small, efficient local model (<code>google/t5gemma-2-270m</code>) that runs on my own hardware while maintaining high accuracy.</p><h2 id=phase-1-the-proof-of-concept-with-commercial-llms>Phase 1: The Proof of Concept with Commercial LLMs
|
||||||
|
<a class=heading-link href=#phase-1-the-proof-of-concept-with-commercial-llms><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>My first step was to replace the spaghetti code of regex rules with a prompt. I used <strong>Gemini-3-Flash</strong> (via <code>litellm</code>) as my categorization engine.</p><p>The core challenge was context. A transaction like <code>MCDONALDS</code> could be:</p><ul><li><strong>Dining</strong>: A quick lunch during work.</li><li><strong>Travel-Dining</strong>: A meal while on a road trip.</li></ul><p>To solve this, I integrated my <strong>private Google Calendar</strong> (via <code>.ics</code> export). The prompt doesn’t just see the transaction; it sees <em>where I was</em> and <em>what I was doing</em> on that day.</p><h3 id=the-god-prompt>The “God Prompt”
|
||||||
|
<a class=heading-link href=#the-god-prompt><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The system prompt was designed to return strict JSON, adhering to a schema of Categories (e.g., <code>Dining</code>, <code>Travel</code>, <code>Bills</code>) and Sub-Categories (e.g., <code>Travel</code> -> <code>Accommodation</code>).</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-json data-lang=json><span style=display:flex><span>{
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#7ee787>"Category"</span>: <span style=color:#a5d6ff>"Travel"</span>,
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#7ee787>"Travel Category"</span>: <span style=color:#a5d6ff>"Dining"</span>,
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#7ee787>"Reasoning"</span>: <span style=color:#a5d6ff>"User is on 'Trip: 34TH ARCH CANYON 2025', distinguishing this from regular dining."</span>
|
||||||
|
</span></span><span style=display:flex><span>}
|
||||||
|
</span></span></code></pre></div><p>This worked well. The “Reasoning” field even gave me explanations for why it flagged something as <code>Entertainment</code> vs <code>Shopping</code>. But relying on an external API for every single transaction felt like overkill for a personal project, and I wanted to own the stack.</p><h2 id=phase-2-distilling-knowledge>Phase 2: Distilling Knowledge
|
||||||
|
<a class=heading-link href=#phase-2-distilling-knowledge><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>I wanted to train a smaller model to mimic Gemini’s performance. But I didn’t want to manually label thousands of transactions.</p><h3 id=consistency-filtering>Consistency Filtering
|
||||||
|
<a class=heading-link href=#consistency-filtering><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I had a massive CSV of historical transactions (years of data). However, that data was “noisy”—some manual labels were outdated or inconsistent.</p><p>I built a <strong>Distillation Pipeline</strong> (<code>distill_reasoning.py</code>) that uses the Teacher Model (Gemini) to re-label the historical data. But here’s the twist: I only added a data point to my training set if the <strong>Teacher’s prediction matched the Historical Ground Truth</strong>.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-python data-lang=python><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Pseudo-code for consistency filtering</span>
|
||||||
|
</span></span><span style=display:flex><span>teacher_pred <span style=color:#ff7b72;font-weight:700>=</span> gemini<span style=color:#ff7b72;font-weight:700>.</span>categorize(transaction)
|
||||||
|
</span></span><span style=display:flex><span>historical_label <span style=color:#ff7b72;font-weight:700>=</span> row[<span style=color:#a5d6ff>'Category'</span>]
|
||||||
|
</span></span><span style=display:flex><span>
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#ff7b72>if</span> teacher_pred<span style=color:#ff7b72;font-weight:700>.</span>category <span style=color:#ff7b72;font-weight:700>==</span> historical_label:
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic># High confidence sample!</span>
|
||||||
|
</span></span><span style=display:flex><span> training_data<span style=color:#ff7b72;font-weight:700>.</span>append({
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>"input"</span>: format_transaction(transaction),
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>"output"</span>: teacher_pred<span style=color:#ff7b72;font-weight:700>.</span>to_json()
|
||||||
|
</span></span><span style=display:flex><span> })
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#ff7b72>else</span>:
|
||||||
|
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic># Discard: Either history is wrong OR teacher hallucinated.</span>
|
||||||
|
</span></span><span style=display:flex><span> log_fail(transaction)
|
||||||
|
</span></span></code></pre></div><p>This filtered out the noise, leaving me with ~2,000 high-quality, “verified” examples where both the human (me, years ago) and the AI agreed.</p><h2 id=phase-3-training-the-little-guy>Phase 3: Training the Little Guy
|
||||||
|
<a class=heading-link href=#phase-3-training-the-little-guy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>For the local model, I chose <strong>google/t5gemma-2-270m</strong>. This is a Seq2Seq model, which fits the “Text-to-JSON” task perfectly, and it’s tiny (270M parameters), meaning it can run on almost anything.</p><h3 id=the-stack>The Stack
|
||||||
|
<a class=heading-link href=#the-stack><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Library</strong>: <code>transformers</code>, <code>peft</code>, <code>bitsandbytes</code></li><li><strong>Technique</strong>: <strong>LoRA</strong> (Low-Rank Adaptation). I targeted all linear layers (<code>q_proj</code>, <code>k_proj</code>, <code>v_proj</code>, etc.) with <code>r=16</code>.</li><li><strong>Optimization</strong>: <code>AdamW</code> with linear decay.</li></ul><h3 id=pitfall-1-the-loss-is-0-initial-panic>Pitfall #1: The “Loss is 0” Initial Panic
|
||||||
|
<a class=heading-link href=#pitfall-1-the-loss-is-0-initial-panic><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>My first training run showed a loss of exactly <code>0.000</code> essentially immediately. In deep learning, if it looks too good to be true, it’s a bug.
|
||||||
|
It turned out to be a syntax error in my arguments passed to the <code>Trainer</code> (or rather, my custom loop). Once fixed, the loss looked “healthy”—starting high and decaying noisily.</p><h3 id=pitfall-2-stability-vs-noise>Pitfall #2: Stability vs. Noise
|
||||||
|
<a class=heading-link href=#pitfall-2-stability-vs-noise><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The loss curve was initially extremely erratic. The batch size on my GPU was limited (Physical Batch Size = 4).
|
||||||
|
<strong>The Fix</strong>: I implemented <strong>Gradient Accumulation</strong> (accumulating over 8 steps) to simulate a batch size of 32. This smoothed out the optimization landscape significantly.
|
||||||
|
<img src=/images/technical-deep-dive-llm-categorization/eedb3be8259a4a70aa7029b78a029364.png alt="S3 File"></p><h3 id=pitfall-3-overfitting>Pitfall #3: Overfitting
|
||||||
|
<a class=heading-link href=#pitfall-3-overfitting><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>With a small dataset (~2k samples), overfitting is a real risk. I employed a multi-layered defense strategy:</p><ol><li><strong>Data Quality First</strong>: The “Consistency Filtering” phase was the most critical step. By discarding ambiguous samples where the teacher model disagreed with history, I prevented the model from memorizing noise.</li><li><strong>Model Regularization</strong>:<ul><li><strong>LoRA Dropout</strong>: I set <code>lora_dropout=0.1</code>, randomly dropping 10% of the trainable adapter connections during training to force robust feature learning.</li><li><strong>Gradient Clipping</strong>: We capped the gradient norm at <code>1.0</code>. This prevents the “exploding gradient” problem and keeps weight updates stable.</li><li><strong>AdamW</strong>: Using the AdamW optimizer adds decoupled weight decay, implicitly penalizing overly complex weights.</li></ul></li></ol><p>I also set up a rigorous evaluation loop (10% validation split, eval every 50 steps) to monitor the <code>Train Loss</code> vs <code>Eval Loss</code> in real-time. The final curves showed them tracking downwards together, confirming generalization.</p><h2 id=phase-4-results-and-the-travel-edge-case>Phase 4: Results and The “Travel” Edge Case
|
||||||
|
<a class=heading-link href=#phase-4-results-and-the-travel-edge-case><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The distilled model is surprisingly capable. It learned the JSON schema very well. Although I included a regex fallback in the inference script as a safety net, the model generates valid JSON the vast majority of the time.</p><h3 id=head-to-head-local-model-vs-gemini-flash>Head-to-Head: Local Model vs Gemini-Flash
|
||||||
|
<a class=heading-link href=#head-to-head-local-model-vs-gemini-flash><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>I ran a blind evaluation on 20 random unseen transactions.</p><ul><li><strong>Gemini-3-Flash Accuracy</strong>: 90% (18/20)</li><li><strong>Local T5-Gemma-2 Accuracy</strong>: 85% (17/20)</li></ul><p>The gap is surprisingly small. In fact, the local model sometimes outperformed the API because it was fine-tuned on <em>my</em> specific data distribution.</p><p><strong>Win for Local Model:</strong></p><blockquote><p><strong>Transaction</strong>: <code>XX RANCH #1702</code>
|
||||||
|
<strong>Local Prediction</strong>: <code>Groceries</code> (Correct)
|
||||||
|
<strong>API Prediction</strong>: <code>Gas</code> (Incorrect)
|
||||||
|
<strong>Local Reasoning</strong>: “XX RANCH refers to a well-known supermarket chain.”
|
||||||
|
<strong>API Reasoning</strong>: “XX RANCH is a known convenience store and gas station chain.”
|
||||||
|
<strong>Analysis</strong>: The local model “knows” (from training data) that XX Ranch is an Asian grocery store I frequent, whereas the general-purpose API assumed it was a gas station based on the name pattern.</p></blockquote><p><strong>Win for API (World Knowledge):</strong></p><blockquote><p><strong>Transaction</strong>: <code>LOVE'S #0792</code>
|
||||||
|
<strong>Local Prediction</strong>: <code>Dining</code> (Hallucination)
|
||||||
|
<strong>API Prediction</strong>: <code>Travel-Gas</code> (Correct)
|
||||||
|
<strong>Local Reasoning</strong>: “Love’s is a well-known restaurant chain, which falls under the Dining category.”
|
||||||
|
<strong>API Reasoning</strong>: “Love’s is a well-known gas station chain, and the transaction occurred during a trip to Moab, categorizing it as travel-related fuel.”
|
||||||
|
<strong>Analysis</strong>: The API knows “Love’s” is a major gas station chain. The small local model lacks this world knowledge and hallucinates it as a restaurant, highlighting the pure “Knowledge Gap” between a 270M and a 70B+ model. Additionally, Gemini Flash has <strong>Google Search grounding</strong> enabled, allowing it to verify real-world entities in real-time—a capability our isolated local model intrinsically lacks.</p></blockquote><h3 id=surprise-win-json-stability>Surprise Win: JSON Stability
|
||||||
|
<a class=heading-link href=#surprise-win-json-stability><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>One pleasant surprise was the <strong>format adherence</strong>. I initially feared I’d need constrained generation tools like <code>outlines</code> or a simplified schema for a 270M parameter model. However, the distilled T5-Gemma model followed the complex JSON schema (including nested fields) with near-perfect reliability, proving that specific structure can be learned effectively through fine-tuning alone.</p><h3 id=key-lesson-the-noisy-ground-truth-trap>Key Lesson: The “Noisy Ground Truth” Trap
|
||||||
|
<a class=heading-link href=#key-lesson-the-noisy-ground-truth-trap><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Since this is a <strong>distillation (SFT)</strong> pipeline, not Reinforcement Learning, the model has no way to “unlearn” bad habits via negative rewards. It relies entirely on the quality of the teacher’s reasoning.</p><blockquote><p><strong>Transaction</strong>: <code>[TRAVEL] SWEETHOME KITCHEN</code>
|
||||||
|
<strong>Local Prediction</strong>: <code>Dining</code>
|
||||||
|
<strong>API Prediction</strong>: <code>Travel-Dining</code>
|
||||||
|
<strong>Local Reasoning</strong>: “The description ‘SWEETHOME KITCHEN’ indicates a restaurant or dining establishment, which falls under the Dining category.”
|
||||||
|
<strong>API Reasoning</strong>: “The transaction is for a kitchen/restaurant and occurred while the user was traveling to Pfeiffer Big Sur SP, making it a travel-related dining expense.”</p></blockquote><p>In this case, the API correctly used the calendar context (“User is in Big Sur”). The local model missed this link. This highlights that simply having the data isn’t enough—the <em>reasoning</em> in the training set must explicitly force the model to look at the context, or it will revert to simple pattern matching (Kitchen = Dining).</p><h2 id=conclusion>Conclusion
|
||||||
|
<a class=heading-link href=#conclusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>We often think we need 70B parameter models for everything. This experiment shows that for a specific, well-defined task with consistent formatting, a <strong>270M parameter model</strong>—fine-tuned on high-quality, distilled data—can punch way above its weight class.</p><p>The key was <strong>data quality over quantity</strong>. By using the commercial model to “verify” my historical data, I created a dataset that was cleaner than either source alone.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>The Convergence of Fast Weights, Linear Attention, and State Space Models · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).
|
||||||
|
This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="The Convergence of Fast Weights, Linear Attention, and State Space Models"><meta name=twitter:description content="Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).
|
||||||
|
This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV."><meta property="og:url" content="https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="The Convergence of Fast Weights, Linear Attention, and State Space Models"><meta property="og:description" content="Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).
|
||||||
|
This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-19T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-19T21:21:55+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"The Convergence of Fast Weights, Linear Attention, and State Space Models","genre":"Blog","wordcount":"984","url":"https:\/\/ericxliu.me\/posts\/the-convergence-of-fast-weights-linear-attention-and-state-space-models\/","datePublished":"2025-12-19T00:00:00\u002b00:00","dateModified":"2025-12-19T21:21:55\u002b00:00","description":"\u003cp\u003eModern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (\u0026ldquo;Fast Weights\u0026rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).\u003c\/p\u003e\n\u003cp\u003eThis article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. 
Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/>The Convergence of Fast Weights, Linear Attention, and State Space Models</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-12-19T00:00:00Z>December 19, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
5-minute read</span></div></div></header><div class=post-content><p>Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).</p><p>This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.</p><h2 id=1-the-standard-transformer-bottleneck>1. The Standard Transformer Bottleneck
|
||||||
|
<a class=heading-link href=#1-the-standard-transformer-bottleneck><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>To understand the motivation for Fast Weights, one must first identify the inefficiency in standard Transformers. The core operation is <strong>Self-Attention</strong>, defined as:</p>$$ \text{Attention}(Q, K, V) = \text{softmax}\left(\frac{Q K^T}{\sqrt{d}}\right) V $$<p>During inference (generating tokens one by one), the model computes a Query ($Q$) for the current token and compares it against the Keys ($K$) and Values ($V$) of all previous tokens.</p><ul><li><strong>Computational Cost:</strong> Quadratic $O(N^2)$ during training; Linear $O(N)$ per step during inference.</li><li><strong>Memory Cost:</strong> The KV Cache. To calculate the softmax, the model must explicitly store the $K$ and $V$ vectors for the entire history in GPU memory. For long contexts (e.g., 1 million tokens), this memory footprint becomes prohibitive.</li></ul><p>The <strong>Softmax</strong> function is the culprit. It introduces a non-linearity that binds $Q$ and $K$ together, preventing the mathematical separation of the current query from the historical context.</p><h2 id=2-fast-weights-as-associative-memory>2. Fast Weights as Associative Memory
|
||||||
|
<a class=heading-link href=#2-fast-weights-as-associative-memory><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Geoffrey Hinton proposes that the brain does not maintain a “digital buffer” of past activations (like a KV cache). Instead, it relies on <strong>Fast Weights</strong>.</p><p>In this framework, neural connections possess two timescales:</p><ol><li><strong>Slow Weights:</strong> The standard parameters learned over long periods (training).</li><li><strong>Fast Weights:</strong> Synaptic strengths that change rapidly during a forward pass to store temporary context.</li></ol><p>Hinton formalizes this temporary storage as an <strong>Associative Memory</strong>. When a network encounters a new key-value pair ($k, v$), it does not store the vectors in a list. Instead, it updates a fast weight matrix $W_{fast}$ using the Hebbian learning rule (outer product):</p>$$ W_{fast} \leftarrow \lambda W_{fast} + (v \otimes k) $$<p>Here, $\lambda$ is a decay factor ($0 < \lambda < 1$) representing forgetfulness. This matrix $W_{fast}$ compresses the history into a fixed-size representation of size $d \times d$, regardless of the sequence length.</p><h2 id=3-mathematical-unification-linear-attention>3. Mathematical Unification: Linear Attention
|
||||||
|
<a class=heading-link href=#3-mathematical-unification-linear-attention><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The connection between Fast Weights and Transformers is established by removing the softmax function from the attention mechanism, a technique known as <strong>Linear Attention</strong>.</p><p>If we treat the interaction between $Q$ and $K$ as linear, the attention equation becomes:</p>$$ \text{LinearAttention} = (Q K^T) V $$<p>Using the associative property of matrix multiplication, we can reorder the operations:</p>$$ Q (K^T V) $$<p>This reordering fundamentally alters the mechanism:</p><ul><li><strong>Left Side $(Q K^T) V$:</strong> Compare Query to all Keys, then multiply by Values. Requires storing history.</li><li><strong>Right Side $Q (K^T V)$:</strong> Compute the summation of Key-Value outer products first.</li></ul><p>The term $(K^T V)$ represents the summation of all past associations. This term <strong>is</strong> the Fast Weight matrix $W_{fast}$ described by Hinton.</p>$$ \text{State}_t = \sum_{i=1}^t k_i v_i^T $$<p>Thus, Linear Attention is effectively a system where the “state” is a matrix of Fast Weights that is updated at every time step.</p><h2 id=4-state-space-models-ssms-as-recurrent-fast-weights>4. State Space Models (SSMs) as Recurrent Fast Weights
|
||||||
|
<a class=heading-link href=#4-state-space-models-ssms-as-recurrent-fast-weights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>State Space Models (like S4 and Mamba) typically define sequence modeling through continuous control theory, discretized into a recurrence:</p>$$ h_t = \bar{A} h_{t-1} + \bar{B} x_t $$<p></p>$$ y_t = \bar{C} h_t $$<p>While derived differently, this recurrence is mathematically equivalent to the Linear Attention/Fast Weight mechanism. We can demonstrate this by “unrolling” the SSM recursion to see how the output $y_t$ depends on the history.</p><p>The output at time $t$ is the sum of inputs weighted by decaying powers of $\bar{A}$:</p>$$ y_t = \sum_{j=1}^t \bar{C} (\bar{A}^{t-j}) (\bar{B} x_j) $$<p>Comparing this to the Linear Attention formulation with decay $\lambda$:</p>$$ \text{Attention}_t = q_t \sum_{j=1}^t (\lambda^{t-j}) (k_j^T v_j) $$<p>The mapping between architectures becomes clear:</p><ul><li><strong>Query ($q_t$)</strong> $\leftrightarrow$ Output Matrix <strong>$\bar{C}$</strong></li><li><strong>Key/Value ($k_j^T v_j$)</strong> $\leftrightarrow$ Input Matrix <strong>$\bar{B} x_j$</strong> (Input Projection)</li><li><strong>Decay Factor ($\lambda$)</strong> $\leftrightarrow$ State Matrix <strong>$\bar{A}$</strong></li><li><strong>Fast Weight Matrix ($S_t$)</strong> $\leftrightarrow$ Hidden State <strong>$h_t$</strong></li></ul><p>Therefore, an SSM is mechanically a Transformer that uses Fast Weights (a fixed-size recurrent state) rather than a KV Cache (a growing buffer) to handle attention.</p><h2 id=5-implications-for-inference-optimization>5. Implications for Inference Optimization
|
||||||
|
<a class=heading-link href=#5-implications-for-inference-optimization><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>This theoretical convergence has significant implications for inference efficiency.</p><h3 id=standard-transformer>Standard Transformer
|
||||||
|
<a class=heading-link href=#standard-transformer><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Mechanism:</strong> Stores history in a KV Cache.</li><li><strong>Memory:</strong> $O(N)$ (Grows linearly with sequence length).</li><li><strong>Performance:</strong> High recall/precision because it retains the exact history.</li></ul><h3 id=fast-weight--ssm-mamba--rwkv>Fast Weight / SSM (Mamba / RWKV)
|
||||||
|
<a class=heading-link href=#fast-weight--ssm-mamba--rwkv><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Mechanism:</strong> Compresses history into a single Matrix/Vector state.</li><li><strong>Memory:</strong> $O(1)$ (Constant memory, regardless of sequence length).</li><li><strong>Performance:</strong> Historically lower than Transformers due to “compression loss” (trying to stuff infinite history into a finite matrix).</li></ul><p><strong>The Solution:</strong> Modern SSMs like Mamba improve upon basic Linear Attention by introducing <strong>Selectivity</strong>. Instead of compressing <em>all</em> history equally (which blurs the memory), Mamba allows the model to dynamically gate the inputs—choosing to store relevant information and reset/forget irrelevant noise. This allows the Fast Weight approach to compete with the accuracy of explicit Attention while maintaining constant memory usage.</p><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ol><li><strong>Hinton, G. E., & Plaut, D. C. (1987).</strong> “Using Fast Weights to Deblur Old Memories.” <em>Proceedings of the 9th Annual Conference of the Cognitive Science Society.</em></li><li><strong>Ba, J., Hinton, G. E., et al. (2016).</strong> “Using Fast Weights to Attend to the Recent Past.” <em>Advances in Neural Information Processing Systems (NeurIPS).</em></li><li><strong>Katharopoulos, A., et al. (2020).</strong> “Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention.” <em>International Conference on Machine Learning (ICML).</em></li><li><strong>Gu, A., & Dao, T. (2023).</strong> “Mamba: Linear-Time Sequence Modeling with Selective State Spaces.” <em>arXiv preprint arXiv:2312.00752.</em></li><li><strong>Vaswani, A., et al. (2017).</strong> “Attention Is All You Need.” <em>Advances in Neural Information Processing Systems (NeurIPS).</em></li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer 
src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
39
posts/transformer-s-core-mechanics/index.html
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Transformer's Core Mechanics · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of “channels” to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.
|
||||||
|
|
||||||
|
1. The “Channel”: A Foundational View of d_model
|
||||||
|
|
||||||
|
|
||||||
|
Link to heading
|
||||||
|
|
||||||
|
|
||||||
|
In deep learning, a “channel” can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model’s primary embedding dimension, commonly referred to as d_model."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Transformer's Core Mechanics"><meta name=twitter:description content="The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of “channels” to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.
|
||||||
|
1. The “Channel”: A Foundational View of d_model Link to heading In deep learning, a “channel” can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model’s primary embedding dimension, commonly referred to as d_model."><meta property="og:url" content="https://ericxliu.me/posts/transformer-s-core-mechanics/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Transformer's Core Mechanics"><meta property="og:description" content="The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of “channels” to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.
|
||||||
|
1. The “Channel”: A Foundational View of d_model Link to heading In deep learning, a “channel” can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model’s primary embedding dimension, commonly referred to as d_model."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-04-01T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/transformer-s-core-mechanics/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon 
href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Transformer\u0027s Core Mechanics","genre":"Blog","wordcount":"1326","url":"https:\/\/ericxliu.me\/posts\/transformer-s-core-mechanics\/","datePublished":"2025-04-01T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003cp\u003eThe Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of \u0026ldquo;channels\u0026rdquo; to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.\u003c\/p\u003e\n\u003ch3 id=\u00221-the-channel-a-foundational-view-of-d_model\u0022\u003e\n 1. 
The \u0026ldquo;Channel\u0026rdquo;: A Foundational View of \u003ccode\u003ed_model\u003c\/code\u003e\n \u003ca class=\u0022heading-link\u0022 href=\u0022#1-the-channel-a-foundational-view-of-d_model\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h3\u003e\n\u003cp\u003eIn deep learning, a \u0026ldquo;channel\u0026rdquo; can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model\u0026rsquo;s primary embedding dimension, commonly referred to as \u003ccode\u003ed_model\u003c\/code\u003e.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authentik>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/transformer-s-core-mechanics/>Transformer's Core Mechanics</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-04-01T00:00:00Z>April 1, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
7-minute read</span></div></div></header><div class=post-content><p>The Transformer architecture is the bedrock of modern Large Language Models (LLMs). While its high-level success is widely known, a deeper understanding requires dissecting its core components. This article provides a detailed, technical breakdown of the fundamental concepts within a Transformer block, from the notion of “channels” to the intricate workings of the attention mechanism and its relationship with other advanced architectures like Mixture of Experts.</p><h3 id=1-the-channel-a-foundational-view-of-d_model>1. The “Channel”: A Foundational View of <code>d_model</code>
|
||||||
|
<a class=heading-link href=#1-the-channel-a-foundational-view-of-d_model><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>In deep learning, a “channel” can be thought of as a feature dimension. While this term is common in Convolutional Neural Networks for images (e.g., Red, Green, Blue channels), in LLMs, the analogous concept is the model’s primary embedding dimension, commonly referred to as <code>d_model</code>.</p><p>An input text is first tokenized, and each token is mapped to a vector of size <code>d_model</code> (e.g., 4096). Each of the 4096 dimensions in this vector can be considered a “channel,” representing a different semantic or syntactic feature of the token.</p><p>As this data, represented by a tensor of shape <code>[batch_size, sequence_length, d_model]</code>, progresses through the layers of the Transformer, these channels are continuously transformed. However, a critical design choice is that the output dimension of every main sub-layer (like the attention block or the FFN block) is also <code>d_model</code>. This consistency is essential for enabling <strong>residual connections</strong>, where the input to a block is added to its output (<code>output = input + SubLayer(input)</code>). This technique is vital for training the extremely deep networks common today.</p><h3 id=2-the-building-blocks-dimensions-of-key-layers>2. The Building Blocks: Dimensions of Key Layers
|
||||||
|
<a class=heading-link href=#2-the-building-blocks-dimensions-of-key-layers><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>A Transformer layer is primarily composed of two sub-layers: a Multi-Head Attention block and a position-wise Feed-Forward Network (FFN). The parameters for these are stored in several key weight matrices. Understanding their dimensions is crucial.</p><p>Let’s define our variables:</p><ul><li><code>d_model</code>: The core embedding dimension.</li><li><code>d_ff</code>: The inner dimension of the FFN, typically <code>4 * d_model</code>.</li><li><code>h</code>: The number of attention heads.</li><li><code>d_head</code>: The dimension of each attention head, where <code>d_model = h * d_head</code>.</li></ul><p>The dimensions of the weight matrices are as follows:</p><table><thead><tr><th>Layer</th><th>Weight Matrix</th><th>Input Vector Shape</th><th>Output Vector Shape</th><th><strong>Weight Matrix Dimension</strong></th></tr></thead><tbody><tr><td><strong>Attention Projections</strong></td><td></td><td></td><td></td><td></td></tr><tr><td>Query</td><td><code>W_Q</code></td><td><code>d_model</code></td><td><code>d_model</code></td><td><strong><code>[d_model, d_model]</code></strong></td></tr><tr><td>Key</td><td><code>W_K</code></td><td><code>d_model</code></td><td><code>d_model</code></td><td><strong><code>[d_model, d_model]</code></strong></td></tr><tr><td>Value</td><td><code>W_V</code></td><td><code>d_model</code></td><td><code>d_model</code></td><td><strong><code>[d_model, d_model]</code></strong></td></tr><tr><td>Output</td><td><code>W_O</code></td><td><code>d_model</code></td><td><code>d_model</code></td><td><strong><code>[d_model, d_model]</code></strong></td></tr><tr><td><strong>Feed-Forward Network</strong></td><td></td><td></td><td></td><td></td></tr><tr><td>Layer 1 (Up-projection)</td><td><code>W_ff1</code></td><td><code>d_model</code></td><td><code>d_ff</code></td><td><strong><code>[d_model, d_ff]</code></strong></td></tr><tr><td>Layer 2 
(Down-projection)</td><td><code>W_ff2</code></td><td><code>d_ff</code></td><td><code>d_model</code></td><td><strong><code>[d_ff, d_model]</code></strong></td></tr></tbody></table><h3 id=3-deconstructing-multi-head-attention-mha>3. Deconstructing Multi-Head Attention (MHA)
|
||||||
|
<a class=heading-link href=#3-deconstructing-multi-head-attention-mha><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The core innovation of the Transformer is Multi-Head Attention. It allows the model to weigh the importance of different tokens in the sequence from multiple perspectives simultaneously.
|
||||||
|
<img src=/images/transformer-s-core-mechanics/c7fe4af2633840cfbc81d7c4e3e42d0c.png alt="S3 File"></p><h4 id=31-the-why-beyond-a-single-attention>3.1. The “Why”: Beyond a Single Attention
|
||||||
|
<a class=heading-link href=#31-the-why-beyond-a-single-attention><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>A single attention mechanism would force the model to average all types of linguistic relationships into one pattern. MHA avoids this by creating <code>h</code> parallel subspaces. Each “head” can specialize, with one head learning syntactic dependencies, another tracking semantic similarity, and so on. This creates a much richer representation.</p><h4 id=32-an-encodingdecoding-analogy>3.2. An Encoding/Decoding Analogy
|
||||||
|
<a class=heading-link href=#32-an-encodingdecoding-analogy><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>A powerful way to conceptualize the attention calculation is as a two-stage process:</p><ol><li><strong>Encoding Relationships:</strong> The first part of the calculation, <code>softmax(Q @ K.T)</code>, can be seen as an encoding step. It does not use the actual “content” of the tokens (the <code>V</code> vectors). Instead, it uses the Queries and Keys to build a dynamic “relationship map” between tokens in the sequence. This map, a matrix of attention scores, answers the question: “For each token, how important is every other token right now?”</li><li><strong>Decoding via Information Retrieval:</strong> The second part, <code>scores @ V</code>, acts as a decoding step. It uses the relationship map to retrieve and synthesize information. For each token, it creates a new vector by taking a weighted sum of all the <code>V</code> vectors in the sequence, using the scores as the precise mixing recipe. It decodes the relational structure into a new, context-aware representation.</li></ol><h4 id=33-the-how-a-step-by-step-flow>3.3. The “How”: A Step-by-Step Flow
|
||||||
|
<a class=heading-link href=#33-the-how-a-step-by-step-flow><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The MHA process is designed for maximum computational efficiency.</p><ol><li><strong>Initial Projections:</strong> The input vectors (shape <code>[seq_len, d_model]</code>) are multiplied by <code>W_Q</code>, <code>W_K</code>, and <code>W_V</code>. These matrices are all <code>[d_model, d_model]</code> not to create one large query, but to <strong>efficiently compute the vectors for all <code>h</code> heads at once</strong>. The single large output vector is then reshaped into <code>h</code> separate vectors, each of size <code>d_head</code>.</li><li><strong>Attention Score Calculation:</strong> For each head <code>i</code>, a score matrix is calculated: <code>scores_i = softmax( (Q_i @ K_i.T) / sqrt(d_head) )</code>. Note that <code>Q_i</code> and <code>K_i</code> have dimensions <code>[seq_len, d_head]</code>, so the resulting <code>scores_i</code> matrix has a dimension of <strong><code>[seq_len, seq_len]</code></strong>.</li><li><strong>Weighted Value Calculation:</strong> The scores are used to create a weighted sum of the Value vectors for each head: <code>output_i = scores_i @ V_i</code>. Since <code>scores_i</code> is <code>[seq_len, seq_len]</code> and <code>V_i</code> is <code>[seq_len, d_head]</code>, the resulting <code>output_i</code> has a dimension of <strong><code>[seq_len, d_head]</code></strong>. This is the final output of a single head.</li><li><strong>Concatenation and Final Projection:</strong> The outputs of all <code>h</code> heads are concatenated along the last dimension. This produces a single large matrix of shape <code>[seq_len, h * d_head]</code>, which is equivalent to <code>[seq_len, d_model]</code>. This matrix is then passed through the final output projection layer, <code>W_O</code> (shape <code>[d_model, d_model]</code>), to produce the attention block’s final output. 
The <code>W_O</code> matrix learns the optimal way to mix the information from all the specialized heads into a single, unified representation.</li></ol><h3 id=4-optimizing-attention-gqa-and-mqa>4. Optimizing Attention: GQA and MQA
|
||||||
|
<a class=heading-link href=#4-optimizing-attention-gqa-and-mqa><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>During inference, storing the Key and Value vectors for all previous tokens (the KV Cache) is a major memory bottleneck. <strong>Grouped-Query Attention (GQA)</strong> and <strong>Multi-Query Attention (MQA)</strong> are architectural modifications that address this by allowing multiple Query heads to share the same Key and Value heads.</p><p>Let’s use a concrete example, similar to Llama 2 7B:</p><ul><li><code>d_model</code> = 4096</li><li><code>h</code> = 32 Q heads</li><li><code>d_head</code> = 128</li><li><code>g</code> = 8 KV head groups for GQA</li></ul><p>The key insight is that only the dimensions of the <code>W_K</code> and <code>W_V</code> matrices change, which in turn reduces the size of the KV cache. The <code>W_Q</code> and <code>W_O</code> matrices remain <code>[4096, 4096]</code>.</p><table><thead><tr><th>Attention Type</th><th>No. of Q Heads</th><th>No. of KV Heads</th><th><code>W_K</code> & <code>W_V</code> Dimension</th><th>Relative KV Cache Size</th></tr></thead><tbody><tr><td><strong>MHA</strong> (Multi-Head)</td><td>32</td><td>32</td><td><code>[4096, 32*128]</code> = <code>[4096, 4096]</code></td><td>1x (Baseline)</td></tr><tr><td><strong>GQA</strong> (Grouped)</td><td>32</td><td>8</td><td><code>[4096, 8*128]</code> = <code>[4096, 1024]</code></td><td>1/4x</td></tr><tr><td><strong>MQA</strong> (Multi-Query)</td><td>32</td><td>1</td><td><code>[4096, 1*128]</code> = <code>[4096, 128]</code></td><td>1/32x</td></tr></tbody></table><p>GQA provides a robust balance, significantly reducing the memory and bandwidth requirements for the KV cache with negligible impact on model performance, making it a popular choice in modern LLMs.</p><h3 id=5-mha-vs-mixture-of-experts-moe-a-clarification>5. MHA vs. Mixture of Experts (MoE): A Clarification
|
||||||
|
<a class=heading-link href=#5-mha-vs-mixture-of-experts-moe-a-clarification><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>While both MHA and MoE use the concept of “experts,” they are functionally and architecturally distinct.</p><ul><li><strong>MHA:</strong> The “experts” are the <strong>attention heads</strong>. All heads are active for every token to build a rich representation within the attention layer. This is akin to a board meeting where every member analyzes and contributes to every decision.</li><li><strong>MoE:</strong> The “experts” are full <strong>Feed-Forward Networks</strong>. A routing network selects a small subset of these FFNs for each token. This is a scaling strategy to increase a model’s parameter count for greater capacity while keeping the computational cost fixed. It replaces the standard FFN block, whereas MHA <em>is</em> the attention block.</li></ul><p>By understanding these technical details, from the basic concept of a channel to the sophisticated interplay of heads and experts, one can build a more complete and accurate mental model of how LLMs truly operate.</p><hr><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ol><li>Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., … & Polosukhin, I. (2017). Attention is all you need. <em>Advances in neural information processing systems</em>, 30.</li><li>Shazeer, N., Mirhoseini, A., Maziarz, K., Davis, A., Le, Q., Hinton, G., & Dean, J. (2017). Outrageously large neural networks: The sparsely-gated mixture-of-experts layer. <em>arXiv preprint arXiv:1701.06538</em>.</li><li>Ainslie, J., Lee-Thorp, J., de Jong, M., Zemlyanskiy, Y., Lebrón, F., & Sanghai, S. (2023). GQA: Training Generalized Multi-Query Transformer Models from Multi-Head Checkpoints. <em>arXiv preprint arXiv:2305.13245</em>.</li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous
onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
@@ -0,0 +1,31 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>UniFi VLAN Migration to Zone-Based Architecture · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.
|
||||||
|
This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="UniFi VLAN Migration to Zone-Based Architecture"><meta name=twitter:description content="Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.
|
||||||
|
This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model."><meta property="og:url" content="https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="UniFi VLAN Migration to Zone-Based Architecture"><meta property="og:description" content="Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.
|
||||||
|
This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-09-22T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-10T20:10:48+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async 
src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"UniFi VLAN Migration to Zone-Based Architecture","genre":"Blog","wordcount":"1001","url":"https:\/\/ericxliu.me\/posts\/unifi-vlan-migration-to-zone-based-architecture\/","datePublished":"2025-09-22T00:00:00\u002b00:00","dateModified":"2026-01-10T20:10:48\u002b00:00","description":"\u003cp\u003eEmbarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.\u003c\/p\u003e\n\u003cp\u003eThis article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/unifi-vlan-migration-to-zone-based-architecture/>UniFi VLAN Migration to Zone-Based Architecture</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-09-22T00:00:00Z>September 22, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
5-minute read</span></div></div></header><div class=post-content><p>Embarking on a network migration to a properly segmented VLAN architecture is a rite of passage for any serious home lab or small business operator. The goal is clear: improve security and organization by separating traffic. However, the path from a flat network to a segmented one is often paved with subtle but critical configuration details that can lead to hours of frustrating troubleshooting.</p><p>This article documents that journey. It details the pitfalls encountered, the core networking concepts that were essential to understand, and the best practices that ultimately led to a stable, secure, and logical network design built on a zone-based firewall model.</p><h3 id=lesson-1-demystifying-the-native-vlan>Lesson 1: Demystifying the Native VLAN
|
||||||
|
<a class=heading-link href=#lesson-1-demystifying-the-native-vlan><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The most significant source of initial problems was a fundamental misunderstanding of the “Native VLAN” setting on a switch port.</p><p><strong>The Misconception:</strong> It’s easy to assume that the “Native Network” on a port should be set to the VLAN you want the connected device to be on. For example, if a switch should be on the “corp” network (VLAN 10), one might set its management VLAN to <code>corp</code> and the upstream switch port’s Native Network to <code>corp</code> as well.</p><p><strong>The Reality:</strong> The Native VLAN on a trunk port has a specific purpose: it determines which VLAN any <strong>untagged</strong> traffic belongs to. A trunk port is designed to carry traffic for multiple VLANs by adding a “tag” to each packet. The one exception is the traffic for the Native VLAN, which is sent <em>without</em> a tag.</p><p>This leads to a critical rule: <strong>for a trunk link to function correctly, the Native VLAN must be the same on both ends of the connection.</strong> When they mismatch, management traffic from devices like switches and access points gets lost, sending them offline.</p><h3 id=lesson-2-the-power-of-a-dedicated-management-vlan>Lesson 2: The Power of a Dedicated Management VLAN
|
||||||
|
<a class=heading-link href=#lesson-2-the-power-of-a-dedicated-management-vlan><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>This realization about the Native VLAN led directly to the next critical architectural decision: isolating the network’s control plane. The initial plan involved using VLAN 1 for a DMZ, but this is a significant security risk, as VLAN 1 is often the default “catch-all” network.</p><p><strong>The Best Practice:</strong> The industry-standard solution is to create a dedicated <strong>Management VLAN</strong>. This network’s sole purpose is to be the home for the management interfaces of your router, switches, and access points.</p><p>The final, secure architecture was as follows:</p><ol><li>A new network, “Management” (e.g., VLAN 1, <code>192.168.1.0/24</code>), was created.</li><li>This network was assigned to its own “Management” firewall zone with highly restrictive rules.</li><li>All trunk ports connecting switches and access points were configured with “Management” as the <strong>Native VLAN</strong>.</li><li>All other user-facing VLANs (<code>corp</code>, <code>iot</code>, <code>dmz</code>) were configured as <strong>Tagged VLANs</strong> on these trunk ports.</li></ol><p>This isolates the network’s control plane from the data plane, vastly improving the security posture.</p><h3 id=lesson-3-mastering-inter-vlan-communication>Lesson 3: Mastering Inter-VLAN Communication
|
||||||
|
<a class=heading-link href=#lesson-3-mastering-inter-vlan-communication><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>With traffic properly segmented at Layer 2, the next challenge was controlling communication at Layer 3. This is the job of the router and its firewall, and it presented a common challenge: providing DHCP to clients when the server resides in a different VLAN.</p><p>DHCP requests are broadcasts and are not passed between VLANs by a router. The solution is to use a <strong>DHCP Relay</strong>.</p><ol><li>On the network configuration for a client VLAN (e.g., <code>corp</code>), the DHCP mode was changed from “Server” to “Relay”.</li><li>The IP address of the actual DHCP server was specified.</li></ol><p>This instructs the router to listen for DHCP broadcasts, catch them, and forward them as a unicast packet directly to the DHCP server. For this to work, the firewall must allow this traffic, and the DHCP server itself must be configured with a “scope” or pool of IP addresses for the client’s subnet.</p><h3 id=the-final-architecture-a-zone-based-firewall-model>The Final Architecture: A Zone-Based Firewall Model
|
||||||
|
<a class=heading-link href=#the-final-architecture-a-zone-based-firewall-model><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>The culmination of these lessons is a network architecture defined by clear, logical zones, each with a distinct purpose and trust level. This model simplifies firewall management and provides a robust security posture that is easy to understand at a glance.</p><h4 id=network-zones-and-their-roles>Network Zones and Their Roles
|
||||||
|
<a class=heading-link href=#network-zones-and-their-roles><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The final configuration groups the individual VLANs into distinct zones, forming the foundation of the security policy.</p><ul><li><strong>Internal:</strong> Contains the <code>corp</code> network. This is the most trusted zone for daily work.</li><li><strong>DMZ:</strong> Contains the <code>dns</code> and <code>prod</code> networks for semi-trusted, exposed services.</li><li><strong>IoT:</strong> Contains the <code>iot</code> network. This is a low-trust zone for smart devices.</li><li><strong>Management:</strong> Contains the <code>management</code> network. This is a highly privileged, isolated zone for network infrastructure.
|
||||||
|
<img src=/images/unifi-vlan-migration-to-zone-based-architecture/472bf0cd504f4cd7ab7a33cd3322a5f1.png alt="S3 File"></li></ul><h4 id=the-security-policy-matrix>The Security Policy Matrix
|
||||||
|
<a class=heading-link href=#the-security-policy-matrix><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>The true power of this model is realized in the firewall’s zone matrix, which dictates the default traffic flow between each zone.
|
||||||
|
<img src=/images/unifi-vlan-migration-to-zone-based-architecture/663d732d14fc4fa8ad051c6926523efb.png alt="S3 File"></p><p>This matrix enforces the desired security policy with clear, high-level rules:</p><ul><li><strong>Complete IoT Isolation:</strong> The <code>IoT</code> row shows that devices in this zone are blocked from initiating any communication with any other internal zone. Their only allowed path is out to the internet.</li><li><strong>Protected Management Plane:</strong> The <code>management</code> row and column are almost entirely red. The critical network infrastructure is blocked from initiating contact with any user-facing zone, and vice-versa, following the principle of least privilege.</li><li><strong>Controlled DMZ Access:</strong> The <code>DMZ</code> is prevented from initiating connections to the trusted <code>Internal</code> zone, preventing a compromised public-facing server from being used as a pivot point to attack internal devices.</li></ul><h4 id=granular-intra-zone-control>Granular Intra-Zone Control
|
||||||
|
<a class=heading-link href=#granular-intra-zone-control><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Beyond the high-level zone policies, the configuration also implements granular rules to control traffic <em>within</em> a single zone, providing defense-in-depth.</p><p>These rules explicitly define the communication paths between services. For instance, rules allow a specific device to access a Kubernetes load balancer while another rule allows general DNS access within the zone. This ensures that even within a semi-trusted zone, services can only communicate in expected and necessary ways, further reducing the potential attack surface.</p><p>By adhering to these principles, what began as a day of frustrating troubleshooting evolved into a robust, layered, and logically segmented network that balances simplicity with strong security practices.</p><hr><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><ul><li><a href=https://help.ui.com/hc/en-us/articles/7258465146519-Troubleshooting-UniFi-Device-Connectivity class=external-link target=_blank rel=noopener>Troubleshooting UniFi Device Connectivity</a></li><li><a href=https://help.ui.com/hc/en-us/articles/9592924981911-Virtual-Network-VLAN-Troubleshooting class=external-link target=_blank rel=noopener>Virtual Network (VLAN) Troubleshooting</a></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
12
posts/useful/index.html
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Some useful files · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="
|
||||||
|
rootCA.pem
|
||||||
|
"><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Some useful files"><meta name=twitter:description content="rootCA.pem"><meta property="og:url" content="https://ericxliu.me/posts/useful/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Some useful files"><meta property="og:description" content="rootCA.pem"><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2020-10-26T04:14:43+00:00"><meta property="article:modified_time" content="2025-08-03T08:37:28-07:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/useful/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png 
href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Some useful files","genre":"Blog","wordcount":"1","url":"https:\/\/ericxliu.me\/posts\/useful\/","datePublished":"2020-10-26T04:14:43\u002b00:00","dateModified":"2025-08-03T08:37:28-07:00","description":"\u003cul\u003e\n\u003cli\u003e\u003ca href=\u0022\/rootCA.crt\u0022 \u003erootCA.pem\u003c\/a\u003e\u003c\/li\u003e\n\u003c\/ul\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/useful/>Some useful files</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2020-10-26T04:14:43Z>October 26, 2020
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
One-minute read</span></div></div></header><div class=post-content><ul><li><a href=/rootCA.crt>rootCA.pem</a></li></ul></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
34
posts/vattention/index.html
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>vAttention · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
|
||||||
|
|
||||||
|
The Status Quo: PagedAttention and Software Tables
|
||||||
|
|
||||||
|
|
||||||
|
Link to heading
|
||||||
|
|
||||||
|
|
||||||
|
Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="vAttention"><meta name=twitter:description content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
|
||||||
|
The Status Quo: PagedAttention and Software Tables Link to heading Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta property="og:url" content="https://ericxliu.me/posts/vattention/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="vAttention"><meta property="og:description" content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
|
||||||
|
The Status Quo: PagedAttention and Software Tables Link to heading Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-08T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-19T21:21:55+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/vattention/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon 
href=/images/safari-pinned-tab.svg color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"vAttention","genre":"Blog","wordcount":"824","url":"https:\/\/ericxliu.me\/posts\/vattention\/","datePublished":"2025-12-08T00:00:00\u002b00:00","dateModified":"2025-12-19T21:21:55\u002b00:00","description":"\u003cp\u003eLarge Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. 
While \u003cstrong\u003ePagedAttention\u003c\/strong\u003e (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.\u003c\/p\u003e\n\u003ch4 id=\u0022the-status-quo-pagedattention-and-software-tables\u0022\u003e\n The Status Quo: PagedAttention and Software Tables\n \u003ca class=\u0022heading-link\u0022 href=\u0022#the-status-quo-pagedattention-and-software-tables\u0022\u003e\n \u003ci class=\u0022fa-solid fa-link\u0022 aria-hidden=\u0022true\u0022 title=\u0022Link to heading\u0022\u003e\u003c\/i\u003e\n \u003cspan class=\u0022sr-only\u0022\u003eLink to heading\u003c\/span\u003e\n \u003c\/a\u003e\n\u003c\/h4\u003e\n\u003cp\u003ePrior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined \u0026ldquo;page table\u0026rdquo; (the Block Table) [1].\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/vattention/>vAttention</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2025-12-08T00:00:00Z>December 8, 2025
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
4-minute read</span></div></div></header><div class=post-content><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p><h4 id=the-status-quo-pagedattention-and-software-tables>The Status Quo: PagedAttention and Software Tables
|
||||||
|
<a class=heading-link href=#the-status-quo-pagedattention-and-software-tables><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1].</p><p>While effective at reducing fragmentation, this approach introduces significant complexity:</p><ul><li><strong>Kernel Rewriting:</strong> Because the KV cache is no longer contiguous in virtual memory, standard attention kernels (like cuDNN SDPA or vanilla FlashAttention) cannot be used directly. Developers must rewrite kernels to manually dereference block tables [1].</li><li><strong>Software Overhead:</strong> The system must manage virtual-to-physical mapping in user space, duplicating work typically handled by the OS. This adds runtime overhead to the critical path of both the CPU (managing tables) and the GPU (performing lookups) [1].</li><li><strong>Performance Penalties:</strong> PagedAttention-based kernels have been observed to be slower than their non-paged counterparts. For example, vLLM’s paged kernel has shown to be up to 2.8x slower than FlashAttention-2 in specific tests [1].</li></ul><h4 id=the-hardware-native-alternative-vattention>The Hardware-Native Alternative: vAttention
|
||||||
|
<a class=heading-link href=#the-hardware-native-alternative-vattention><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p><strong>vAttention</strong> proposes returning the responsibility of memory management to the OS and hardware. By utilizing the CUDA Virtual Memory Management (VMM) APIs, it is possible to decouple the allocation of virtual memory from physical memory [1].</p><p><strong>How it works:</strong></p><ol><li><strong>Virtual Contiguity:</strong> The system reserves a large, contiguous range of virtual addresses for the KV cache at request start.</li><li><strong>Physical Paging:</strong> Physical memory pages are allocated and mapped to this virtual range only on demand (dynamically) as the token sequence grows [1].</li><li><strong>Hardware Lookups:</strong> Because the GPU sees a contiguous virtual address range, the hardware Translation Lookaside Buffer (TLB) handles the address translation. This allows the use of unmodified, high-performance kernels like FlashAttention-2 or FlashAttention-3 without custom paging logic [1].</li></ol><h4 id=technical-challenges-and-solutions>Technical Challenges and Solutions
|
||||||
|
<a class=heading-link href=#technical-challenges-and-solutions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Historically, using the GPU native virtual memory for high-frequency token generation faced two major bottlenecks: <strong>Control Plane Latency</strong> and <strong>Page Granularity</strong>.</p><p><strong>1. Control Plane Latency (The API Bottleneck)</strong>
|
||||||
|
Standard memory allocation (<code>cudaMalloc</code>) is monolithic—it allocates virtual and physical memory simultaneously. The more granular driver API, <code>cuMemMap</code>, allows separating these steps but involves expensive round-trips to the OS driver. Invoking these APIs synchronously during decoding (which generates one token at a time) would stall the GPU execution pipeline [1].</p><p>To solve this, vAttention utilizes <strong>execution overlap</strong>:</p><ul><li>Because LLM decoding is autoregressive and predictable, the system knows exactly when new memory is needed (one token ahead).</li><li>The CPU initiates the memory mapping for the <em>next</em> token asynchronously while the GPU is still computing the <em>current</em> token. By the time the GPU reaches the next step, the TLB and page tables are already updated, effectively hiding the driver latency [1].</li></ul><p><strong>2. Page Size Granularity (The Fragmentation Bottleneck)</strong>
|
||||||
|
The GPU TLB hierarchy is sensitive to page sizes.</p><ul><li><strong>4KB Pages:</strong> Too small. Mapping gigabytes of KV cache with 4KB pages causes “TLB thrashing,” degrading performance.</li><li><strong>2MB Huge Pages:</strong> The standard for CUDA large allocations. However, allocating 2MB for a single token update causes massive internal fragmentation, negating the benefits of dynamic allocation.</li></ul><p>Research identified <strong>64KB</strong> as the optimal page size, offering a balance between TLB efficiency and memory utilization. While standard CUDA APIs default to 2MB, vAttention utilizes modified driver calls to enable 64KB pages, eliminating TLB thrashing without incurring the fragmentation cost of huge pages [1].</p><h4 id=performance-and-portability-implications>Performance and Portability Implications
|
||||||
|
<a class=heading-link href=#performance-and-portability-implications><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>Moving memory management from software (PagedAttention) to hardware (vAttention) yields measurable benefits:</p><ul><li><strong>Throughput:</strong> In prefill-heavy workloads, vAttention outperforms PagedAttention-based systems (like vLLM and FlashInfer) by up to 1.23x due to the elimination of software lookup overheads. In decoding, it matches or exceeds the performance of optimized paged kernels [1].</li><li><strong>Portability:</strong> A significant advantage is software compatibility. When NVIDIA released FlashAttention-3 (optimized for Hopper H100 GPUs), it did not initially support PagedAttention. vAttention enabled the immediate use of FlashAttention-3 with dynamic memory support, achieving up to 1.5x higher throughput than PagedAttention-based FlashAttention-2 [1].</li></ul><h4 id=conclusion>Conclusion
|
||||||
|
<a class=heading-link href=#conclusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h4><p>While PagedAttention solved the critical issue of memory fragmentation in LLMs, it necessitated a complex software abstraction layer. By leveraging low-level CUDA VMM APIs, handling allocations asynchronously to hide driver latency, and optimizing page sizes, it is possible to achieve dynamic memory management using the GPU’s native hardware. This restores the illusion of contiguous memory, simplifies kernel development, and improves inference performance.</p><h3 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>[1] R. Prabhu et al., “vAttention: Dynamic Memory Management for Serving LLMs without PagedAttention,” in <em>Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS ‘25)</em>, 2025.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
35
posts/vibe-coding-from-the-jeep/index.html
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="“Vibe coding” has become my latest obsession. It’s that flow state where the tools disappear, and you’re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I’ve been trying to answer a childhood dream.
|
||||||
|
Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with Baywatch, one of the first American TV shows I ever watched was Knight Rider. I don’t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams"><meta name=twitter:description content="“Vibe coding” has become my latest obsession. It’s that flow state where the tools disappear, and you’re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I’ve been trying to answer a childhood dream.
|
||||||
|
Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with Baywatch, one of the first American TV shows I ever watched was Knight Rider. I don’t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you."><meta property="og:url" content="https://ericxliu.me/posts/vibe-coding-from-the-jeep/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams"><meta property="og:description" content="“Vibe coding” has become my latest obsession. It’s that flow state where the tools disappear, and you’re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I’ve been trying to answer a childhood dream.
|
||||||
|
Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with Baywatch, one of the first American TV shows I ever watched was Knight Rider. I don’t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2026-01-21T00:00:00+00:00"><meta property="article:modified_time" content="2026-01-22T06:48:07+00:00"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/posts/vibe-coding-from-the-jeep/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg 
color=#5bbad5><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script><script type=application/ld+json>{"@context":"http://schema.org","@type":"BlogPosting","headline":"Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams","genre":"Blog","wordcount":"665","url":"https:\/\/ericxliu.me\/posts\/vibe-coding-from-the-jeep\/","datePublished":"2026-01-21T00:00:00\u002b00:00","dateModified":"2026-01-22T06:48:07\u002b00:00","description":"\u003cp\u003e\u0026ldquo;Vibe coding\u0026rdquo; has become my latest obsession. It\u0026rsquo;s that flow state where the tools disappear, and you\u0026rsquo;re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I\u0026rsquo;ve been trying to answer a childhood dream.\u003c\/p\u003e\n\u003cp\u003eGrowing up in China before the internet age, my window to the outside world was CCTV-6. Along with \u003cem\u003eBaywatch\u003c\/em\u003e, one of the first American TV shows I ever watched was \u003cem\u003eKnight Rider\u003c\/em\u003e. I don\u0026rsquo;t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.\u003c\/p\u003e","author":{"@type":"Person","name":"Eric X. Liu"}}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. 
Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=https://ericxliu.me/posts/vibe-coding-from-the-jeep/>Hacking a Chinese Car Stereo to fulfill my Knight Rider dreams</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||||
|
<time datetime=2026-01-21T00:00:00Z>January 21, 2026
|
||||||
|
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||||
|
4-minute read</span></div></div></header><div class=post-content><p>“Vibe coding” has become my latest obsession. It’s that flow state where the tools disappear, and you’re just manipulating logic at the speed of thought. Usually, this happens in a high-end IDE like Antigravity. But lately, I’ve been trying to answer a childhood dream.</p><p>Growing up in China before the internet age, my window to the outside world was CCTV-6. Along with <em>Baywatch</em>, one of the first American TV shows I ever watched was <em>Knight Rider</em>. I don’t remember the exact plot lines, but the core concept stuck with me forever: KITT. A car that could talk, think, and do things for you.</p><p>Decades later, I’m sitting in my Jeep, wondering: Can I build my own KITT? Can I take the vibe on the road?</p><p>I already updated the head unit in my Jeep to an aftermarket unit. It features a <strong>K706 (UIS7862S)</strong> chipset with an <strong>8-core CPU and 8GB of RAM</strong>, essentially making it a reasonably powerful Android tablet hardwired into the dashboard.</p><h2 id=the-objective>The Objective
|
||||||
|
<a class=heading-link href=#the-objective><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>Turn this car accessory into a legitimate dev environment. I wanted a physical keyboard, a real terminal, and access to my AI coding assistants. I wanted to push code while parked on a trail.</p><h2 id=the-hardware-blocker-getting-input>The Hardware Blocker: Getting Input
|
||||||
|
<a class=heading-link href=#the-hardware-blocker-getting-input><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>The first hurdle was mundane but blocking: My Bluetooth keyboard wouldn’t pair. The head unit could see other devices, but refused to connect to my keyboard.</p><h3 id=attempt-1-the-usb-dongle-bypass>Attempt 1: The USB Dongle Bypass
|
||||||
|
<a class=heading-link href=#attempt-1-the-usb-dongle-bypass><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>My first instinct was to blame the cheap Chinese head unit hardware. I grabbed a spare TP-Link USB Bluetooth dongle and plugged it in, hoping to bypass the internal stack entirely.</p><p>The device showed up in <code>lsusb</code>, but it remained inert. A quick check of the kernel config via <code>zcat /proc/config.gz</code> revealed the bad news:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># CONFIG_BT is not set</span>
|
||||||
|
</span></span></code></pre></div><p>The kernel was compiled without generic Bluetooth driver support (<code>btusb</code>). Even with root access, I couldn’t load the drivers because they simply didn’t exist in the firmware. I was stuck with the internal hardware.</p><h3 id=attempt-2-the-dual-bluetooth-fix>Attempt 2: The “Dual Bluetooth” Fix
|
||||||
|
<a class=heading-link href=#attempt-2-the-dual-bluetooth-fix><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h3><p>Forced back to the built-in Bluetooth, I tried to diagnose why it was ignoring my keyboard. Standard debugging tools painted a grim picture:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>❯ hciconfig -a
|
||||||
|
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># (Empty output - no standard HCI interface found)</span>
|
||||||
|
</span></span><span style=display:flex><span>
|
||||||
|
</span></span><span style=display:flex><span>❯ ps -A | grep -iE <span style=color:#a5d6ff>"goc|ivt|syu"</span>
|
||||||
|
</span></span><span style=display:flex><span>u0_a50 <span style=color:#a5d6ff>3456</span> ... com.goc.sdk <span style=color:#8b949e;font-style:italic># Accessing the proprietary BT chip</span>
|
||||||
|
</span></span></code></pre></div><p>The diagnosis was clear: The internal Bluetooth chip is acting in <strong>Slave Mode</strong> (Client), managed by a proprietary <code>com.goc.sdk</code> service instead of the standard Android Bluetooth stack. It’s designed to <em>be</em> a speaker for your phone, not to <em>host</em> a keyboard.</p><p><strong>The Fix</strong>: Hidden deep in the Factory Settings (password <code>8888</code>), there’s a toggle called <strong>“Dual Bluetooth”</strong>. Enabling this flips the proprietary stack to expose a standard Host interface. Enable that, and suddenly my mechanical keyboard connected instantly.</p><h2 id=the-software-termux--claude>The Software: Termux + Claude
|
||||||
|
<a class=heading-link href=#the-software-termux--claude><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><p>With input sorted, the software setup was surprisingly straightforward. <strong>Termux</strong> was the obvious choice for a terminal.</p><p>I discovered that <strong>Claude Code</strong> works on Termux with zero hassle.</p><p>The setup was shockingly simple:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>pkg install nodejs git ripgrep
|
||||||
|
</span></span><span style=display:flex><span>npm install -g @anthropic-ai/claude-code
|
||||||
|
</span></span></code></pre></div><p>Authentication via <code>claude login</code> worked out of the box. Now, I have a fully capable coding agent running directly on my dashboard. I can pull a repo, ask Claude to refactor a module, and push the changes—all without opening a laptop.</p><p><img src=/images/vibe-coding-from-the-jeep/399000b0b5ee4f5e8961e1d76b6c23c8.png alt="S3 File"></p><h2 id=key-insights>Key Insights
|
||||||
|
<a class=heading-link href=#key-insights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ul><li><strong>Head Units are just Weird Tablets</strong>: They have quirks (like Slave-only Bluetooth), but they are standard Android under the hood. <code>adb root</code> is your best friend for diagnosing them.</li><li><strong>Check the Kernel Config</strong>: Before buying hardware peripherals for embedded Android devices, always check <code>/proc/config.gz</code>. If the support isn’t compiled in, you’re dead in the water.</li><li><strong>The Vibe is Portable</strong>: With tools like Termux and Claude Code, the “dev environment” is no longer a heavy laptop. It’s anywhere you have a terminal.</li></ul><h2 id=references>References
|
||||||
|
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||||
|
<span class=sr-only>Link to heading</span></a></h2><ol><li><a href=https://www.reddit.com/r/termux/comments/1jd4y4y/claude_code_is_easy_to_install_on_termux/ class=external-link target=_blank rel=noopener>Reddit: Claude Code on Termux</a></li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
4
robots.txt
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
User-agent: *
|
||||||
|
Allow: /
|
||||||
|
|
||||||
|
Sitemap: https://ericxliu.me/sitemap.xml
|
||||||
34
rootCA.crt
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
-----BEGIN CERTIFICATE-----
|
||||||
|
MIIF2DCCA8CgAwIBAgIUMxAajDuiWUFtwePBQChCPyqvyIowDQYJKoZIhvcNAQEL
|
||||||
|
BQAwcjELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMRowGAYDVQQKDBFlcmljeGxp
|
||||||
|
dS5tZSwgSW5jLjEXMBUGA1UEAwwOZXJpY3hsaXUubG9jYWwxITAfBgkqhkiG9w0B
|
||||||
|
CQEWEm1hc3RlckBlcmljeGxpdS5tZTAeFw0yNDAxMDgwMzA0NDFaFw0yNjAxMDgw
|
||||||
|
MzA0NDFaMHIxCzAJBgNVBAYTAlVTMQswCQYDVQQIDAJDQTEaMBgGA1UECgwRZXJp
|
||||||
|
Y3hsaXUubWUsIEluYy4xFzAVBgNVBAMMDmVyaWN4bGl1LmxvY2FsMSEwHwYJKoZI
|
||||||
|
hvcNAQkBFhJtYXN0ZXJAZXJpY3hsaXUubWUwggIiMA0GCSqGSIb3DQEBAQUAA4IC
|
||||||
|
DwAwggIKAoICAQDedDTBe0+qRV1r+kRvMZzFkensiKMpL4T9bRbAbNFfS8QufHp9
|
||||||
|
wJoMh5xW4XPJtqkYdYnnoefaZS9a9DMHjw1+f7lL0vzIfzSO5JWTZQSAsi0yeqDn
|
||||||
|
j1l8ShYrPZvQR+NUht9qAztbhIcBy3FFVOFFMZjZaYIwF1C3QBv5h2/yfgw0uad/
|
||||||
|
rOEw1G1Z/xlj7K+rvm59+vzduASfFY6NMG0PFzY1jRnWZ4diiqWJEM02EAevosbW
|
||||||
|
Xg1CFRkoe+s088QXl4WZLxpHvsiKdvKjaaKXrQieAYL2Kl3DOziN7P659q0Bk2tm
|
||||||
|
yp0B81QZV24mhg5WCuwrteiOJz51vck/T+hWDFKjPwa+GjGpqGiXjJMBfS/MyGMf
|
||||||
|
mdnPdcMeKQo2Mx4hpl/h116xFY60Tzto/PI4Kb4VBTKkN0hu7BLDSU4l8PkiSSAd
|
||||||
|
0E2Kzg4P9BQgvVc/BhoR7oKebf2TCeTVN+gC9HRsBdzBA3mtp60Qd9XBFAkbDqZq
|
||||||
|
nusA8KEG10az4cXaMIohAsRh9AVz4tHxTOq2dgw9AE8EEfQzgcMQl4hV4TkYFubC
|
||||||
|
t/gm16yEvsPBMFjptLu4S7mOpSdaJylOXVcMZ6PgeGAlrbuYunblYtdyKVyNVFeX
|
||||||
|
ca6RPAbDthWSqrbzigCvSeqhRpPmEq5p51BFGA+QK2b1Bj7dF0yiDO5zbwIDAQAB
|
||||||
|
o2YwZDAfBgNVHSMEGDAWgBQEK7HddEflCZ9DL9VEIBXzB9dQFTAOBgNVHQ8BAf8E
|
||||||
|
BAMCAgQwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUBCux3XRH5QmfQy/V
|
||||||
|
RCAV8wfXUBUwDQYJKoZIhvcNAQELBQADggIBAKF16Ps4AccXsNDRqQANF/kcNZ2y
|
||||||
|
SKB3cNsOfWxKfgppkl43z9cimgGGbNn0mVGjaOzXdXHEEQ0Uuv3tkvgQA2KraaTy
|
||||||
|
wLG5+RQKIVRaOgWufXbL76JV6mMf8v3o8/o5EL+uC/2KxpDH0N1BOJ0hJB2/hbra
|
||||||
|
kHPuYobj1SWtPeO5lRdZed05kdiAWH7e3/PmKgH13tZLnnzCHRC1YNkk2Cdhp082
|
||||||
|
XL5zUtDdbWAm6UgM4Reg4MKZMZzmYDn+1/wW6D5oO5ZXlJF2QqjqfTXn6fKJWM9d
|
||||||
|
JK3O5vx+LquAMu1G9gkqmTZntQQ3ZDGs9bMfWchgWPWN1ignJgmqnIgIbvdAHhdL
|
||||||
|
DOz3WE53vpcUY35TOs/YgIj81vAZuhuaYQZcTL4H34c3ShdVi6RY3Y+yPxM9MjRc
|
||||||
|
zqzEMg4KTnK7Es+t4Yep7vOQRo3WN1A+lXsRf+n2XBTCTwFOCury64AjMQn5H0yb
|
||||||
|
aZGvvf3UnIdUrJjPGjF9W/uIpy0TDpsKo/qizAdQ5c18p2ihVO8mHHZhJnIQW9er
|
||||||
|
p8M0m6/woalM94apYNdY6YAbsej5gNktx+z2ptdPNmE3k3OevDFqRNSLh29Rr2vM
|
||||||
|
CfO6MjR4Bkilw5A67jQFQnLF6Y9TYqW0HlEvdODNvO9aR5RSwaNTGJBcjynrsL3v
|
||||||
|
IG73ZMQl6utPkbKh
|
||||||
|
-----END CERTIFICATE-----
|
||||||
7
series/index.html
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Series · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Series"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/series/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Series"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/series/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link 
rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/series/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/series/>Series</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
1
series/index.xml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Series on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/series/</link><description>Recent content in Series on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/series/index.xml" rel="self" type="application/rss+xml"/></channel></rss>
|
||||||
1
series/page/1/index.html
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>https://ericxliu.me/series/</title><link rel=canonical href=https://ericxliu.me/series/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/series/"></head></html>
|
||||||
20
site.webmanifest
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
"name": "Eric X. Liu's Personal Page",
|
||||||
|
"short_name": "Eric Liu",
|
||||||
|
"description": "Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities.",
|
||||||
|
"start_url": "/",
|
||||||
|
"display": "standalone",
|
||||||
|
"background_color": "#ffffff",
|
||||||
|
"theme_color": "#000000",
|
||||||
|
"icons": [
|
||||||
|
{
|
||||||
|
"src": "/images/gravatar.png",
|
||||||
|
"sizes": "192x192",
|
||||||
|
"type": "image/png",
|
||||||
|
"purpose": "any maskable"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"categories": ["technology", "engineering", "blog"],
|
||||||
|
"lang": "en",
|
||||||
|
"orientation": "portrait-primary"
|
||||||
|
}
|
||||||
1
sitemap.xml
Normal file
|
Before Width: | Height: | Size: 26 KiB |
@@ -1,33 +0,0 @@
|
|||||||
-----BEGIN CERTIFICATE-----
|
|
||||||
MIIFoDCCA4igAwIBAgIUJzlDGIEJdOQ0Shd1P0RJP5aangAwDQYJKoZIhvcNAQEL
|
|
||||||
BQAwYTELMAkGA1UEBhMCVVMxCzAJBgNVBAgMAkNBMREwDwYDVQQHDAhTYW4gSm9z
|
|
||||||
ZTEUMBIGA1UECgwLZXJpY3hsaXUubWUxHDAaBgkqhkiG9w0BCQEWDV9AZXJpY3hs
|
|
||||||
aXUubWUwHhcNMjExMTIyMTgzMzQyWhcNMjQwOTExMTgzMzQyWjBhMQswCQYDVQQG
|
|
||||||
EwJVUzELMAkGA1UECAwCQ0ExETAPBgNVBAcMCFNhbiBKb3NlMRQwEgYDVQQKDAtl
|
|
||||||
cmljeGxpdS5tZTEcMBoGCSqGSIb3DQEJARYNX0BlcmljeGxpdS5tZTCCAiIwDQYJ
|
|
||||||
KoZIhvcNAQEBBQADggIPADCCAgoCggIBAN50NMF7T6pFXWv6RG8xnMWR6eyIoykv
|
|
||||||
hP1tFsBs0V9LxC58en3AmgyHnFbhc8m2qRh1ieeh59plL1r0MwePDX5/uUvS/Mh/
|
|
||||||
NI7klZNlBICyLTJ6oOePWXxKFis9m9BH41SG32oDO1uEhwHLcUVU4UUxmNlpgjAX
|
|
||||||
ULdAG/mHb/J+DDS5p3+s4TDUbVn/GWPsr6u+bn36/N24BJ8Vjo0wbQ8XNjWNGdZn
|
|
||||||
h2KKpYkQzTYQB6+ixtZeDUIVGSh76zTzxBeXhZkvGke+yIp28qNpopetCJ4BgvYq
|
|
||||||
XcM7OI3s/rn2rQGTa2bKnQHzVBlXbiaGDlYK7Cu16I4nPnW9yT9P6FYMUqM/Br4a
|
|
||||||
MamoaJeMkwF9L8zIYx+Z2c91wx4pCjYzHiGmX+HXXrEVjrRPO2j88jgpvhUFMqQ3
|
|
||||||
SG7sEsNJTiXw+SJJIB3QTYrODg/0FCC9Vz8GGhHugp5t/ZMJ5NU36AL0dGwF3MED
|
|
||||||
ea2nrRB31cEUCRsOpmqe6wDwoQbXRrPhxdowiiECxGH0BXPi0fFM6rZ2DD0ATwQR
|
|
||||||
9DOBwxCXiFXhORgW5sK3+CbXrIS+w8EwWOm0u7hLuY6lJ1onKU5dVwxno+B4YCWt
|
|
||||||
u5i6duVi13IpXI1UV5dxrpE8BsO2FZKqtvOKAK9J6qFGk+YSrmnnUEUYD5ArZvUG
|
|
||||||
Pt0XTKIM7nNvAgMBAAGjUDBOMB0GA1UdDgQWBBQEK7HddEflCZ9DL9VEIBXzB9dQ
|
|
||||||
FTAfBgNVHSMEGDAWgBQEK7HddEflCZ9DL9VEIBXzB9dQFTAMBgNVHRMEBTADAQH/
|
|
||||||
MA0GCSqGSIb3DQEBCwUAA4ICAQDKVGKjd1v6vecfNLZZ4+bqw4nwzzVwOdOWb2j+
|
|
||||||
zqPmYT/ZzCCxeiWLIaYtOQWXR4eSzULWYAGauecVlVYydbRbDC6LXp/1NrfQuNpp
|
|
||||||
6kd9JRGGdnNrW+0tEfJiXnEpOTwKncI1u6B0pvND8Gy6sxgjamyiKAh1vy0IZYJk
|
|
||||||
2T7PXxljqGxFZXZ5Ese/ogPn5KRGPkOmbW/BQXWC//3Qe39J6lxy2/HwfZ9pa+AQ
|
|
||||||
TxcJ/2OiDgBprMPJrHBiqvjoI9kp8vk3JhAQmbVM+8bpAIiiW8dPiEBDtROe/Wk5
|
|
||||||
UuiebFQNIebaIG+nEruUR28Df3Q52k6dY4MWLVNqB9lKKCqnbYtxDUIQrFCSHAEq
|
|
||||||
IdeOTEPjpkBr6UWwEunk32Mq6mdqmf5zBNaS64Wva43SLx+p/MIIacCYxOH7CHJX
|
|
||||||
r6XO/tR95cO4N3LdA/aJYpY0M35tFftFKI/AD5vEwshgYDw9QU1fu3Wljw3wYSVx
|
|
||||||
8YPKKwRkEBslEBmqf9YooDtGw3bLkQbJml0uMgxXOYI/VD95azvguq1lmcSdTTPu
|
|
||||||
f1GC0YnpQnXT6gPHNLoMhGiQUTlwHp2GKdaW0Xb9DEOLurzBZ9FIQsvrgclpJ49x
|
|
||||||
avp4Sgk3wLVue5iOKqlZL5fQIjckQEVR8vieKnZgGx6amVS9a5gB0GbAhkD06Y4p
|
|
||||||
M3O6VQ==
|
|
||||||
-----END CERTIFICATE-----
|
|
||||||
@@ -1,140 +0,0 @@
|
|||||||
verb 3
|
|
||||||
nobind
|
|
||||||
dev tun
|
|
||||||
client
|
|
||||||
remote 24.4.129.175 1194 udp
|
|
||||||
fast-io
|
|
||||||
compress lzo
|
|
||||||
auth-nocache
|
|
||||||
remote-cert-tls server
|
|
||||||
<tls-crypt>
|
|
||||||
-----BEGIN OpenVPN Static key V1-----
|
|
||||||
d188baecfc63820df3a11c50aa887c4e7236ff8021049038aec03f4f2a46376b
|
|
||||||
aee8d80d06dbd812b84962937bed7003fdf64c264e9b7423925dbce4dd38b4e0
|
|
||||||
a3bdfe6e656550a63430338c0dd4bcd4c694221c7561fa9e6da3efd0334a57ee
|
|
||||||
5926acc05f768339b4712bf005d7eeb27f2da8dc8f4861b718b6683eb42869c0
|
|
||||||
e11a1ac6c36daea5c79d7e08830de1c6f0a55207bb39e9c0420db34b3a631975
|
|
||||||
5cfcef448f6664fde5d40e31e381503a6a724eebd7cfd76fe6d7108edc83b5ab
|
|
||||||
ea1e66af70837d15a9d8ba58c82018b4cd669deb2323ba60d7c7ea8a398483aa
|
|
||||||
2dec8aa6890dc2f60ff5be1a5c2a6a2fe95efa27f75c38735335e7f6f39b256e
|
|
||||||
-----END OpenVPN Static key V1-----
|
|
||||||
</tls-crypt>
|
|
||||||
<ca>
|
|
||||||
-----BEGIN CERTIFICATE-----
|
|
||||||
MIIFNTCCAx2gAwIBAgIJAJBOAeknPeLqMA0GCSqGSIb3DQEBCwUAMDExCzAJBgNV
|
|
||||||
BAYTAkdCMQ8wDQYDVQQIDAZMb25kb24xETAPBgNVBAoMCFdXVyBMdGQuMB4XDTE5
|
|
||||||
MDEwNjA3NTIxOVoXDTI5MDEwMzA3NTIxOVowMTELMAkGA1UEBhMCR0IxDzANBgNV
|
|
||||||
BAgMBkxvbmRvbjERMA8GA1UECgwIV1dXIEx0ZC4wggIiMA0GCSqGSIb3DQEBAQUA
|
|
||||||
A4ICDwAwggIKAoICAQDpq5CFMT1VWb2MeaHXi4FpCLDXwnzaS+3qGCa3COdNg2BD
|
|
||||||
tkQOPJNTgVhGn5XcfSDZnnVpXXrPDAqEDCUVVZj/2Mup/LseNr4miY+QcojyRETh
|
|
||||||
Ecq0FVqgRvW2zRxqWxEPpLyzZGOcwAcW2jGO8XWPsqN4wAWO3WlpYT3unVK833Cx
|
|
||||||
0wkdFIPbkEE1xKaJiskNYGgDuHu4tzGhOHSKOMzo7HvMaYsNgNChx/x4HYyTgkCf
|
|
||||||
4r5zo4+CnOqRZ58STiV9AsOgg6mR8m6h/E9GNWpU0VWKm8hnklP+TiMnW/AKby0B
|
|
||||||
hUKXFJMyhrNBOQXyj1LTqM5Q97+SNstOfqutKZgdD8mZcL4ec+DzelCH4Gyc15Yx
|
|
||||||
gII/z3YwBUw/SGh+diCtWY2eJAHDkDFMGgiidVSzeKjRAgCDGi5+SYymLzLDyQey
|
|
||||||
BpgbxunC2zHKsEhH1ZfOxyEOsW7UzgN6axQQ5DdzzKc1ke6OBl0YD1pRsoWEXudi
|
|
||||||
b8LlNNI4oOaMiW3gsptJGPCOvXBrMm7wuzLrXMMRD0bh969KBJ7YQjUVkrAOsGTq
|
|
||||||
DnqoXILa0ljsdazxe2Xk8GqrGAQ1XIvO7elbUlV/0nlAj4nzzx1m8f0n9nZ1aEZe
|
|
||||||
Mv46+si6K/DgdUyGqcxOw6iZ00Fj6ha4yx7HJjZHHwFBXqJEPtdXYJKYa1AmHwID
|
|
||||||
AQABo1AwTjAdBgNVHQ4EFgQUeBJp2fBea1UzyKirF1VYDsYddiwwHwYDVR0jBBgw
|
|
||||||
FoAUeBJp2fBea1UzyKirF1VYDsYddiwwDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0B
|
|
||||||
AQsFAAOCAgEAfN7KjdqydCuSH6GCziu7Jc+SxLKLqMQc27/SBRWJ54JwDsfRglH3
|
|
||||||
zze9j0f/auLKNirbxQG8/CeJO7BtxsPHk2NfKnXUMyIfRH4jlSmuuy0YLH1N3F19
|
|
||||||
5GKGyt/ufc4a19l7M8ZseFMee8GXn6uHpVtN88GMKqQOu0AGnxv379ulI/RQ7iC2
|
|
||||||
wkFpkT8Anzwd+jxMi5iNYbsHGd1uCyzY1bbNORY/fdX7A27xNjLe2cJc68OUOJQe
|
|
||||||
XyfVlH2JyY+qEAXmv5gABafLFOsGmGHaQxZj4+zIdvDX6DGVIKCK7eixwVnKDwHm
|
|
||||||
b9yF4ivMWk5gaY0sjezD7bnN2vAN1zXvpmmSu2tc/kOzGXZKoGEUn/4j+tWvvhPn
|
|
||||||
wrTonT9soGmm7/LVyG/z950lylZV3XRw/0ZVQeCtQj+b+SjozNjTutzgWiAJ4njm
|
|
||||||
Jyaqrj6vHB6vOPySk6AYyu1qTaJsniHR62Hv6WG/eZQalcXJZ8BuwAgdpcgPwdVU
|
|
||||||
4IaKyiCjHg7dnrAwPURHfmlvosq+J+8PdD0O2L2aYUUtBS2TezgedSLXBYD4xZFa
|
|
||||||
85zsZMlEurHM9o93vfjihyMxUla46o6uNyl32ebaPvLxEj/MyGOwkzAWa0qxy74J
|
|
||||||
aQjWl+dWivXNFfE/yD/7yVF+X9YdlSFGCRyIfkUwy9hxLqkUdXeFgwE=
|
|
||||||
-----END CERTIFICATE-----
|
|
||||||
</ca>
|
|
||||||
<cert>
|
|
||||||
-----BEGIN CERTIFICATE-----
|
|
||||||
MIIE5TCCAs2gAwIBAgICEAEwDQYJKoZIhvcNAQELBQAwMTELMAkGA1UEBhMCR0Ix
|
|
||||||
DzANBgNVBAgMBkxvbmRvbjERMA8GA1UECgwIV1dXIEx0ZC4wHhcNMTkwMTA2MDc1
|
|
||||||
MzQ5WhcNMjkwMTAzMDc1MzQ5WjAUMRIwEAYDVQQDDAl2cG5jbGllbnQwggIiMA0G
|
|
||||||
CSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCmpYnmgBet9aZInpN9DRi6EHjye699
|
|
||||||
GFCVuBc4zw8PoZieC/jt/hQ8jrwQC8KVU7g2nuAZNfX1wXJ0hLDKAZWSJhvlAeN1
|
|
||||||
/oA9oe/kikatUcijJnfipFJlhPJ8kru+UXH8ypwHoxbxd/2u+KDTjg3dJqNPGUak
|
|
||||||
2KlSxHDbS0OWEhBIdn4A2iD7HpDPBO805KfAWVtfQ6Pvy5XHHNm6s796x91hes3Q
|
|
||||||
ONY1TLKE+tHFMoTXn3C0a4/DU35Kj1JrZDdER+DrNmxGhInq2CXEGsyw8MwXYHLS
|
|
||||||
1H8jvuX8u7yS2QX2cjwzc4PcJFPjEPTtUqPb1Ob+xOYb0bGOEE64+7xNeA+Mk5XL
|
|
||||||
i08DJEFAU0hSCnHtz1y/JrXScKaHgtVzDm2TsXal9jO7ikVi3zhC6EcO0T4PFBX0
|
|
||||||
lACXUDpXv50WI3ftoIACtO+paxM+wuXCt9ZhC6BZbRBs9EtVUdBDXAsSp+yhb9Jr
|
|
||||||
GZzY+GIfmiPY027Cd0FYumspCHgBvUM4D9rCDVSiwr1DqNHqzlrjbwW3mCHLtmjE
|
|
||||||
qU5Jg9DBUB3J01AERUgBYE8O8BkmSkuKi0mwgi9LdQ4SbizqVreMip+kXFAwRMg8
|
|
||||||
Pw1h/cDUd9G1ZM+bZzHrjp4rdDHK8NAhDgJvxhGuhuwVFpA/LhvJ4tJBWjhJn7j5
|
|
||||||
IEHpNWA1xNp1IwIDAQABoyQwIjALBgNVHQ8EBAMCB4AwEwYDVR0lBAwwCgYIKwYB
|
|
||||||
BQUHAwIwDQYJKoZIhvcNAQELBQADggIBAFcqTpXcEe83shOnp+nOvGscMT0PwSNo
|
|
||||||
ojy5xR9UhHs3ijyJ3DeaCO4xh7V8PTzCTpg3NOs8+19/nAhSr+QBWQKwrQhQ7Uub
|
|
||||||
zv8AMXJ4tU1ZAyx0lX8FzUe/GsI8muqosK8F09jnTgGk05yCca9kDVzffGk1mivx
|
|
||||||
d6ANRdUkprZV1VPA/eKXBQYstbeYBitPql5anmh54fEvXt7S1SdPATXI/eTzaxtP
|
|
||||||
2KyPl7OZDA+mvS0qPFcY+MB2fjjdoyl74BShCJyI5sBCfN6WY6hNQ7meVWa6bCLQ
|
|
||||||
EgvrZqh0lkWhy3mKcTL8eZJeF2SoYHQCSY75gQM0gdCODHTvkJJknLVmtzHHTJL3
|
|
||||||
gVbdqFo/OiGGpD4XKpChNv//1kkRrwPBG4YDXu2/vsPoZKRVgjpQbNol+z+Ee1y3
|
|
||||||
MqGz09aGxC98KUuxrUYwT8fbVVyLm0Fu2O8u+Qz3s6dPkWqD94YGxh2pq9SD5aTl
|
|
||||||
/92LaIyqfMlWXj38yDUxjsENsTDtsSrx5cw6+BpB+VMmSuXlIYgE4khiEnYzCXbj
|
|
||||||
5rduGmz4t4rhZZaa3n3L+G0sCQUqmnYNAjEMYcKIZvkTI3GoW3s0FROeUL1zLir2
|
|
||||||
mdvWmQHTq39p/iBWmMTP/YofQPv8P1TWbKWaKalAf4+fLXHiTL7KHFw4YXXKE1iA
|
|
||||||
GI6Ngos0UzHR
|
|
||||||
-----END CERTIFICATE-----
|
|
||||||
</cert>
|
|
||||||
<key>
|
|
||||||
-----BEGIN ENCRYPTED PRIVATE KEY-----
|
|
||||||
MIIJljBABgkqhkiG9w0BBQ0wMzAbBgkqhkiG9w0BBQwwDgQITIzpduKGeaYCAggA
|
|
||||||
MBQGCCqGSIb3DQMHBAjjV2SrWFzjJQSCCVBzWLc0eFF9x2OmoOddeslRnNd0DdiZ
|
|
||||||
eOjsK93BkhlSpBJVLJY7x6DD3JJokgDCFl/sWHjU4zn4C5UogaqhrAIYeFpjx4w4
|
|
||||||
4adU8bb3K4WOHAbQk5f+76HWr8LlCb4Ws2x0e8OlVwRBNJKAfumAoODE2ZZ0qitt
|
|
||||||
5FeBix1XecaSpFl8J4BoytFD1R1Pf1KHL/iZ9Vh0SGGqE+ElDPOd8+PfsIKy09ZD
|
|
||||||
/kMiiItnCF3NwG+s69GJJbGhIPdaas/yqVjtKOdQ8y7VBbrERzZ1mOVWN8zQj4cl
|
|
||||||
5whPtMBgSYgkM9UcDNaQbqn/q4yXjPF+mWuZ6EyD4yNwpbroHEna5SAbaMiIHUah
|
|
||||||
gXbSDrbFAMPqbpnpG5pUO9xM8YI8VYhlJU6MtzTjNwkmwyCzhV9WAEMBrMO3ZQGc
|
|
||||||
FmG9HchAdKO75K7bHaaAZbBvt2LMMg+cvMSFiojKvOKXrC3ntQzHCrGL0IPEmdYK
|
|
||||||
r1SIBD42zjsYPEY/MD0aV6eP/8DHbTMhF1oaQxxGLotv2+yVzoI+MTBaBFanQ+41
|
|
||||||
7LBSB8oK8uq11vVz4LIp4xC+uyH+qoKE59mPG6QoRKC5GSjIj6J9hbDOS0DcHILg
|
|
||||||
S0ebY2s2pSpVfcLZJslzSoI3ArNCs0fdkhZ2wD9/kX9BRAtZQSOsAcNVXc2sQsCj
|
|
||||||
zeIZ9V7HlNGdZnG2CAPI5RWe8RSzz1T+IF9rUUD43Hi0csQ6y3IFQEXJtmXArVXo
|
|
||||||
F6WoxqpXF3IdvLcnTDX1CK+h+QztSRysiRvWCPbISv84BIlx6OEVu+c0D82D+AUz
|
|
||||||
Wf6DRsXIzqFKly/MZNsYG7Sx0t1eHaKaw+SCsWLRdiFsdmL+LUAcqVsJNCshKp6H
|
|
||||||
Qlg3w0g9eU//qt2HnE0dx597PeSnyjRYSswt2R5dSaDh6x9KUeXc+kcTJTwxQ66c
|
|
||||||
gSopFZyoGOxHLGoCBZV1qGGKbUVnbX8hy3eunVRNVsgOBFhmMYy1kaWajOGIfVSz
|
|
||||||
jErclJpCJjuJDnK5L9ipLpQtb2VbVIgVbzwQ+p6AGBU39YO7R/ql4/DUyvo35mMx
|
|
||||||
X9tr8uGYRWxkbBJSKZ6FNG0jUI++7goT66vMWb9Sn3Xsczj1J9INMeY4OGwXGZ+3
|
|
||||||
VZrVsPMed0IJ4NIYJ0FRVhv7Y04aexJmvHqLUeRdJLk4l9kJNHoKJoleT8IUhThl
|
|
||||||
nqP76jFabL3jX9fUpDxPNNoMiz+en4L8bX3dnLlvo8xeLnUaqT63Y+CgRVyVZJSI
|
|
||||||
7PUZwGBWFHpuboLTYMgaQK6+UOp/rqtDFAkBXRD0ncSL2KYcy6I5IN9YDcYvTqvU
|
|
||||||
N5TsVjftGKCCZAFyCkVqVjfV3uKJAiK4LHJe9J94Aq3lWeaw//gg1UjWrXCRwKuZ
|
|
||||||
hO0kOEN7tw7YxOSOEzyQ3+j3TnWrToF/9QrCfY/+tOvwAVmLTD1e/dNTCt/SboaY
|
|
||||||
2FGSI0TmPRSewxCT2L9hBgM1wtDdgSofVAwxW9qK+/0JPZm+C6gGc2ipZNdDH4uN
|
|
||||||
+5j0zKZ98u7w6xRW23wCV+cnJ7IvtpXZvChFUnwnq9WanJakr/zsNsuhGpVOnD46
|
|
||||||
QOZzO3U8VwXwK1yMas0oN7XSTwf0vGZYBaCtKkF7PrLlVeOYaj50jQNXUvfLt+bQ
|
|
||||||
c64apwATE3JK2FcaV32m8UPz3bF04uuMIxBldH3Mvyp0X+MXaLERiuefUZwwppp5
|
|
||||||
yFYuy+z03asOYeQrG8LSsTGNOgJXPu1Or32GMHlil1s74uOodA5T8XHEmX4Fxuik
|
|
||||||
ok2itZL3yo/Sl73AC9yeSr7R9+Hf7SUTBt8AVeNPhmNDSi1AbyYhT1y0G7Dqwxvb
|
|
||||||
oG5ZQyKPlquf8a5Xzodq6lPdXJwi8ZLmAuBelAg6A4MJZzMMrhOzQXyiMHVNtwdk
|
|
||||||
c3LzES9bKEWgJR1CGR2RWrxUfqV6Y+uC+r5nPU/DZSOjJ0u7kWWvdQiySakxhQhO
|
|
||||||
qkT/+PeYcspzB2juDA5kq9s5Votyl05nHoM7L3UdRxzA4IhKawL9lRWu0Q/0Gn4C
|
|
||||||
axG2hUi61rxsx/epruIBz/01dlxw6xUpZBviArGxx/Z+QD0e5tKiuNCAOGQ5mm61
|
|
||||||
UtopO6vBq6oS4O/xO/xHhYlcLKJ2D3C8v1JTDQXIQ1OY6IaqIXRiaB4/bz9eCByV
|
|
||||||
8tEVW8/zs03M9zcxNuRL4tuBU0yhUCTCtGgQGUMnOYl4d1ZvZvUHb3oJqEI3AVOJ
|
|
||||||
/tTVni6P3V4TdaF17EyndLZbIz9mRp3Tai4lsZXbRpevKzQfkFvg3vRZKj5Fymp6
|
|
||||||
4SypXMKvgAQ/R6m9T8L0/rT90jf4GHhKhbSYXkJmAZQ/yz9eyjP9SFgHk3P0/z0x
|
|
||||||
pa8oc50PAumDpz73pLFwYGffZb7yAAb+Uv4bjBgw+UytIWsbWJBmGAZpVOxKCFEX
|
|
||||||
NGcflCfANB7FGv72a6fm3cf4IeqS3KGGQalIzOwwtIDWe2SVkp2LSx5JeFFCIjDP
|
|
||||||
dKynm1tczPfoL/tUzcoRqI61zVpb3pAzKmrsWnSgA5Zl+LPZq23g5QRjCNeu1xkh
|
|
||||||
GXMeXvQ0Q1VfLG4iw7j6zx52qiFy0HTQ8FK6cSA/nJN6/fE/2p+buAKxP5qjsFCP
|
|
||||||
+/QQB681rfKGrQV1yh8TKuJ04h5gdxF4sC6cliHdw8daA2y4rQorBjM7F1EF8VmZ
|
|
||||||
NcC1cEclv/E3QwOkBJsaom2rw7LkeOHLjqorGAf5eazO2AFZXVVG5yWrNyZWnaYf
|
|
||||||
LYrXCk/4yLSexVEgiC81uSQL2uhvkatrUdDi4zV9mMrHKR10w8LVEuXSkS8IK3h5
|
|
||||||
ln+HDc+rqUZG0ChHaF/GJ5VpQ6BLcMYNaoc75AuYU2rlSvMWnaR9UdiNVx3nrxld
|
|
||||||
/SvNn8K+lFiKCr0J0DiVDztCpGOq4k2JSlCr+C+YxvipRr+VZOzpxx4RvkRFKAq6
|
|
||||||
ix0demDcAk+YB6OZP3JAEy/yoiK/f61KiRpv0VVnHRFKyBv6MIyZmXkn5SesXF5C
|
|
||||||
aBAV1zRdnV4EHXZy3qKIdvDP/5qp/6WcNI4edkAwr9bl+BqMe+0dy6QcsU9dLeQa
|
|
||||||
OcpDZqHOxCXYTtiSIVM5WvSfPI5j6OdXCsrDU0VZOiiKegnGKNhz8Hn1aLZpGmoU
|
|
||||||
TkqhRGpXchHSXNsGwT9AWlSJCnEF1dT0OOJzYbIbcwLa3WcKXHADpgfLJJ/KXHDJ
|
|
||||||
buf/Epyjpi6dgg==
|
|
||||||
-----END ENCRYPTED PRIVATE KEY-----
|
|
||||||
</key>
|
|
||||||
7
tags/index.html
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>Tags · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta http-equiv=Content-Security-Policy content="upgrade-insecure-requests; block-all-mixed-content; default-src 'self'; child-src 'self'; font-src 'self' https://fonts.gstatic.com https://cdn.jsdelivr.net/; form-action 'self'; frame-src 'self' https://www.youtube.com https://disqus.com; img-src 'self' https://referrer.disqus.com https://c.disquscdn.com https://*.disqus.com; object-src 'none'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com/ https://cdn.jsdelivr.net/; script-src 'self' 'unsafe-inline' https://www.google-analytics.com https://cdn.jsdelivr.net/ https://pagead2.googlesyndication.com https://static.cloudflareinsights.com https://unpkg.com https://ericxliu-me.disqus.com https://disqus.com https://*.disqus.com https://*.disquscdn.com https://unpkg.com; connect-src 'self' https://www.google-analytics.com https://pagead2.googlesyndication.com https://cloudflareinsights.com ws://localhost:1313 ws://localhost:* wss://localhost:* https://links.services.disqus.com https://*.disqus.com;"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Tags"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="https://ericxliu.me/tags/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Tags"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=canonical href=https://ericxliu.me/tags/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.4b392a85107b91dbdabc528edf014a6ab1a30cd44cafcd5325c8efe796794fca.css integrity="sha256-SzkqhRB7kdvavFKO3wFKarGjDNRMr81TJcjv55Z5T8o=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link 
rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/tags/index.xml title="Eric X. Liu's Personal Page"><script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-3972604619956476" crossorigin=anonymous></script><script type=application/ld+json>{"@context":"http://schema.org","@type":"Person","name":"Eric X. Liu","url":"https:\/\/ericxliu.me\/","description":"Software \u0026 Performance Engineer at Google","sameAs":["https:\/\/www.linkedin.com\/in\/eric-x-liu-46648b93\/","https:\/\/git.ericxliu.me\/eric"]}</script></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=https://ericxliu.me/>Eric X. Liu's Personal Page
|
||||||
|
</a><input type=checkbox id=menu-toggle>
|
||||||
|
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/about/>About</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=https://ericxliu.me/tags/>Tags</a></h1></header><ul></ul></section></div><footer class=footer><section class=container>©
|
||||||
|
2016 -
|
||||||
|
2026
|
||||||
|
Eric X. Liu
|
||||||
|
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6100dca">[6100dca]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||||
1
tags/index.xml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Tags on Eric X. Liu's Personal Page</title><link>https://ericxliu.me/tags/</link><description>Recent content in Tags on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><atom:link href="https://ericxliu.me/tags/index.xml" rel="self" type="application/rss+xml"/></channel></rss>
|
||||||
1
tags/page/1/index.html
Normal file
@@ -0,0 +1 @@
|
|||||||
|
<!doctype html><html lang=en><head><title>https://ericxliu.me/tags/</title><link rel=canonical href=https://ericxliu.me/tags/><meta charset=utf-8><meta http-equiv=refresh content="0; url=https://ericxliu.me/tags/"></head></html>
|
||||||