deploy: 34aa99a15d
This commit is contained in:
2
404.html
2
404.html
@@ -4,4 +4,4 @@
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -4,4 +4,4 @@
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -4,4 +4,4 @@
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -1,7 +1,7 @@
|
||||
<!doctype html><html lang=en><head><title>Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Eric X. Liu's Personal Page"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Eric X. Liu's Personal Page"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=canonical href=/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.f03d6359cf766772af14fbe07ce6aca734b321c2e15acba0bbf4e2254941c460.css integrity="sha256-8D1jWc92Z3KvFPvgfOaspzSzIcLhWsugu/TiJUlBxGA=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/index.xml title="Eric X. Liu's Personal Page"><meta name=generator content="Hugo 0.151.0"></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=/>Eric X. Liu's Personal Page
|
||||
<!doctype html><html lang=en><head><title>Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Eric X. Liu's Personal Page"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Eric X. Liu's Personal Page"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=canonical href=/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.f03d6359cf766772af14fbe07ce6aca734b321c2e15acba0bbf4e2254941c460.css integrity="sha256-8D1jWc92Z3KvFPvgfOaspzSzIcLhWsugu/TiJUlBxGA=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/index.xml title="Eric X. Liu's Personal Page"><meta name=generator content="Hugo 0.153.0"></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=/>Eric X. Liu's Personal Page
|
||||
</a><input type=checkbox id=menu-toggle>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container centered"><div class=about><div class=avatar><img src=/images/gravatar.png alt=avatar></div><h1>Eric X. Liu</h1><h2>Software & Performance Engineer @Google</h2><ul><li><a href=https://git.ericxliu.me/eric aria-label=Git><i class="fa-brands fa-git fa-2x" aria-hidden=true></i></a></li><li><a href=https://www.linkedin.com/in/eric-x-liu-46648b93/ aria-label=linkedin><i class="fa-brands fa-linkedin fa-2x" aria-hidden=true></i></a></li><li><a href aria-label="Personal email"><i class="fa fa-envelope fa-2x" aria-hidden=true></i></a></li></ul></div></section></div><footer class=footer><section class=container>©
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
11
index.xml
11
index.xml
@@ -1,4 +1,13 @@
|
||||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Eric X. Liu's Personal Page</title><link>/</link><description>Recent content on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Sat, 04 Oct 2025 20:41:50 +0000</lastBuildDate><atom:link href="/index.xml" rel="self" type="application/rss+xml"/><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description><h2 id="introduction">
|
||||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Eric X. Liu's Personal Page</title><link>/</link><description>Recent content on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Fri, 19 Dec 2025 21:21:55 +0000</lastBuildDate><atom:link href="/index.xml" rel="self" type="application/rss+xml"/><item><title>The Convergence of Fast Weights, Linear Attention, and State Space Models</title><link>/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</link><pubDate>Fri, 19 Dec 2025 00:00:00 +0000</pubDate><guid>/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</guid><description><p>Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (&ldquo;Fast Weights&rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).</p>
|
||||
<p>This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.</p></description></item><item><title>vAttention</title><link>/posts/vattention/</link><pubDate>Mon, 08 Dec 2025 00:00:00 +0000</pubDate><guid>/posts/vattention/</guid><description><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p>
|
||||
<h4 id="the-status-quo-pagedattention-and-software-tables">
|
||||
The Status Quo: PagedAttention and Software Tables
|
||||
<a class="heading-link" href="#the-status-quo-pagedattention-and-software-tables">
|
||||
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||
<span class="sr-only">Link to heading</span>
|
||||
</a>
|
||||
</h4>
|
||||
<p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &ldquo;page table&rdquo; (the Block Table) [1].</p></description></item><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description><h2 id="introduction">
|
||||
Introduction
|
||||
<a class="heading-link" href="#introduction">
|
||||
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||
|
||||
@@ -62,4 +62,4 @@ After running 66 inference tests across seven different language models ranging
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -25,4 +25,4 @@ Understanding the Two Primary Maintenance Cycles Link to heading The Breville Ba
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -20,4 +20,4 @@ Our overarching philosophy is simple: isolate and change only one variable at a
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -76,7 +76,7 @@ Flashing NVIDIA Jetson devices remotely presents unique challenges when the host
|
||||
</span></span></code></pre></div><ol start=2><li>Created udev rules to automatically move USB network interfaces to the container:</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># /etc/udev/rules.d/99-jetson-usb-network.rules</span>
|
||||
</span></span><span style=display:flex><span><span style=color:#79c0ff>ACTION</span><span style=color:#ff7b72;font-weight:700>==</span><span style=color:#a5d6ff>"add"</span>, <span style=color:#79c0ff>SUBSYSTEM</span><span style=color:#ff7b72;font-weight:700>==</span><span style=color:#a5d6ff>"net"</span>, <span style=color:#79c0ff>KERNEL</span><span style=color:#ff7b72;font-weight:700>==</span><span style=color:#a5d6ff>"enx*"</span>, <span style=color:#79c0ff>RUN</span><span style=color:#ff7b72;font-weight:700>+=</span><span style=color:#a5d6ff>"/usr/local/bin/handle-jetson-usb-network.sh %k"</span>
|
||||
</span></span></code></pre></div><ol start=3><li>Created handler script to move interfaces into container namespace:</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-weight:700;font-style:italic>#!/bin/bash
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-weight:700;font-style:italic></span><span style=color:#79c0ff>INTERFACE</span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#79c0ff>$1</span>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff>INTERFACE</span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#79c0ff>$1</span>
|
||||
</span></span><span style=display:flex><span><span style=color:#79c0ff>CONTAINER_ID</span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#a5d6ff>106</span>
|
||||
</span></span><span style=display:flex><span><span style=color:#79c0ff>CONTAINER_PID</span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#ff7b72>$(</span>pct exec <span style=color:#79c0ff>$CONTAINER_ID</span> -- pidof systemd | awk <span style=color:#a5d6ff>'{print $1}'</span><span style=color:#ff7b72>)</span>
|
||||
</span></span><span style=display:flex><span>ip link set <span style=color:#a5d6ff>"</span><span style=color:#79c0ff>$INTERFACE</span><span style=color:#a5d6ff>"</span> netns <span style=color:#a5d6ff>"ct</span><span style=color:#79c0ff>$CONTAINER_ID</span><span style=color:#a5d6ff>"</span>
|
||||
@@ -108,7 +108,7 @@ Flashing NVIDIA Jetson devices remotely presents unique challenges when the host
|
||||
<a class=heading-link href=#2-create-vm-with-pci-passthrough><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Create VM</span>
|
||||
</span></span><span style=display:flex><span>qm create <span style=color:#a5d6ff>200</span> --name jetson-flash --memory <span style=color:#a5d6ff>4096</span> --cores <span style=color:#a5d6ff>4</span> <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> --net0 virtio,bridge<span style=color:#ff7b72;font-weight:700>=</span>vmbr0 --scsihw virtio-scsi-pci
|
||||
</span></span></span><span style=display:flex><span> --net0 virtio,bridge<span style=color:#ff7b72;font-weight:700>=</span>vmbr0 --scsihw virtio-scsi-pci
|
||||
</span></span><span style=display:flex><span>
|
||||
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Set machine type to q35 (required for PCIe passthrough)</span>
|
||||
</span></span><span style=display:flex><span>qm set <span style=color:#a5d6ff>200</span> --machine q35
|
||||
@@ -118,11 +118,11 @@ Flashing NVIDIA Jetson devices remotely presents unique challenges when the host
|
||||
</span></span><span style=display:flex><span>
|
||||
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Configure disk and cloud-init</span>
|
||||
</span></span><span style=display:flex><span>qm set <span style=color:#a5d6ff>200</span> --scsi0 local-lvm:vm-200-disk-0 --boot <span style=color:#79c0ff>order</span><span style=color:#ff7b72;font-weight:700>=</span>scsi0 <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> --ide2 local-lvm:cloudinit
|
||||
</span></span></span><span style=display:flex><span> --ide2 local-lvm:cloudinit
|
||||
</span></span><span style=display:flex><span>
|
||||
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Configure cloud-init</span>
|
||||
</span></span><span style=display:flex><span>qm set <span style=color:#a5d6ff>200</span> --ciuser sdkmanager --cipassword sdkmanager <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> --ipconfig0 <span style=color:#79c0ff>ip</span><span style=color:#ff7b72;font-weight:700>=</span>dhcp --sshkeys ~/.ssh/authorized_keys
|
||||
</span></span></span><span style=display:flex><span> --ipconfig0 <span style=color:#79c0ff>ip</span><span style=color:#ff7b72;font-weight:700>=</span>dhcp --sshkeys ~/.ssh/authorized_keys
|
||||
</span></span><span style=display:flex><span>
|
||||
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic># Add PCI passthrough for USB controller</span>
|
||||
</span></span><span style=display:flex><span>qm set <span style=color:#a5d6ff>200</span> --hostpci0 0000:22:00.3,pcie<span style=color:#ff7b72;font-weight:700>=</span><span style=color:#a5d6ff>1</span>
|
||||
@@ -168,4 +168,4 @@ Flashing NVIDIA Jetson devices remotely presents unique challenges when the host
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -18,4 +18,4 @@ The answer lies in creating a universal language—a bridge between the continuo
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -1,6 +1,8 @@
|
||||
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=canonical href=/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.f03d6359cf766772af14fbe07ce6aca734b321c2e15acba0bbf4e2254941c460.css integrity="sha256-8D1jWc92Z3KvFPvgfOaspzSzIcLhWsugu/TiJUlBxGA=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=/>Eric X. Liu's Personal Page
|
||||
</a><input type=checkbox id=menu-toggle>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=/posts/>Posts</a></h1></header><ul><li><span class=date>October 4, 2025</span>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=/posts/>Posts</a></h1></header><ul><li><span class=date>December 19, 2025</span>
|
||||
<a class=title href=/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/>The Convergence of Fast Weights, Linear Attention, and State Space Models</a></li><li><span class=date>December 8, 2025</span>
|
||||
<a class=title href=/posts/vattention/>vAttention</a></li><li><span class=date>October 4, 2025</span>
|
||||
<a class=title href=/posts/benchmarking-llms-on-jetson-orin-nano/>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</a></li><li><span class=date>October 2, 2025</span>
|
||||
<a class=title href=/posts/flashing-jetson-orin-nano-in-virtualized-environments/>Flashing Jetson Orin Nano in Virtualized Environments</a></li><li><span class=date>September 28, 2025</span>
|
||||
<a class=title href=/posts/openwrt-mwan3-wireguard-endpoint-exclusion/>OpenWrt: Fix WireGuard Connectivity with MWAN3 by Excluding the VPN Endpoint</a></li><li><span class=date>September 22, 2025</span>
|
||||
@@ -8,10 +10,8 @@
|
||||
<a class=title href=/posts/quantization-in-llms/>Quantization in LLMs</a></li><li><span class=date>August 16, 2025</span>
|
||||
<a class=title href=/posts/breville-barista-pro-maintenance/>Breville Barista Pro Maintenance</a></li><li><span class=date>August 9, 2025</span>
|
||||
<a class=title href=/posts/secure-boot-dkms-and-mok-on-proxmox-debian/>Fixing GPU Operator Pods Stuck in Init: Secure Boot, DKMS, and MOK on Proxmox + Debian</a></li><li><span class=date>August 7, 2025</span>
|
||||
<a class=title href=/posts/how-rvq-teaches-llms-to-see-and-hear/>Beyond Words: How RVQ Teaches LLMs to See and Hear</a></li><li><span class=date>August 3, 2025</span>
|
||||
<a class=title href=/posts/supabase-deep-dive/>Supabase Deep Dive: It's Not Magic, It's Just Postgres</a></li><li><span class=date>August 2, 2025</span>
|
||||
<a class=title href=/posts/ppo-for-language-models/>A Deep Dive into PPO for Language Models</a></li></ul><ul class=pagination><li>1</li><li><a href=/posts/page/2/>2</a></li><li class=hidden><a href=/posts/page/2/>›</a></li><li><a href=/posts/page/2/>»</a></li></ul></section></div><footer class=footer><section class=container>©
|
||||
<a class=title href=/posts/how-rvq-teaches-llms-to-see-and-hear/>Beyond Words: How RVQ Teaches LLMs to See and Hear</a></li></ul><ul class=pagination><li>1</li><li><a href=/posts/page/2/>2</a></li><li class=hidden><a href=/posts/page/2/>›</a></li><li><a href=/posts/page/2/>»</a></li></ul></section></div><footer class=footer><section class=container>©
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -1,4 +1,13 @@
|
||||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Posts on Eric X. Liu's Personal Page</title><link>/posts/</link><description>Recent content in Posts on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Sat, 04 Oct 2025 20:41:50 +0000</lastBuildDate><atom:link href="/posts/index.xml" rel="self" type="application/rss+xml"/><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description><h2 id="introduction">
|
||||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>Posts on Eric X. Liu's Personal Page</title><link>/posts/</link><description>Recent content in Posts on Eric X. Liu's Personal Page</description><generator>Hugo</generator><language>en</language><lastBuildDate>Fri, 19 Dec 2025 21:21:55 +0000</lastBuildDate><atom:link href="/posts/index.xml" rel="self" type="application/rss+xml"/><item><title>The Convergence of Fast Weights, Linear Attention, and State Space Models</title><link>/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</link><pubDate>Fri, 19 Dec 2025 00:00:00 +0000</pubDate><guid>/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</guid><description><p>Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (&ldquo;Fast Weights&rdquo;) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).</p>
|
||||
<p>This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.</p></description></item><item><title>vAttention</title><link>/posts/vattention/</link><pubDate>Mon, 08 Dec 2025 00:00:00 +0000</pubDate><guid>/posts/vattention/</guid><description><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p>
|
||||
<h4 id="the-status-quo-pagedattention-and-software-tables">
|
||||
The Status Quo: PagedAttention and Software Tables
|
||||
<a class="heading-link" href="#the-status-quo-pagedattention-and-software-tables">
|
||||
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||
<span class="sr-only">Link to heading</span>
|
||||
</a>
|
||||
</h4>
|
||||
<p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined &ldquo;page table&rdquo; (the Block Table) [1].</p></description></item><item><title>Why Your Jetson Orin Nano's 40 TOPS Goes Unused (And What That Means for Edge AI)</title><link>/posts/benchmarking-llms-on-jetson-orin-nano/</link><pubDate>Sat, 04 Oct 2025 00:00:00 +0000</pubDate><guid>/posts/benchmarking-llms-on-jetson-orin-nano/</guid><description><h2 id="introduction">
|
||||
Introduction
|
||||
<a class="heading-link" href="#introduction">
|
||||
<i class="fa-solid fa-link" aria-hidden="true" title="Link to heading"></i>
|
||||
|
||||
@@ -44,4 +44,4 @@ The <strong>Top-K routing</strong> mechanism, as illustrated in the provided ima
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -98,4 +98,4 @@ When using WireGuard together with MWAN3 on OpenWrt, the tunnel can fail to esta
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -1,6 +1,8 @@
|
||||
<!doctype html><html lang=en><head><title>Posts · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta name=author content="Eric X. Liu"><meta name=description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="Posts"><meta name=twitter:description content="Eric X. Liu - Software & Performance Engineer at Google. Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:url" content="/posts/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="Posts"><meta property="og:description" content="Eric X. Liu - Software & Performance Engineer at Google. 
Sharing insights about software engineering, performance optimization, tech industry experiences, mountain biking adventures, Jeep overlanding, and outdoor activities."><meta property="og:locale" content="en"><meta property="og:type" content="website"><link rel=canonical href=/posts/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.f03d6359cf766772af14fbe07ce6aca734b321c2e15acba0bbf4e2254941c460.css integrity="sha256-8D1jWc92Z3KvFPvgfOaspzSzIcLhWsugu/TiJUlBxGA=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5><link rel=alternate type=application/rss+xml href=/posts/index.xml title="Eric X. Liu's Personal Page"></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=/>Eric X. Liu's Personal Page
|
||||
</a><input type=checkbox id=menu-toggle>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=/posts/>Posts</a></h1></header><ul><li><span class=date>July 2, 2025</span>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authenitk>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container list"><header><h1 class=title><a class=title-link href=/posts/>Posts</a></h1></header><ul><li><span class=date>August 3, 2025</span>
|
||||
<a class=title href=/posts/supabase-deep-dive/>Supabase Deep Dive: It's Not Magic, It's Just Postgres</a></li><li><span class=date>August 2, 2025</span>
|
||||
<a class=title href=/posts/ppo-for-language-models/>A Deep Dive into PPO for Language Models</a></li><li><span class=date>July 2, 2025</span>
|
||||
<a class=title href=/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/>Mixture-of-Experts (MoE) Models Challenges & Solutions in Practice</a></li><li><span class=date>June 1, 2025</span>
|
||||
<a class=title href=/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/>An Architectural Deep Dive of T5</a></li><li><span class=date>May 1, 2025</span>
|
||||
<a class=title href=/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/>Mastering Your Breville Barista Pro: The Ultimate Guide to Dialing In Espresso</a></li><li><span class=date>April 1, 2025</span>
|
||||
@@ -9,4 +11,4 @@
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -25,4 +25,4 @@ where <code>δ_t = r_t + γV(s_{t+1}) - V(s_t)</code></p><ul><li><strong>γ (gam
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -7,4 +7,4 @@
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -30,11 +30,11 @@ nvidia-smi failed to communicate with the NVIDIA driver modprobe nvidia → “K
|
||||
</span></span></code></pre></div><h3 id=step-2--make-dkms-sign-nvidia-modules-with-a-mok>Step 2 — Make DKMS sign NVIDIA modules with a MOK
|
||||
<a class=heading-link href=#step-2--make-dkms-sign-nvidia-modules-with-a-mok><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h3><p>Debian already generated a DKMS key at <code>/var/lib/dkms/mok.key</code>. Create an X.509 cert in DER format:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo openssl req -new -x509 <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> -key /var/lib/dkms/mok.key <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> -out /var/lib/dkms/mok.der <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> -outform DER <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> -subj <span style=color:#a5d6ff>"/CN=DKMS MOK/"</span> <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span><span style=color:#79c0ff></span> -days <span style=color:#a5d6ff>36500</span>
|
||||
</span></span></span><span style=display:flex><span> -key /var/lib/dkms/mok.key <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span> -out /var/lib/dkms/mok.der <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span> -outform DER <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span> -subj <span style=color:#a5d6ff>"/CN=DKMS MOK/"</span> <span style=color:#79c0ff>\
|
||||
</span></span></span><span style=display:flex><span> -days <span style=color:#a5d6ff>36500</span>
|
||||
</span></span></code></pre></div><p>Enable DKMS signing:</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo sed -i <span style=color:#a5d6ff>'s|^mok_signing_key=.*|mok_signing_key=/var/lib/dkms/mok.key|'</span> /etc/dkms/framework.conf
|
||||
</span></span><span style=display:flex><span>sudo sed -i <span style=color:#a5d6ff>'s|^mok_certificate=.*|mok_certificate=/var/lib/dkms/mok.der|'</span> /etc/dkms/framework.conf
|
||||
</span></span></code></pre></div><p>Rebuild/install modules (signs them now):</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-bash data-lang=bash><span style=display:flex><span>sudo dkms build nvidia/<span style=color:#ff7b72>$(</span>modinfo -F version nvidia<span style=color:#ff7b72>)</span> -k <span style=color:#ff7b72>$(</span>uname -r<span style=color:#ff7b72>)</span> --force
|
||||
@@ -59,4 +59,4 @@ nvidia-smi failed to communicate with the NVIDIA driver modprobe nvidia → “K
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -18,47 +18,47 @@ Supabase enters this space with a radically different philosophy: transparency.
|
||||
<a class=heading-link href=#phase-3-the-security-layer-row-level-security><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>This is not an optional step. RLS is the cornerstone of Supabase security.</p><ol><li><strong>Deny by Default:</strong> For any table holding user data, immediately enable RLS. This blocks all access until you explicitly grant it.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#ff7b72>ALTER</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span>ENABLE<span style=color:#6e7681> </span><span style=color:#ff7b72>ROW</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LEVEL</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SECURITY</span>;<span style=color:#6e7681>
|
||||
</span></span></span></code></pre></div><ol start=2><li><strong>Write “Allow” Policies:</strong> Create policies based on your user stories. Policies are SQL rules that the database enforces on every single query.</li></ol><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- Users can see tasks in projects they are a member of.
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>"Allow read access to tasks in user's projects"</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>USING</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>"Allow read access to tasks in user's projects"</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>USING</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>EXISTS</span><span style=color:#6e7681> </span>(<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681> </span><span style=color:#a5d6ff>1</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FROM</span><span style=color:#6e7681> </span>project_users<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>WHERE</span><span style=color:#6e7681> </span>project_users.project_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.project_id<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>AND</span><span style=color:#6e7681> </span>project_users.user_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span>)<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span>);<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span>);<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#8b949e;font-style:italic>-- Users can only insert tasks for themselves.
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>"Allow users to create their own tasks"</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>INSERT</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>WITH</span><span style=color:#6e7681> </span><span style=color:#ff7b72>CHECK</span><span style=color:#6e7681> </span>(<span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.assignee_id<span style=color:#6e7681> </span>);<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- Users can only insert tasks for themselves.
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span>POLICY<span style=color:#6e7681> </span><span style=color:#a5d6ff>"Allow users to create their own tasks"</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681> </span><span style=color:#ff7b72>FOR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>INSERT</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>WITH</span><span style=color:#6e7681> </span><span style=color:#ff7b72>CHECK</span><span style=color:#6e7681> </span>(<span style=color:#6e7681> </span>auth.uid()<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>tasks.assignee_id<span style=color:#6e7681> </span>);<span style=color:#6e7681>
|
||||
</span></span></span></code></pre></div><p>The <code>auth.uid()</code> function is a special Supabase utility that securely returns the ID of the logged-in user making the request.</p><h4 id=phase-4-the-apis-data-access>Phase 4: The APIs (Data Access)
|
||||
<a class=heading-link href=#phase-4-the-apis-data-access><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>With your data structured and secured, you can now build the access points.</p><ul><li><strong>For Simple CRUD:</strong> Use Supabase’s auto-generated API. It’s convenient, respects all your RLS policies, and is perfect for simple reads and writes on a single table.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.from(<span style=color:#a5d6ff>'tasks'</span>).select(<span style=color:#a5d6ff>'*'</span>);
|
||||
</span></span></code></pre></div><ul><li><strong>For Complex Logic:</strong> Use PostgreSQL Functions (RPC). Encapsulate complex <code>JOIN</code>s or multi-step transactions into a single, callable function. This reduces network chattiness and keeps your business logic secure on the server.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-sql data-lang=sql><span style=display:flex><span><span style=color:#8b949e;font-style:italic>-- A function to get a task and its project name in one call
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>OR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>REPLACE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FUNCTION</span><span style=color:#6e7681> </span>get_task_with_project(task_id_input<span style=color:#6e7681> </span>int)<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>RETURNS</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>(task_title<span style=color:#6e7681> </span>text,<span style=color:#6e7681> </span>project_name<span style=color:#6e7681> </span>text)<span style=color:#6e7681> </span><span style=color:#ff7b72>AS</span><span style=color:#6e7681> </span><span style=color:#f85149>$$</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>BEGIN</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>CREATE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>OR</span><span style=color:#6e7681> </span><span style=color:#ff7b72>REPLACE</span><span style=color:#6e7681> </span><span style=color:#ff7b72>FUNCTION</span><span style=color:#6e7681> </span>get_task_with_project(task_id_input<span style=color:#6e7681> </span>int)<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>RETURNS</span><span style=color:#6e7681> </span><span style=color:#ff7b72>TABLE</span><span style=color:#6e7681> </span>(task_title<span style=color:#6e7681> </span>text,<span style=color:#6e7681> </span>project_name<span style=color:#6e7681> </span>text)<span style=color:#6e7681> </span><span style=color:#ff7b72>AS</span><span style=color:#6e7681> </span><span style=color:#f85149>$$</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>BEGIN</span><span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>RETURN</span><span style=color:#6e7681> </span>QUERY<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>SELECT</span><span style=color:#6e7681> </span>tasks.title,<span style=color:#6e7681> </span>projects.name<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>FROM</span><span style=color:#6e7681> </span>tasks<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>JOIN</span><span style=color:#6e7681> </span>projects<span style=color:#6e7681> </span><span style=color:#ff7b72>ON</span><span style=color:#6e7681> </span>tasks.project_id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>projects.id<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681> </span><span style=color:#ff7b72>WHERE</span><span style=color:#6e7681> </span>tasks.id<span style=color:#6e7681> </span><span style=color:#ff7b72;font-weight:700>=</span><span style=color:#6e7681> </span>task_id_input;<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#ff7b72>END</span>;<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#6e7681></span><span style=color:#f85149>$$</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LANGUAGE</span><span style=color:#6e7681> </span>plpgsql;<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>END</span>;<span style=color:#6e7681>
|
||||
</span></span></span><span style=display:flex><span><span style=color:#f85149>$$</span><span style=color:#6e7681> </span><span style=color:#ff7b72>LANGUAGE</span><span style=color:#6e7681> </span>plpgsql;<span style=color:#6e7681>
|
||||
</span></span></span></code></pre></div><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Called simply from the frontend
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.rpc(<span style=color:#a5d6ff>'get_task_with_project'</span>, { task_id_input<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>123</span> });
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.rpc(<span style=color:#a5d6ff>'get_task_with_project'</span>, { task_id_input<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>123</span> });
|
||||
</span></span></code></pre></div><h3 id=a-tour-of-the-core-services>A Tour of the Core Services
|
||||
<a class=heading-link href=#a-tour-of-the-core-services><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h3><p>Beyond the database, Supabase provides a suite of essential tools.</p><h4 id=authentication>Authentication
|
||||
<a class=heading-link href=#authentication><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>A complete user management system that integrates directly with your database. When a user signs up, a corresponding entry is created in the managed <code>auth.users</code> table, which you can then reference in your own tables.</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Sign up a new user and handle social logins with ease
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signUp({ email, password });
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signUp({ email, password });
|
||||
</span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { data, error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.auth.signInWithOAuth({ provider<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'github'</span> });
|
||||
</span></span></code></pre></div><h4 id=storage>Storage
|
||||
<a class=heading-link href=#storage><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>A simple, S3-compatible object store for managing files like user avatars or documents. It’s integrated with Postgres and RLS, allowing you to write fine-grained access policies on files and folders (buckets).</p><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Upload a user avatar to a public 'avatars' bucket
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span><span style=color:#ff7b72>const</span> { error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.storage
|
||||
</span></span></span><span style=display:flex><span><span style=color:#ff7b72>const</span> { error } <span style=color:#ff7b72;font-weight:700>=</span> <span style=color:#ff7b72>await</span> supabase.storage
|
||||
</span></span><span style=display:flex><span> .from(<span style=color:#a5d6ff>'avatars'</span>)
|
||||
</span></span><span style=display:flex><span> .upload(<span style=color:#a5d6ff>`public/</span><span style=color:#a5d6ff>${</span>userId<span style=color:#a5d6ff>}</span><span style=color:#a5d6ff>.png`</span>, file);
|
||||
</span></span></code></pre></div><h4 id=edge-functions-vs-database-functions>Edge Functions vs. Database Functions
|
||||
@@ -74,14 +74,14 @@ Supabase enters this space with a radically different philosophy: transparency.
|
||||
<span class=sr-only>Link to heading</span></a></h4><ul><li><strong>Use For:</strong> Small, JSON-based messages like chat messages, live notifications, activity feeds, and presence indicators (“who’s online”). Use the <code>broadcast</code> feature for ephemeral data like cursor positions that you don’t need to save.</li><li><strong>Do NOT Use For:</strong> Large, continuous data streams. It is <strong>not</strong> a replacement for WebRTC for video/audio calls. The system is designed for small, infrequent payloads.</li></ul><div class=highlight><pre tabindex=0 style=color:#e6edf3;background-color:#0d1117;-moz-tab-size:4;-o-tab-size:4;tab-size:4><code class=language-javascript data-lang=javascript><span style=display:flex><span><span style=color:#ff7b72>const</span> channel <span style=color:#ff7b72;font-weight:700>=</span> supabase.channel(<span style=color:#a5d6ff>'public:messages'</span>);
|
||||
</span></span><span style=display:flex><span>
|
||||
</span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic>// Subscribe to new rows in the 'messages' table
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span>channel
|
||||
</span></span></span><span style=display:flex><span>channel
|
||||
</span></span><span style=display:flex><span> .on(
|
||||
</span></span><span style=display:flex><span> <span style=color:#a5d6ff>'postgres_changes'</span>,
|
||||
</span></span><span style=display:flex><span> { event<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'INSERT'</span>, schema<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'public'</span>, table<span style=color:#ff7b72;font-weight:700>:</span> <span style=color:#a5d6ff>'messages'</span> },
|
||||
</span></span><span style=display:flex><span> (payload) => {
|
||||
</span></span><span style=display:flex><span> console.log(<span style=color:#a5d6ff>'New message received!'</span>, payload.<span style=color:#ff7b72>new</span>);
|
||||
</span></span><span style=display:flex><span> <span style=color:#8b949e;font-style:italic>// Update your UI here
|
||||
</span></span></span><span style=display:flex><span><span style=color:#8b949e;font-style:italic></span> }
|
||||
</span></span></span><span style=display:flex><span> }
|
||||
</span></span><span style=display:flex><span> )
|
||||
</span></span><span style=display:flex><span> .subscribe();
|
||||
</span></span></code></pre></div><h3 id=final-words-of-advice>Final Words of Advice
|
||||
@@ -90,4 +90,4 @@ Supabase enters this space with a radically different philosophy: transparency.
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -30,4 +30,4 @@ But to truly understand the field, we must look at the pivotal models that explo
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -0,0 +1,29 @@
|
||||
<!doctype html><html lang=en><head><title>The Convergence of Fast Weights, Linear Attention, and State Space Models · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta name=author content="Eric X. Liu"><meta name=description content="Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).
|
||||
This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="The Convergence of Fast Weights, Linear Attention, and State Space Models"><meta name=twitter:description content="Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).
|
||||
This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV."><meta property="og:url" content="/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="The Convergence of Fast Weights, Linear Attention, and State Space Models"><meta property="og:description" content="Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).
|
||||
This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-19T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-19T21:21:55+00:00"><link rel=canonical href=/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.f03d6359cf766772af14fbe07ce6aca734b321c2e15acba0bbf4e2254941c460.css integrity="sha256-8D1jWc92Z3KvFPvgfOaspzSzIcLhWsugu/TiJUlBxGA=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a 
class=navigation-title href=/>Eric X. Liu's Personal Page
|
||||
</a><input type=checkbox id=menu-toggle>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authentik>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/>The Convergence of Fast Weights, Linear Attention, and State Space Models</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||
<time datetime=2025-12-19T00:00:00Z>December 19, 2025
|
||||
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||
5-minute read</span></div></div></header><div class=post-content><p>Modern Large Language Models (LLMs) are dominated by the Transformer architecture. However, as context windows grow, the computational cost of the Transformer’s attention mechanism has become a primary bottleneck. Recent discussions in the AI community—most notably by Geoffrey Hinton—have highlighted a theoretical link between biological memory mechanisms (“Fast Weights”) and efficient engineering solutions like Linear Transformers and State Space Models (SSMs).</p><p>This article explores the mathematical equivalence between Hinton’s concept of Fast Weights as Associative Memory and the recurrence mechanisms found in models such as Mamba and RWKV.</p><h2 id=1-the-standard-transformer-bottleneck>1. The Standard Transformer Bottleneck
|
||||
<a class=heading-link href=#1-the-standard-transformer-bottleneck><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h2><p>To understand the motivation for Fast Weights, one must first identify the inefficiency in standard Transformers. The core operation is <strong>Self-Attention</strong>, defined as:</p>$$ \text{Attention}(Q, K, V) = \text{softmax}\left(\frac{Q K^T}{\sqrt{d}}\right) V $$<p>During inference (generating tokens one by one), the model computes a Query ($Q$) for the current token and compares it against the Keys ($K$) and Values ($V$) of all previous tokens.</p><ul><li><strong>Computational Cost:</strong> Quadratic $O(N^2)$ during training; Linear $O(N)$ per step during inference.</li><li><strong>Memory Cost:</strong> The KV Cache. To calculate the softmax, the model must explicitly store the $K$ and $V$ vectors for the entire history in GPU memory. For long contexts (e.g., 1 million tokens), this memory footprint becomes prohibitive.</li></ul><p>The <strong>Softmax</strong> function is the culprit. It introduces a non-linearity that binds $Q$ and $K$ together, preventing the mathematical separation of the current query from the historical context.</p><h2 id=2-fast-weights-as-associative-memory>2. Fast Weights as Associative Memory
|
||||
<a class=heading-link href=#2-fast-weights-as-associative-memory><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h2><p>Geoffrey Hinton proposes that the brain does not maintain a “digital buffer” of past activations (like a KV cache). Instead, it relies on <strong>Fast Weights</strong>.</p><p>In this framework, neural connections possess two timescales:</p><ol><li><strong>Slow Weights:</strong> The standard parameters learned over long periods (training).</li><li><strong>Fast Weights:</strong> Synaptic strengths that change rapidly during a forward pass to store temporary context.</li></ol><p>Hinton formalizes this temporary storage as an <strong>Associative Memory</strong>. When a network encounters a new key-value pair ($k, v$), it does not store the vectors in a list. Instead, it updates a fast weight matrix $W_{fast}$ using the Hebbian learning rule (outer product):</p>$$ W_{fast} \leftarrow \lambda W_{fast} + (v \otimes k) $$<p>Here, $\lambda$ is a decay factor ($0 < \lambda < 1$) representing forgetfulness. This matrix $W_{fast}$ compresses the history into a fixed-size representation of size $d \times d$, regardless of the sequence length.</p><h2 id=3-mathematical-unification-linear-attention>3. Mathematical Unification: Linear Attention
|
||||
<a class=heading-link href=#3-mathematical-unification-linear-attention><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h2><p>The connection between Fast Weights and Transformers is established by removing the softmax function from the attention mechanism, a technique known as <strong>Linear Attention</strong>.</p><p>If we treat the interaction between $Q$ and $K$ as linear, the attention equation becomes:</p>$$ \text{LinearAttention} = (Q K^T) V $$<p>Using the associative property of matrix multiplication, we can reorder the operations:</p>$$ Q (K^T V) $$<p>This reordering fundamentally alters the mechanism:</p><ul><li><strong>Left Side $(Q K^T) V$:</strong> Compare Query to all Keys, then multiply by Values. Requires storing history.</li><li><strong>Right Side $Q (K^T V)$:</strong> Compute the summation of Key-Value outer products first.</li></ul><p>The term $(K^T V)$ represents the summation of all past associations. This term <strong>is</strong> the Fast Weight matrix $W_{fast}$ described by Hinton.</p>$$ \text{State}_t = \sum_{i=1}^t k_i v_i^T $$<p>Thus, Linear Attention is effectively a system where the “state” is a matrix of Fast Weights that is updated at every time step.</p><h2 id=4-state-space-models-ssms-as-recurrent-fast-weights>4. State Space Models (SSMs) as Recurrent Fast Weights
|
||||
<a class=heading-link href=#4-state-space-models-ssms-as-recurrent-fast-weights><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h2><p>State Space Models (like S4 and Mamba) typically define sequence modeling through continuous control theory, discretized into a recurrence:</p>$$ h_t = \bar{A} h_{t-1} + \bar{B} x_t $$<p></p>$$ y_t = \bar{C} h_t $$<p>While derived differently, this recurrence is mathematically equivalent to the Linear Attention/Fast Weight mechanism. We can demonstrate this by “unrolling” the SSM recursion to see how the output $y_t$ depends on the history.</p><p>The output at time $t$ is the sum of inputs weighted by decaying powers of $\bar{A}$:</p>$$ y_t = \sum_{j=1}^t \bar{C} (\bar{A}^{t-j}) (\bar{B} x_j) $$<p>Comparing this to the Linear Attention formulation with decay $\lambda$:</p>$$ \text{Attention}_t = q_t \sum_{j=1}^t (\lambda^{t-j}) (k_j^T v_j) $$<p>The mapping between architectures becomes clear:</p><ul><li><strong>Query ($q_t$)</strong> $\leftrightarrow$ Output Matrix <strong>$\bar{C}$</strong></li><li><strong>Key/Value ($k_j^T v_j$)</strong> $\leftrightarrow$ Input Matrix <strong>$\bar{B} x_j$</strong> (Input Projection)</li><li><strong>Decay Factor ($\lambda$)</strong> $\leftrightarrow$ State Matrix <strong>$\bar{A}$</strong></li><li><strong>Fast Weight Matrix ($S_t$)</strong> $\leftrightarrow$ Hidden State <strong>$h_t$</strong></li></ul><p>Therefore, an SSM is mechanically a Transformer that uses Fast Weights (a fixed-size recurrent state) rather than a KV Cache (a growing buffer) to handle attention.</p><h2 id=5-implications-for-inference-optimization>5. Implications for Inference Optimization
|
||||
<a class=heading-link href=#5-implications-for-inference-optimization><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h2><p>This theoretical convergence has significant implications for inference efficiency.</p><h3 id=standard-transformer>Standard Transformer
|
||||
<a class=heading-link href=#standard-transformer><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Mechanism:</strong> Stores history in a KV Cache.</li><li><strong>Memory:</strong> $O(N)$ (Grows linearly with sequence length).</li><li><strong>Performance:</strong> High recall/precision because it retains the exact history.</li></ul><h3 id=fast-weight--ssm-mamba--rwkv>Fast Weight / SSM (Mamba / RWKV)
|
||||
<a class=heading-link href=#fast-weight--ssm-mamba--rwkv><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h3><ul><li><strong>Mechanism:</strong> Compresses history into a single Matrix/Vector state.</li><li><strong>Memory:</strong> $O(1)$ (Constant memory, regardless of sequence length).</li><li><strong>Performance:</strong> Historically lower than Transformers due to “compression loss” (trying to stuff infinite history into a finite matrix).</li></ul><p><strong>The Solution:</strong> Modern SSMs like Mamba improve upon basic Linear Attention by introducing <strong>Selectivity</strong>. Instead of compressing <em>all</em> history equally (which blurs the memory), Mamba allows the model to dynamically gate the inputs—choosing to store relevant information and reset/forget irrelevant noise. This allows the Fast Weight approach to compete with the accuracy of explicit Attention while maintaining constant memory usage.</p><h3 id=references>References
|
||||
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h3><ol><li><strong>Hinton, G. E., & Plaut, D. C. (1987).</strong> “Using Fast Weights to Deblur Old Memories.” <em>Proceedings of the 9th Annual Conference of the Cognitive Science Society.</em></li><li><strong>Ba, J., Hinton, G. E., et al. (2016).</strong> “Using Fast Weights to Attend to the Recent Past.” <em>Advances in Neural Information Processing Systems (NeurIPS).</em></li><li><strong>Katharopoulos, A., et al. (2020).</strong> “Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention.” <em>International Conference on Machine Learning (ICML).</em></li><li><strong>Gu, A., & Dao, T. (2023).</strong> “Mamba: Linear-Time Sequence Modeling with Selective State Spaces.” <em>arXiv preprint arXiv:2312.00752.</em></li><li><strong>Vaswani, A., et al. (2017).</strong> “Attention Is All You Need.” <em>Advances in Neural Information Processing Systems (NeurIPS).</em></li></ol></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer 
src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -36,4 +36,4 @@ In deep learning, a “channel” can be thought of as a feature dimensi
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -28,4 +28,4 @@ This article documents that journey. It details the pitfalls encountered, the co
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -9,4 +9,4 @@ One-minute read</span></div></div></header><div class=post-content><ul><li><a hr
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
34
posts/vattention/index.html
Normal file
34
posts/vattention/index.html
Normal file
@@ -0,0 +1,34 @@
|
||||
<!doctype html><html lang=en><head><title>vAttention · Eric X. Liu's Personal Page</title><meta charset=utf-8><meta name=viewport content="width=device-width,initial-scale=1"><meta name=color-scheme content="light dark"><meta name=author content="Eric X. Liu"><meta name=description content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
|
||||
|
||||
The Status Quo: PagedAttention and Software Tables
|
||||
|
||||
|
||||
Link to heading
|
||||
|
||||
|
||||
Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta name=keywords content="software engineer,performance engineering,Google engineer,tech blog,software development,performance optimization,Eric Liu,engineering blog,mountain biking,Jeep enthusiast,overlanding,camping,outdoor adventures"><meta name=twitter:card content="summary"><meta name=twitter:title content="vAttention"><meta name=twitter:description content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
|
||||
The Status Quo: PagedAttention and Software Tables Link to heading Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta property="og:url" content="/posts/vattention/"><meta property="og:site_name" content="Eric X. Liu's Personal Page"><meta property="og:title" content="vAttention"><meta property="og:description" content="Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While PagedAttention (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.
|
||||
The Status Quo: PagedAttention and Software Tables Link to heading Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1]."><meta property="og:locale" content="en"><meta property="og:type" content="article"><meta property="article:section" content="posts"><meta property="article:published_time" content="2025-12-08T00:00:00+00:00"><meta property="article:modified_time" content="2025-12-19T21:21:55+00:00"><link rel=canonical href=/posts/vattention/><link rel=preload href=/fonts/fa-brands-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-regular-400.woff2 as=font type=font/woff2 crossorigin><link rel=preload href=/fonts/fa-solid-900.woff2 as=font type=font/woff2 crossorigin><link rel=stylesheet href=/css/coder.min.f03d6359cf766772af14fbe07ce6aca734b321c2e15acba0bbf4e2254941c460.css integrity="sha256-8D1jWc92Z3KvFPvgfOaspzSzIcLhWsugu/TiJUlBxGA=" crossorigin=anonymous media=screen><link rel=stylesheet href=/css/coder-dark.min.a00e6364bacbc8266ad1cc81230774a1397198f8cfb7bcba29b7d6fcb54ce57f.css integrity="sha256-oA5jZLrLyCZq0cyBIwd0oTlxmPjPt7y6KbfW/LVM5X8=" crossorigin=anonymous media=screen><link rel=icon type=image/svg+xml href=/images/favicon.svg sizes=any><link rel=icon type=image/png href=/images/favicon-32x32.png sizes=32x32><link rel=icon type=image/png href=/images/favicon-16x16.png sizes=16x16><link rel=apple-touch-icon href=/images/apple-touch-icon.png><link rel=apple-touch-icon sizes=180x180 href=/images/apple-touch-icon.png><link rel=manifest href=/site.webmanifest><link rel=mask-icon href=/images/safari-pinned-tab.svg color=#5bbad5></head><body class="preload-transitions colorscheme-auto"><div class=float-container><a id=dark-mode-toggle class=colorscheme-toggle><i class="fa-solid 
fa-adjust fa-fw" aria-hidden=true></i></a></div><main class=wrapper><nav class=navigation><section class=container><a class=navigation-title href=/>Eric X. Liu's Personal Page
|
||||
</a><input type=checkbox id=menu-toggle>
|
||||
<label class="menu-button float-right" for=menu-toggle><i class="fa-solid fa-bars fa-fw" aria-hidden=true></i></label><ul class=navigation-list><li class=navigation-item><a class=navigation-link href=/posts/>Posts</a></li><li class=navigation-item><a class=navigation-link href=https://chat.ericxliu.me>Chat</a></li><li class=navigation-item><a class=navigation-link href=https://git.ericxliu.me/user/oauth2/Authentik>Git</a></li><li class=navigation-item><a class=navigation-link href=https://coder.ericxliu.me/api/v2/users/oidc/callback>Coder</a></li><li class=navigation-item><a class=navigation-link href=/>|</a></li><li class=navigation-item><a class=navigation-link href=https://sso.ericxliu.me>Sign in</a></li></ul></section></nav><div class=content><section class="container post"><article><header><div class=post-title><h1 class=title><a class=title-link href=/posts/vattention/>vAttention</a></h1></div><div class=post-meta><div class=date><span class=posted-on><i class="fa-solid fa-calendar" aria-hidden=true></i>
|
||||
<time datetime=2025-12-08T00:00:00Z>December 8, 2025
|
||||
</time></span><span class=reading-time><i class="fa-solid fa-clock" aria-hidden=true></i>
|
||||
4-minute read</span></div></div></header><div class=post-content><p>Large Language Model (LLM) inference is memory-bound, primarily due to the Key-Value (KV) cache—a store of intermediate state that grows linearly with sequence length. Efficient management of this memory is critical for throughput. While <strong>PagedAttention</strong> (popularized by vLLM) became the industry standard by solving memory fragmentation via software, recent research suggests that leveraging the GPU’s native hardware Memory Management Unit (MMU) offers a more performant and portable solution.</p><h4 id=the-status-quo-pagedattention-and-software-tables>The Status Quo: PagedAttention and Software Tables
|
||||
<a class=heading-link href=#the-status-quo-pagedattention-and-software-tables><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>Prior to PagedAttention, systems allocated contiguous memory for the maximum possible context length, leading to severe fragmentation and wasted memory. PagedAttention addressed this by chunking the KV cache into non-contiguous blocks, managed by a software-defined “page table” (the Block Table) [1].</p><p>While effective at reducing fragmentation, this approach introduces significant complexity:</p><ul><li><strong>Kernel Rewriting:</strong> Because the KV cache is no longer contiguous in virtual memory, standard attention kernels (like cuDNN SDPA or vanilla FlashAttention) cannot be used directly. Developers must rewrite kernels to manually dereference block tables [1].</li><li><strong>Software Overhead:</strong> The system must manage virtual-to-physical mapping in user space, duplicating work typically handled by the OS. This adds runtime overhead to the critical path of both the CPU (managing tables) and the GPU (performing lookups) [1].</li><li><strong>Performance Penalties:</strong> PagedAttention-based kernels have been observed to be slower than their non-paged counterparts. For example, vLLM’s paged kernel has shown to be up to 2.8x slower than FlashAttention-2 in specific tests [1].</li></ul><h4 id=the-hardware-native-alternative-vattention>The Hardware-Native Alternative: vAttention
|
||||
<a class=heading-link href=#the-hardware-native-alternative-vattention><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p><strong>vAttention</strong> proposes returning the responsibility of memory management to the OS and hardware. By utilizing the CUDA Virtual Memory Management (VMM) APIs, it is possible to decouple the allocation of virtual memory from physical memory [1].</p><p><strong>How it works:</strong></p><ol><li><strong>Virtual Contiguity:</strong> The system reserves a large, contiguous range of virtual addresses for the KV cache at request start.</li><li><strong>Physical Paging:</strong> Physical memory pages are allocated and mapped to this virtual range only on demand (dynamically) as the token sequence grows [1].</li><li><strong>Hardware Lookups:</strong> Because the GPU sees a contiguous virtual address range, the hardware Translation Lookaside Buffer (TLB) handles the address translation. This allows the use of unmodified, high-performance kernels like FlashAttention-2 or FlashAttention-3 without custom paging logic [1].</li></ol><h4 id=technical-challenges-and-solutions>Technical Challenges and Solutions
|
||||
<a class=heading-link href=#technical-challenges-and-solutions><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>Historically, using the GPU native virtual memory for high-frequency token generation faced two major bottlenecks: <strong>Control Plane Latency</strong> and <strong>Page Granularity</strong>.</p><p><strong>1. Control Plane Latency (The API Bottleneck)</strong>
|
||||
Standard memory allocation (<code>cudaMalloc</code>) is monolithic—it allocates virtual and physical memory simultaneously. The more granular driver API, <code>cuMemMap</code>, allows separating these steps but involves expensive round-trips to the OS driver. Invoking these APIs synchronously during decoding (which generates one token at a time) would stall the GPU execution pipeline [1].</p><p>To solve this, vAttention utilizes <strong>execution overlap</strong>:</p><ul><li>Because LLM decoding is autoregressive and predictable, the system knows exactly when new memory is needed (one token ahead).</li><li>The CPU initiates the memory mapping for the <em>next</em> token asynchronously while the GPU is still computing the <em>current</em> token. By the time the GPU reaches the next step, the TLB and page tables are already updated, effectively hiding the driver latency [1].</li></ul><p><strong>2. Page Size Granularity (The Fragmentation Bottleneck)</strong>
|
||||
The GPU TLB hierarchy is sensitive to page sizes.</p><ul><li><strong>4KB Pages:</strong> Too small. Mapping gigabytes of KV cache with 4KB pages causes “TLB thrashing,” degrading performance.</li><li><strong>2MB Huge Pages:</strong> The standard for CUDA large allocations. However, allocating 2MB for a single token update causes massive internal fragmentation, negating the benefits of dynamic allocation.</li></ul><p>Research identified <strong>64KB</strong> as the optimal page size, offering a balance between TLB efficiency and memory utilization. While standard CUDA APIs default to 2MB, vAttention utilizes modified driver calls to enable 64KB pages, eliminating TLB thrashing without incurring the fragmentation cost of huge pages [1].</p><h4 id=performance-and-portability-implications>Performance and Portability Implications
|
||||
<a class=heading-link href=#performance-and-portability-implications><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>Moving memory management from software (PagedAttention) to hardware (vAttention) yields measurable benefits:</p><ul><li><strong>Throughput:</strong> In prefill-heavy workloads, vAttention outperforms PagedAttention-based systems (like vLLM and FlashInfer) by up to 1.23x due to the elimination of software lookup overheads. In decoding, it matches or exceeds the performance of optimized paged kernels [1].</li><li><strong>Portability:</strong> A significant advantage is software compatibility. When NVIDIA released FlashAttention-3 (optimized for Hopper H100 GPUs), it did not initially support PagedAttention. vAttention enabled the immediate use of FlashAttention-3 with dynamic memory support, achieving up to 1.5x higher throughput than PagedAttention-based FlashAttention-2 [1].</li></ul><h4 id=conclusion>Conclusion
|
||||
<a class=heading-link href=#conclusion><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h4><p>While PagedAttention solved the critical issue of memory fragmentation in LLMs, it necessitated a complex software abstraction layer. By leveraging low-level CUDA VMM APIs, handling allocations asynchronously to hide driver latency, and optimizing page sizes, it is possible to achieve dynamic memory management using the GPU’s native hardware. This restores the illusion of contiguous memory, simplifies kernel development, and improves inference performance.</p><h3 id=references>References
|
||||
<a class=heading-link href=#references><i class="fa-solid fa-link" aria-hidden=true title="Link to heading"></i>
|
||||
<span class=sr-only>Link to heading</span></a></h3><p>[1] R. Prabhu et al., “vAttention: Dynamic Memory Management for Serving LLMs without PagedAttention,” in <em>Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems (ASPLOS ‘25)</em>, 2025.</p></div><footer><div id=disqus_thread></div><script>window.disqus_config=function(){},function(){if(["localhost","127.0.0.1"].indexOf(window.location.hostname)!=-1){document.getElementById("disqus_thread").innerHTML="Disqus comments not available by default when the website is previewed locally.";return}var t=document,e=t.createElement("script");e.async=!0,e.src="//ericxliu-me.disqus.com/embed.js",e.setAttribute("data-timestamp",+new Date),(t.head||t.body).appendChild(e)}(),document.addEventListener("themeChanged",function(){document.readyState=="complete"&&DISQUS.reset({reload:!0,config:disqus_config})})</script></footer></article><link rel=stylesheet href=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.css integrity=sha384-vKruj+a13U8yHIkAyGgK1J3ArTLzrFGBbBc0tDp4ad/EyewESeXE/Iv67Aj8gKZ0 crossorigin=anonymous><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/katex.min.js integrity=sha384-PwRUT/YqbnEjkZO0zZxNqcxACrXe+j766U2amXcgMg5457rve2Y7I6ZJSm2A0mS4 crossorigin=anonymous></script><script defer src=https://cdn.jsdelivr.net/npm/katex@0.16.4/dist/contrib/auto-render.min.js integrity=sha384-+VBxd3r6XgURycqtZ117nYw44OOcIax56Z4dCRWbxyPt0Koah1uHoK0o4+/RRE05 crossorigin=anonymous onload='renderMathInElement(document.body,{delimiters:[{left:"$$",right:"$$",display:!0},{left:"$",right:"$",display:!1},{left:"\\(",right:"\\)",display:!1},{left:"\\[",right:"\\]",display:!0}]})'></script></section></div><footer class=footer><section class=container>©
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
@@ -1 +1 @@
|
||||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml"><url><loc>/</loc><lastmod>2025-10-04T20:41:50+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/</loc><lastmod>2025-10-04T20:41:50+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/benchmarking-llms-on-jetson-orin-nano/</loc><lastmod>2025-10-04T20:41:50+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/flashing-jetson-orin-nano-in-virtualized-environments/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</loc><lastmod>2025-10-02T08:34:05+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/unifi-vlan-migration-to-zone-based-architecture/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/quantization-in-llms/</loc><lastmod>2025-08-20T06:02:35+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/breville-barista-pro-maintenance/</loc><lastmod>2025-08-20T06:04:36+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</loc><lastmod>2025-08-14T06:50:22+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/how-rvq-teaches-llms-to-see-and-hear/</loc><lastmod>2025-08-08T17:36:52+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/supabase-deep-dive/</loc><lastmod>2025-08-04T03:59:37+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/ppo-for-language-models/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>w
eekly</changefreq><priority>0.5</priority></url><url><loc>/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</loc><lastmod>2025-08-03T06:02:48+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</loc><lastmod>2025-08-03T03:41:10+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</loc><lastmod>2025-08-03T04:20:20+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/transformer-s-core-mechanics/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/useful/</loc><lastmod>2025-08-03T08:37:28-07:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/about/</loc><lastmod>2020-06-16T23:30:17-07:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/categories/</loc><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/tags/</loc><changefreq>weekly</changefreq><priority>0.5</priority></url></urlset>
|
||||
<?xml version="1.0" encoding="utf-8" standalone="yes"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml"><url><loc>/</loc><lastmod>2025-12-19T21:21:55+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/</loc><lastmod>2025-12-19T21:21:55+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/the-convergence-of-fast-weights-linear-attention-and-state-space-models/</loc><lastmod>2025-12-19T21:21:55+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/vattention/</loc><lastmod>2025-12-19T21:21:55+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/benchmarking-llms-on-jetson-orin-nano/</loc><lastmod>2025-10-04T20:41:50+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/flashing-jetson-orin-nano-in-virtualized-environments/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/openwrt-mwan3-wireguard-endpoint-exclusion/</loc><lastmod>2025-10-02T08:34:05+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/unifi-vlan-migration-to-zone-based-architecture/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/quantization-in-llms/</loc><lastmod>2025-08-20T06:02:35+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/breville-barista-pro-maintenance/</loc><lastmod>2025-08-20T06:04:36+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/secure-boot-dkms-and-mok-on-proxmox-debian/</loc><lastmod>2025-08-14T06:50:22+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/how-rvq-teaches-llms-to-see-and-hear/</loc><lastmod>2025-08-
08T17:36:52+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/supabase-deep-dive/</loc><lastmod>2025-08-04T03:59:37+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/ppo-for-language-models/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/mixture-of-experts-moe-models-challenges-solutions-in-practice/</loc><lastmod>2025-08-03T06:02:48+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/t5-the-transformer-that-zigged-when-others-zagged-an-architectural-deep-dive/</loc><lastmod>2025-08-03T03:41:10+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/espresso-theory-application-a-guide-for-the-breville-barista-pro/</loc><lastmod>2025-08-03T04:20:20+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/transformer-s-core-mechanics/</loc><lastmod>2025-10-02T08:42:39+00:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/posts/useful/</loc><lastmod>2025-08-03T08:37:28-07:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/about/</loc><lastmod>2020-06-16T23:30:17-07:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/categories/</loc><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>/tags/</loc><changefreq>weekly</changefreq><priority>0.5</priority></url></urlset>
|
||||
@@ -4,4 +4,4 @@
|
||||
2016 -
|
||||
2025
|
||||
Eric X. Liu
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/6ed1d69">[6ed1d69]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
<a href="https://git.ericxliu.me/eric/ericxliu-me/commit/34aa99a">[34aa99a]</a></section></footer></main><script src=/js/coder.min.6ae284be93d2d19dad1f02b0039508d9aab3180a12a06dcc71b0b0ef7825a317.js integrity="sha256-auKEvpPS0Z2tHwKwA5UI2aqzGAoSoG3McbCw73gloxc="></script><script defer src=https://static.cloudflareinsights.com/beacon.min.js data-cf-beacon='{"token": "987638e636ce4dbb932d038af74c17d1"}'></script></body></html>
|
||||
Reference in New Issue
Block a user