// Copyright (c) Tailscale Inc & AUTHORS // SPDX-License-Identifier: BSD-3-Clause // Package revproxy implements a minimal HTTP reverse proxy that caches files // locally on disk, backed by objects in an S3 bucket. // // # Limitations // // By default, only objects marked "immutable" by the target server are // eligible to be cached. Volatile objects that specify a max-age are also // cached in-memory, but are not persisted on disk or in S3. If we think it's // worthwhile we can spend some time to add more elaborate cache pruning, but // for now we're doing the simpler thing. package revproxy import ( "bytes" "crypto/sha256" "expvar" "fmt" "io" "net/http" "net/http/httputil" "net/url" "path" "path/filepath" "runtime" "slices" "strconv" "strings" "sync" "time" "github.com/creachadair/mds/cache" "github.com/creachadair/mds/mapset" "github.com/creachadair/scheddle" "github.com/creachadair/taskgroup" "github.com/tailscale/go-cache-plugin/lib/s3util" ) // Server is a caching reverse proxy server that caches successful responses to // GET requests for certain designated domains. // // The host field of the request URL must match one of the configured targets. // If not, the request is rejected with HTTP 502 (Bad Gateway). Otherwise, the // request is forwarded. A successful response will be cached if the server's // Cache-Control does not include "no-store", and does include "immutable". // // In addition, a successful response that is not immutable and specifies a // max-age will be cached temporarily in-memory. // // # Cache Format // // A cached response is a file with a header section and the body, separated by // a blank line. Only a subset of response headers are saved. // // # Cache Responses // // For requests handled by the proxy, the response includes an "X-Cache" header // indicating how the response was obtained: // // - "hit, memory": The response was served out of the memory cache. // - "hit, local": The response was served out of the local cache. 
// - "hit, remote": The response was faulted in from S3. // - "fetch, cached": The response was forwarded to the target and cached. // - "fetch, uncached": The response was forwarded to the target and not cached. // // For results intersecting with the cache, it also reports an X-Cache-Id giving // the storage key of the cache object. type Server struct { // Targets is the list of hosts for which the proxy should forward requests. // Host names should be fully-qualified ("host.example.com"). Targets []string // Local is the path of a local cache directory where responses are cached. // It must be non-empty. Local string // S3Client is the S3 client used to read and write cache entries to the // backing store. It must be non-nil. S3Client *s3util.Client // KeyPrefix, if non-empty, is prepended to each key stored into S3, with an // intervening slash. KeyPrefix string // Logf, if non-nil, is used to write log messages. If nil, logs are // discarded. Logf func(string, ...any) // LogRequests, if true, enables detailed (but noisy) debug logging of all // requests handled by the reverse proxy. Logs are written to Logf. // // Each request is presented in the format: // // B U:"" H: C: // E H: B: (