chore: expose original length when serving slim binaries (#17735)

This will be used in the extensions and desktop apps to enable
compression AND progress reporting for the download by comparing the
original content length to the amount of bytes written to disk.

Closes #16340
This commit is contained in:
Dean Sheather
2025-05-16 15:19:28 +10:00
committed by GitHub
parent 90e93a2399
commit c7917ea9e5
2 changed files with 182 additions and 88 deletions

View File

@ -108,46 +108,8 @@ func New(opts *Options) *Handler {
panic(fmt.Sprintf("Failed to parse html files: %v", err)) panic(fmt.Sprintf("Failed to parse html files: %v", err))
} }
binHashCache := newBinHashCache(opts.BinFS, opts.BinHashes)
mux := http.NewServeMux() mux := http.NewServeMux()
mux.Handle("/bin/", http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { mux.Handle("/bin/", binHandler(opts.BinFS, newBinMetadataCache(opts.BinFS, opts.BinHashes)))
// Convert underscores in the filename to hyphens. We eventually want to
// change our hyphen-based filenames to underscores, but we need to
// support both for now.
r.URL.Path = strings.ReplaceAll(r.URL.Path, "_", "-")
// Set ETag header to the SHA1 hash of the file contents.
name := filePath(r.URL.Path)
if name == "" || name == "/" {
// Serve the directory listing. This intentionally allows directory listings to
// be served. This file system should not contain anything sensitive.
http.FileServer(opts.BinFS).ServeHTTP(rw, r)
return
}
if strings.Contains(name, "/") {
// We only serve files from the root of this directory, so avoid any
// shenanigans by blocking slashes in the URL path.
http.NotFound(rw, r)
return
}
hash, err := binHashCache.getHash(name)
if xerrors.Is(err, os.ErrNotExist) {
http.NotFound(rw, r)
return
}
if err != nil {
http.Error(rw, err.Error(), http.StatusInternalServerError)
return
}
// ETag header needs to be quoted.
rw.Header().Set("ETag", fmt.Sprintf(`%q`, hash))
// http.FileServer will see the ETag header and automatically handle
// If-Match and If-None-Match headers on the request properly.
http.FileServer(opts.BinFS).ServeHTTP(rw, r)
})))
mux.Handle("/", http.FileServer( mux.Handle("/", http.FileServer(
http.FS( http.FS(
// OnlyFiles is a wrapper around the file system that prevents directory // OnlyFiles is a wrapper around the file system that prevents directory
@ -172,6 +134,60 @@ func New(opts *Options) *Handler {
return handler return handler
} }
// binHandler serves slim binaries from binFS under /bin/. It sets an ETag
// (SHA-1 of the file contents, served from binMetadataCache) so that
// http.FileServer can answer If-Match / If-None-Match requests, and an
// X-Original-Content-Length header so clients can report download progress
// even when the response body is compressed in transit.
func binHandler(binFS http.FileSystem, binMetadataCache *binMetadataCache) http.Handler {
	return http.StripPrefix("/bin", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
		// Underscores in the filename are treated as hyphens. We eventually
		// want to migrate our hyphen-based filenames to underscores, but both
		// forms must work for now.
		req.URL.Path = strings.ReplaceAll(req.URL.Path, "_", "-")

		fileName := filePath(req.URL.Path)
		fileSrv := http.FileServer(binFS)

		if fileName == "" || fileName == "/" {
			// Serve the directory listing. This intentionally allows directory
			// listings to be served. This file system should not contain
			// anything sensitive.
			fileSrv.ServeHTTP(w, req)
			return
		}
		if strings.Contains(fileName, "/") {
			// Only files at the root of this directory are served, so block
			// any path containing a slash to avoid shenanigans.
			http.NotFound(w, req)
			return
		}

		md, err := binMetadataCache.getMetadata(fileName)
		switch {
		case xerrors.Is(err, os.ErrNotExist):
			http.NotFound(w, req)
			return
		case err != nil:
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}

		// http.FileServer will not set Content-Length when performing chunked
		// transport encoding, which is used for large files like our binaries
		// so stream compression can be used.
		//
		// Clients like IDE extensions and the desktop apps can compare the
		// value of this header with the amount of bytes written to disk after
		// decompression to show progress. Without this, they cannot show
		// progress without disabling compression.
		//
		// There isn't really a spec for a length header for the "inner"
		// content size, but some nginx modules use this header.
		w.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", md.sizeBytes))

		// The ETag header must be quoted. http.FileServer will see it and
		// automatically handle If-Match and If-None-Match request headers
		// properly.
		w.Header().Set("ETag", fmt.Sprintf(`%q`, md.sha1Hash))

		fileSrv.ServeHTTP(w, req)
	}))
}
type Handler struct { type Handler struct {
opts *Options opts *Options
@ -217,7 +233,7 @@ func (h *Handler) ServeHTTP(rw http.ResponseWriter, r *http.Request) {
h.handler.ServeHTTP(rw, r) h.handler.ServeHTTP(rw, r)
return return
// If requesting assets, serve straight up with caching. // If requesting assets, serve straight up with caching.
case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/"): case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/") || strings.HasPrefix(reqFile, "icon/"):
// It could make sense to cache 404s, but the problem is that during an // It could make sense to cache 404s, but the problem is that during an
// upgrade a load balancer may route partially to the old server, and that // upgrade a load balancer may route partially to the old server, and that
// would make new asset paths get cached as 404s and not load even once the // would make new asset paths get cached as 404s and not load even once the
@ -952,68 +968,95 @@ func RenderStaticErrorPage(rw http.ResponseWriter, r *http.Request, data ErrorPa
} }
} }
type binHashCache struct { type binMetadata struct {
binFS http.FileSystem sizeBytes int64 // -1 if not known yet
// SHA1 was chosen because it's fast to compute and reasonable for
hashes map[string]string // determining if a file has changed. The ETag is not used as a security
mut sync.RWMutex // measure.
sf singleflight.Group sha1Hash string // always set if in the cache
sem chan struct{}
} }
func newBinHashCache(binFS http.FileSystem, binHashes map[string]string) *binHashCache { type binMetadataCache struct {
b := &binHashCache{ binFS http.FileSystem
binFS: binFS, originalHashes map[string]string
hashes: make(map[string]string, len(binHashes)),
mut: sync.RWMutex{}, metadata map[string]binMetadata
sf: singleflight.Group{}, mut sync.RWMutex
sem: make(chan struct{}, 4), sf singleflight.Group
sem chan struct{}
}
func newBinMetadataCache(binFS http.FileSystem, binSha1Hashes map[string]string) *binMetadataCache {
b := &binMetadataCache{
binFS: binFS,
originalHashes: make(map[string]string, len(binSha1Hashes)),
metadata: make(map[string]binMetadata, len(binSha1Hashes)),
mut: sync.RWMutex{},
sf: singleflight.Group{},
sem: make(chan struct{}, 4),
} }
// Make a copy since we're gonna be mutating it.
for k, v := range binHashes { // Previously we copied binSha1Hashes to the cache immediately. Since we now
b.hashes[k] = v // read other information like size from the file, we can't do that. Instead
// we copy the hashes to a different map that will be used to populate the
// cache on the first request.
for k, v := range binSha1Hashes {
b.originalHashes[k] = v
} }
return b return b
} }
func (b *binHashCache) getHash(name string) (string, error) { func (b *binMetadataCache) getMetadata(name string) (binMetadata, error) {
b.mut.RLock() b.mut.RLock()
hash, ok := b.hashes[name] metadata, ok := b.metadata[name]
b.mut.RUnlock() b.mut.RUnlock()
if ok { if ok {
return hash, nil return metadata, nil
} }
// Avoid DoS by using a pool, and only doing work once per file. // Avoid DoS by using a pool, and only doing work once per file.
v, err, _ := b.sf.Do(name, func() (interface{}, error) { v, err, _ := b.sf.Do(name, func() (any, error) {
b.sem <- struct{}{} b.sem <- struct{}{}
defer func() { <-b.sem }() defer func() { <-b.sem }()
f, err := b.binFS.Open(name) f, err := b.binFS.Open(name)
if err != nil { if err != nil {
return "", err return binMetadata{}, err
} }
defer f.Close() defer f.Close()
h := sha1.New() //#nosec // Not used for cryptography. var metadata binMetadata
_, err = io.Copy(h, f)
stat, err := f.Stat()
if err != nil { if err != nil {
return "", err return binMetadata{}, err
}
metadata.sizeBytes = stat.Size()
if hash, ok := b.originalHashes[name]; ok {
metadata.sha1Hash = hash
} else {
h := sha1.New() //#nosec // Not used for cryptography.
_, err := io.Copy(h, f)
if err != nil {
return binMetadata{}, err
}
metadata.sha1Hash = hex.EncodeToString(h.Sum(nil))
} }
hash := hex.EncodeToString(h.Sum(nil))
b.mut.Lock() b.mut.Lock()
b.hashes[name] = hash b.metadata[name] = metadata
b.mut.Unlock() b.mut.Unlock()
return hash, nil return metadata, nil
}) })
if err != nil { if err != nil {
return "", err return binMetadata{}, err
} }
//nolint:forcetypeassert //nolint:forcetypeassert
return strings.ToLower(v.(string)), nil return v.(binMetadata), nil
} }
func applicationNameOrDefault(cfg codersdk.AppearanceConfig) string { func applicationNameOrDefault(cfg codersdk.AppearanceConfig) string {

View File

@ -19,6 +19,7 @@ import (
"time" "time"
"github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
"github.com/go-chi/chi/v5/middleware"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -373,11 +374,13 @@ func TestServingBin(t *testing.T) {
delete(sampleBinFSMissingSha256, binCoderSha1) delete(sampleBinFSMissingSha256, binCoderSha1)
type req struct { type req struct {
url string url string
ifNoneMatch string ifNoneMatch string
wantStatus int wantStatus int
wantBody []byte wantBody []byte
wantEtag string wantOriginalSize int
wantEtag string
compression bool
} }
tests := []struct { tests := []struct {
name string name string
@ -390,17 +393,27 @@ func TestServingBin(t *testing.T) {
fs: sampleBinFS(), fs: sampleBinFS(),
reqs: []req{ reqs: []req{
{ {
url: "/bin/coder-linux-amd64", url: "/bin/coder-linux-amd64",
wantStatus: http.StatusOK, wantStatus: http.StatusOK,
wantBody: []byte("compressed"), wantBody: []byte("compressed"),
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), wantOriginalSize: 10,
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
}, },
// Test ETag support. // Test ETag support.
{ {
url: "/bin/coder-linux-amd64", url: "/bin/coder-linux-amd64",
ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
wantStatus: http.StatusNotModified, wantStatus: http.StatusNotModified,
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), wantOriginalSize: 10,
wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]),
},
// Test compression support with X-Original-Content-Length
// header.
{
url: "/bin/coder-linux-amd64",
wantStatus: http.StatusOK,
wantOriginalSize: 10,
compression: true,
}, },
{url: "/bin/GITKEEP", wantStatus: http.StatusNotFound}, {url: "/bin/GITKEEP", wantStatus: http.StatusNotFound},
}, },
@ -462,9 +475,24 @@ func TestServingBin(t *testing.T) {
}, },
reqs: []req{ reqs: []req{
// We support both hyphens and underscores for compatibility. // We support both hyphens and underscores for compatibility.
{url: "/bin/coder-linux-amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")}, {
{url: "/bin/coder_linux_amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")}, url: "/bin/coder-linux-amd64",
{url: "/bin/GITKEEP", wantStatus: http.StatusOK, wantBody: []byte("")}, wantStatus: http.StatusOK,
wantBody: []byte("embed"),
wantOriginalSize: 5,
},
{
url: "/bin/coder_linux_amd64",
wantStatus: http.StatusOK,
wantBody: []byte("embed"),
wantOriginalSize: 5,
},
{
url: "/bin/GITKEEP",
wantStatus: http.StatusOK,
wantBody: []byte(""),
wantOriginalSize: 0,
},
}, },
}, },
} }
@ -482,12 +510,14 @@ func TestServingBin(t *testing.T) {
require.Error(t, err, "extraction or read did not fail") require.Error(t, err, "extraction or read did not fail")
} }
srv := httptest.NewServer(site.New(&site.Options{ site := site.New(&site.Options{
Telemetry: telemetry.NewNoop(), Telemetry: telemetry.NewNoop(),
BinFS: binFS, BinFS: binFS,
BinHashes: binHashes, BinHashes: binHashes,
SiteFS: rootFS, SiteFS: rootFS,
})) })
compressor := middleware.NewCompressor(1, "text/*", "application/*")
srv := httptest.NewServer(compressor.Handler(site))
defer srv.Close() defer srv.Close()
// Create a context // Create a context
@ -502,6 +532,9 @@ func TestServingBin(t *testing.T) {
if tr.ifNoneMatch != "" { if tr.ifNoneMatch != "" {
req.Header.Set("If-None-Match", tr.ifNoneMatch) req.Header.Set("If-None-Match", tr.ifNoneMatch)
} }
if tr.compression {
req.Header.Set("Accept-Encoding", "gzip")
}
resp, err := http.DefaultClient.Do(req) resp, err := http.DefaultClient.Do(req)
require.NoError(t, err, "http do failed") require.NoError(t, err, "http do failed")
@ -520,10 +553,28 @@ func TestServingBin(t *testing.T) {
assert.Empty(t, gotBody, "body is not empty") assert.Empty(t, gotBody, "body is not empty")
} }
if tr.compression {
assert.Equal(t, "gzip", resp.Header.Get("Content-Encoding"), "content encoding is not gzip")
} else {
assert.Empty(t, resp.Header.Get("Content-Encoding"), "content encoding is not empty")
}
if tr.wantEtag != "" { if tr.wantEtag != "" {
assert.NotEmpty(t, resp.Header.Get("ETag"), "etag header is empty") assert.NotEmpty(t, resp.Header.Get("ETag"), "etag header is empty")
assert.Equal(t, tr.wantEtag, resp.Header.Get("ETag"), "etag did not match") assert.Equal(t, tr.wantEtag, resp.Header.Get("ETag"), "etag did not match")
} }
if tr.wantOriginalSize > 0 {
// This is a custom header that we set to help the
// client know the size of the decompressed data. See
// the comment in site.go.
headerStr := resp.Header.Get("X-Original-Content-Length")
assert.NotEmpty(t, headerStr, "X-Original-Content-Length header is empty")
originalSize, err := strconv.Atoi(headerStr)
if assert.NoErrorf(t, err, "could not parse X-Original-Content-Length header %q", headerStr) {
assert.EqualValues(t, tr.wantOriginalSize, originalSize, "X-Original-Content-Length did not match")
}
}
}) })
} }
}) })