diff --git a/site/site.go b/site/site.go index e47e15848c..2b64d3cf98 100644 --- a/site/site.go +++ b/site/site.go @@ -108,46 +108,8 @@ func New(opts *Options) *Handler { panic(fmt.Sprintf("Failed to parse html files: %v", err)) } - binHashCache := newBinHashCache(opts.BinFS, opts.BinHashes) - mux := http.NewServeMux() - mux.Handle("/bin/", http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { - // Convert underscores in the filename to hyphens. We eventually want to - // change our hyphen-based filenames to underscores, but we need to - // support both for now. - r.URL.Path = strings.ReplaceAll(r.URL.Path, "_", "-") - - // Set ETag header to the SHA1 hash of the file contents. - name := filePath(r.URL.Path) - if name == "" || name == "/" { - // Serve the directory listing. This intentionally allows directory listings to - // be served. This file system should not contain anything sensitive. - http.FileServer(opts.BinFS).ServeHTTP(rw, r) - return - } - if strings.Contains(name, "/") { - // We only serve files from the root of this directory, so avoid any - // shenanigans by blocking slashes in the URL path. - http.NotFound(rw, r) - return - } - hash, err := binHashCache.getHash(name) - if xerrors.Is(err, os.ErrNotExist) { - http.NotFound(rw, r) - return - } - if err != nil { - http.Error(rw, err.Error(), http.StatusInternalServerError) - return - } - - // ETag header needs to be quoted. - rw.Header().Set("ETag", fmt.Sprintf(`%q`, hash)) - - // http.FileServer will see the ETag header and automatically handle - // If-Match and If-None-Match headers on the request properly. - http.FileServer(opts.BinFS).ServeHTTP(rw, r) - }))) + mux.Handle("/bin/", binHandler(opts.BinFS, newBinMetadataCache(opts.BinFS, opts.BinHashes))) mux.Handle("/", http.FileServer( http.FS( // OnlyFiles is a wrapper around the file system that prevents directory @@ -172,6 +134,60 @@ func New(opts *Options) *Handler { return handler } +func binHandler(binFS http.FileSystem, binMetadataCache *binMetadataCache) http.Handler { + return http.StripPrefix("/bin", http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + // Convert underscores in the filename to hyphens. We eventually want to + // change our hyphen-based filenames to underscores, but we need to + // support both for now. + r.URL.Path = strings.ReplaceAll(r.URL.Path, "_", "-") + + // Set ETag header to the SHA1 hash of the file contents. + name := filePath(r.URL.Path) + if name == "" || name == "/" { + // Serve the directory listing. This intentionally allows directory listings to + // be served. This file system should not contain anything sensitive. + http.FileServer(binFS).ServeHTTP(rw, r) + return + } + if strings.Contains(name, "/") { + // We only serve files from the root of this directory, so avoid any + // shenanigans by blocking slashes in the URL path. + http.NotFound(rw, r) + return + } + + metadata, err := binMetadataCache.getMetadata(name) + if xerrors.Is(err, os.ErrNotExist) { + http.NotFound(rw, r) + return + } + if err != nil { + http.Error(rw, err.Error(), http.StatusInternalServerError) + return + } + + // http.FileServer will not set Content-Length when performing chunked + // transport encoding, which is used for large files like our binaries + // so stream compression can be used. + // + // Clients like IDE extensions and the desktop apps can compare the + // value of this header with the amount of bytes written to disk after + // decompression to show progress. Without this, they cannot show + // progress without disabling compression. + // + // There isn't really a spec for a length header for the "inner" content + // size, but some nginx modules use this header. + rw.Header().Set("X-Original-Content-Length", fmt.Sprintf("%d", metadata.sizeBytes)) + + // Get and set ETag header. Must be quoted. + rw.Header().Set("ETag", fmt.Sprintf(`%q`, metadata.sha1Hash)) + + // http.FileServer will see the ETag header and automatically handle + // If-Match and If-None-Match headers on the request properly. + http.FileServer(binFS).ServeHTTP(rw, r) + })) +} + type Handler struct { opts *Options @@ -217,7 +233,7 @@ func (h *Handler) ServeHTTP(rw http.ResponseWriter, r *http.Request) { h.handler.ServeHTTP(rw, r) return // If requesting assets, serve straight up with caching. - case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/"): + case reqFile == "assets" || strings.HasPrefix(reqFile, "assets/") || strings.HasPrefix(reqFile, "icon/"): // It could make sense to cache 404s, but the problem is that during an // upgrade a load balancer may route partially to the old server, and that // would make new asset paths get cached as 404s and not load even once the @@ -952,68 +968,95 @@ func RenderStaticErrorPage(rw http.ResponseWriter, r *http.Request, data ErrorPa } } -type binHashCache struct { - binFS http.FileSystem - - hashes map[string]string - mut sync.RWMutex - sf singleflight.Group - sem chan struct{} +type binMetadata struct { + sizeBytes int64 // -1 if not known yet + // SHA1 was chosen because it's fast to compute and reasonable for + // determining if a file has changed. The ETag is not used a security + // measure. + sha1Hash string // always set if in the cache } -func newBinHashCache(binFS http.FileSystem, binHashes map[string]string) *binHashCache { - b := &binHashCache{ - binFS: binFS, - hashes: make(map[string]string, len(binHashes)), - mut: sync.RWMutex{}, - sf: singleflight.Group{}, - sem: make(chan struct{}, 4), +type binMetadataCache struct { + binFS http.FileSystem + originalHashes map[string]string + + metadata map[string]binMetadata + mut sync.RWMutex + sf singleflight.Group + sem chan struct{} +} + +func newBinMetadataCache(binFS http.FileSystem, binSha1Hashes map[string]string) *binMetadataCache { + b := &binMetadataCache{ + binFS: binFS, + originalHashes: make(map[string]string, len(binSha1Hashes)), + + metadata: make(map[string]binMetadata, len(binSha1Hashes)), + mut: sync.RWMutex{}, + sf: singleflight.Group{}, + sem: make(chan struct{}, 4), } - // Make a copy since we're gonna be mutating it. - for k, v := range binHashes { - b.hashes[k] = v + + // Previously we copied binSha1Hashes to the cache immediately. Since we now + // read other information like size from the file, we can't do that. Instead + // we copy the hashes to a different map that will be used to populate the + // cache on the first request. + for k, v := range binSha1Hashes { + b.originalHashes[k] = v } return b } -func (b *binHashCache) getHash(name string) (string, error) { +func (b *binMetadataCache) getMetadata(name string) (binMetadata, error) { b.mut.RLock() - hash, ok := b.hashes[name] + metadata, ok := b.metadata[name] b.mut.RUnlock() if ok { - return hash, nil + return metadata, nil } // Avoid DOS by using a pool, and only doing work once per file. - v, err, _ := b.sf.Do(name, func() (interface{}, error) { + v, err, _ := b.sf.Do(name, func() (any, error) { b.sem <- struct{}{} defer func() { <-b.sem }() f, err := b.binFS.Open(name) if err != nil { - return "", err + return binMetadata{}, err } defer f.Close() - h := sha1.New() //#nosec // Not used for cryptography. - _, err = io.Copy(h, f) + var metadata binMetadata + + stat, err := f.Stat() if err != nil { - return "", err + return binMetadata{}, err + } + metadata.sizeBytes = stat.Size() + + if hash, ok := b.originalHashes[name]; ok { + metadata.sha1Hash = hash + } else { + h := sha1.New() //#nosec // Not used for cryptography. + _, err := io.Copy(h, f) + if err != nil { + return binMetadata{}, err + } + metadata.sha1Hash = hex.EncodeToString(h.Sum(nil)) } - hash := hex.EncodeToString(h.Sum(nil)) b.mut.Lock() - b.hashes[name] = hash + b.metadata[name] = metadata b.mut.Unlock() - return hash, nil + return metadata, nil }) if err != nil { - return "", err + return binMetadata{}, err } //nolint:forcetypeassert - return strings.ToLower(v.(string)), nil + return v.(binMetadata), nil } func applicationNameOrDefault(cfg codersdk.AppearanceConfig) string { diff --git a/site/site_test.go b/site/site_test.go index 63f3f9aa17..d257bd9519 100644 --- a/site/site_test.go +++ b/site/site_test.go @@ -19,6 +19,7 @@ import ( "time" "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" "github.com/google/uuid" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -373,11 +374,13 @@ func TestServingBin(t *testing.T) { delete(sampleBinFSMissingSha256, binCoderSha1) type req struct { - url string - ifNoneMatch string - wantStatus int - wantBody []byte - wantEtag string + url string + ifNoneMatch string + wantStatus int + wantBody []byte + wantOriginalSize int + wantEtag string + compression bool } tests := []struct { name string @@ -390,17 +393,27 @@ func TestServingBin(t *testing.T) { fs: sampleBinFS(), reqs: []req{ { - url: "/bin/coder-linux-amd64", - wantStatus: http.StatusOK, - wantBody: []byte("compressed"), - wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + url: "/bin/coder-linux-amd64", + wantStatus: http.StatusOK, + wantBody: []byte("compressed"), + wantOriginalSize: 10, + wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), }, // Test ETag support. { - url: "/bin/coder-linux-amd64", - ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), - wantStatus: http.StatusNotModified, - wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + url: "/bin/coder-linux-amd64", + ifNoneMatch: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + wantStatus: http.StatusNotModified, + wantOriginalSize: 10, + wantEtag: fmt.Sprintf("%q", sampleBinSHAs["coder-linux-amd64"]), + }, + // Test compression support with X-Original-Content-Length + // header. + { + url: "/bin/coder-linux-amd64", + wantStatus: http.StatusOK, + wantOriginalSize: 10, + compression: true, }, {url: "/bin/GITKEEP", wantStatus: http.StatusNotFound}, }, @@ -462,9 +475,24 @@ func TestServingBin(t *testing.T) { }, reqs: []req{ // We support both hyphens and underscores for compatibility. - {url: "/bin/coder-linux-amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")}, - {url: "/bin/coder_linux_amd64", wantStatus: http.StatusOK, wantBody: []byte("embed")}, - {url: "/bin/GITKEEP", wantStatus: http.StatusOK, wantBody: []byte("")}, + { + url: "/bin/coder-linux-amd64", + wantStatus: http.StatusOK, + wantBody: []byte("embed"), + wantOriginalSize: 5, + }, + { + url: "/bin/coder_linux_amd64", + wantStatus: http.StatusOK, + wantBody: []byte("embed"), + wantOriginalSize: 5, + }, + { + url: "/bin/GITKEEP", + wantStatus: http.StatusOK, + wantBody: []byte(""), + wantOriginalSize: 0, + }, }, }, } @@ -482,12 +510,14 @@ func TestServingBin(t *testing.T) { require.Error(t, err, "extraction or read did not fail") } - srv := httptest.NewServer(site.New(&site.Options{ + site := site.New(&site.Options{ Telemetry: telemetry.NewNoop(), BinFS: binFS, BinHashes: binHashes, SiteFS: rootFS, - })) + }) + compressor := middleware.NewCompressor(1, "text/*", "application/*") + srv := httptest.NewServer(compressor.Handler(site)) defer srv.Close() // Create a context @@ -502,6 +532,9 @@ func TestServingBin(t *testing.T) { if tr.ifNoneMatch != "" { req.Header.Set("If-None-Match", tr.ifNoneMatch) } + if tr.compression { + req.Header.Set("Accept-Encoding", "gzip") + } resp, err := http.DefaultClient.Do(req) require.NoError(t, err, "http do failed") @@ -520,10 +553,28 @@ func TestServingBin(t *testing.T) { assert.Empty(t, gotBody, "body is not empty") } + if tr.compression { + assert.Equal(t, "gzip", resp.Header.Get("Content-Encoding"), "content encoding is not gzip") + } else { + assert.Empty(t, resp.Header.Get("Content-Encoding"), "content encoding is not empty") + } + if tr.wantEtag != "" { assert.NotEmpty(t, resp.Header.Get("ETag"), "etag header is empty") assert.Equal(t, tr.wantEtag, resp.Header.Get("ETag"), "etag did not match") } + + if tr.wantOriginalSize > 0 { + // This is a custom header that we set to help the + // client know the size of the decompressed data. See + // the comment in site.go. + headerStr := resp.Header.Get("X-Original-Content-Length") + assert.NotEmpty(t, headerStr, "X-Original-Content-Length header is empty") + originalSize, err := strconv.Atoi(headerStr) + if assert.NoErrorf(t, err, "could not parse X-Original-Content-Length header %q", headerStr) { + assert.EqualValues(t, tr.wantOriginalSize, originalSize, "X-Original-Content-Length did not match") + } + } }) } })