chore: add files cache for reading template tar archives from db (#17141)

Author: ケイラ
Date: 2025-04-02 15:42:16 -07:00
Committed by: GitHub
Parent: c06294235f
Commit: ac7ea08873

5 changed files with 308 additions and 0 deletions

archive/fs/tar.go Normal file

@@ -0,0 +1,17 @@
package archivefs

import (
	"archive/tar"
	"io"
	"io/fs"

	"github.com/spf13/afero"
	"github.com/spf13/afero/tarfs"
)

// FromTarReader returns a read-only fs.FS backed by the contents of the tar
// archive read from r.
func FromTarReader(r io.Reader) fs.FS {
	tr := tar.NewReader(r)
	tfs := tarfs.New(tr)
	rofs := afero.NewReadOnlyFs(tfs)
	return afero.NewIOFS(rofs)
}
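
FromTarReader adapts afero's tarfs to the standard io/fs interface behind a read-only wrapper. A minimal usage sketch (not part of this diff): build a one-file tar archive in memory, then read the file back through the returned fs.FS. The file name "main.tf" and its contents are illustrative only.

package main

import (
	"archive/tar"
	"bytes"
	"fmt"
	"io/fs"

	archivefs "github.com/coder/coder/v2/archive/fs"
)

func main() {
	// Build a one-file tar archive in memory.
	var buf bytes.Buffer
	tw := tar.NewWriter(&buf)
	content := []byte(`resource "null_resource" "example" {}`)
	_ = tw.WriteHeader(&tar.Header{
		Name:     "main.tf",
		Typeflag: tar.TypeReg,
		Mode:     0o644,
		Size:     int64(len(content)),
	})
	_, _ = tw.Write(content)
	_ = tw.Close()

	// Read the file back through the standard io/fs interface.
	tfs := archivefs.FromTarReader(&buf)
	data, err := fs.ReadFile(tfs, "main.tf")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
}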

coderd/files/cache.go Normal file

@@ -0,0 +1,110 @@
package files

import (
	"bytes"
	"context"
	"io/fs"
	"sync"

	"github.com/google/uuid"
	"golang.org/x/xerrors"

	archivefs "github.com/coder/coder/v2/archive/fs"
	"github.com/coder/coder/v2/coderd/database"
	"github.com/coder/coder/v2/coderd/util/lazy"
)

// NewFromStore returns a file cache that will fetch files from the provided
// database.
func NewFromStore(store database.Store) Cache {
	fetcher := func(ctx context.Context, fileID uuid.UUID) (fs.FS, error) {
		file, err := store.GetFileByID(ctx, fileID)
		if err != nil {
			return nil, xerrors.Errorf("failed to read file from database: %w", err)
		}

		content := bytes.NewBuffer(file.Data)
		return archivefs.FromTarReader(content), nil
	}

	return Cache{
		lock:    sync.Mutex{},
		data:    make(map[uuid.UUID]*cacheEntry),
		fetcher: fetcher,
	}
}

// Cache persists the files for template versions, and is used by dynamic
// parameters to deduplicate the files in memory. When any number of users
// opens the workspace creation form for a given template version, its files
// are loaded into memory exactly once. We hold those files until there are no
// longer any open connections, and then we remove the value from the map.
type Cache struct {
	lock sync.Mutex
	data map[uuid.UUID]*cacheEntry
	fetcher
}

type cacheEntry struct {
	// refCount must only be accessed while the Cache lock is held.
	refCount int
	value    *lazy.ValueWithError[fs.FS]
}

type fetcher func(context.Context, uuid.UUID) (fs.FS, error)

// Acquire will load the fs.FS for the given file. It guarantees that parallel
// calls for the same fileID will only result in one fetch, and that parallel
// calls for distinct fileIDs will fetch in parallel.
//
// Every call to Acquire must have a matching call to Release.
func (c *Cache) Acquire(ctx context.Context, fileID uuid.UUID) (fs.FS, error) {
	// It's important that this `Load` call occurs outside of `prepare`, after
	// the mutex has been released, or we would continue to hold the lock until
	// the entire file has been fetched, which may be slow, and would prevent
	// other files from being fetched in parallel.
	return c.prepare(ctx, fileID).Load()
}

func (c *Cache) prepare(ctx context.Context, fileID uuid.UUID) *lazy.ValueWithError[fs.FS] {
	c.lock.Lock()
	defer c.lock.Unlock()

	entry, ok := c.data[fileID]
	if !ok {
		value := lazy.NewWithError(func() (fs.FS, error) {
			return c.fetcher(ctx, fileID)
		})

		entry = &cacheEntry{
			value:    value,
			refCount: 0,
		}
		c.data[fileID] = entry
	}

	entry.refCount++
	return entry.value
}

// Release decrements the reference count for the given fileID, and frees the
// backing data if there are no further references being held.
func (c *Cache) Release(fileID uuid.UUID) {
	c.lock.Lock()
	defer c.lock.Unlock()

	entry, ok := c.data[fileID]
	if !ok {
		// If we land here, it's almost certainly because a bug already
		// happened, and we're freeing something that's already been freed, or
		// we're calling this function with an incorrect ID. Should this
		// function return an error?
		return
	}

	entry.refCount--
	if entry.refCount > 0 {
		return
	}

	delete(c.data, fileID)
}

coderd/files/cache_test.go Normal file

@@ -0,0 +1,104 @@
package files

import (
	"context"
	"io/fs"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/google/uuid"
	"github.com/spf13/afero"
	"github.com/stretchr/testify/require"
	"golang.org/x/sync/errgroup"

	"github.com/coder/coder/v2/testutil"
)

func TestConcurrency(t *testing.T) {
	t.Parallel()

	emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs()))
	var fetches atomic.Int64
	c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, error) {
		fetches.Add(1)
		// Wait long enough before returning to make sure that all of the
		// goroutines will be waiting in line, ensuring that no one duplicated
		// a fetch.
		time.Sleep(testutil.IntervalMedium)
		return emptyFS, nil
	})

	batches := 1000
	groups := make([]*errgroup.Group, 0, batches)
	for range batches {
		groups = append(groups, new(errgroup.Group))
	}

	// Call Acquire with a unique ID per batch, many times per batch, with many
	// batches all in parallel. This is pretty much the worst-case scenario:
	// thousands of concurrent reads, with both warm and cold loads happening.
	batchSize := 10
	for _, g := range groups {
		id := uuid.New()
		for range batchSize {
			g.Go(func() error {
				// We don't bother to Release these references because the
				// Cache will be released at the end of the test anyway.
				_, err := c.Acquire(t.Context(), id)
				return err
			})
		}
	}

	for _, g := range groups {
		require.NoError(t, g.Wait())
	}
	require.Equal(t, int64(batches), fetches.Load())
}

func TestRelease(t *testing.T) {
	t.Parallel()

	emptyFS := afero.NewIOFS(afero.NewReadOnlyFs(afero.NewMemMapFs()))
	c := newTestCache(func(_ context.Context, _ uuid.UUID) (fs.FS, error) {
		return emptyFS, nil
	})

	batches := 100
	ids := make([]uuid.UUID, 0, batches)
	for range batches {
		ids = append(ids, uuid.New())
	}

	// Acquire a bunch of references.
	batchSize := 10
	for _, id := range ids {
		for range batchSize {
			it, err := c.Acquire(t.Context(), id)
			require.NoError(t, err)
			require.Equal(t, emptyFS, it)
		}
	}

	// Make sure the cache is fully loaded.
	require.Equal(t, len(c.data), batches)

	// Now release all of the references...
	for _, id := range ids {
		for range batchSize {
			c.Release(id)
		}
	}

	// ...and make sure that the cache has emptied itself.
	require.Equal(t, len(c.data), 0)
}

func newTestCache(fetcher func(context.Context, uuid.UUID) (fs.FS, error)) Cache {
	return Cache{
		lock:    sync.Mutex{},
		data:    make(map[uuid.UUID]*cacheEntry),
		fetcher: fetcher,
	}
}

coderd/util/lazy/valuewitherror.go Normal file

@@ -0,0 +1,25 @@
package lazy

// ValueWithError is a lazy value whose initializer can fail. The initializer
// runs at most once; both its result and its error are memoized.
type ValueWithError[T any] struct {
	inner Value[result[T]]
}

type result[T any] struct {
	value T
	err   error
}

// NewWithError allows you to provide a lazy initializer that can fail.
func NewWithError[T any](fn func() (T, error)) *ValueWithError[T] {
	return &ValueWithError[T]{
		inner: Value[result[T]]{fn: func() result[T] {
			value, err := fn()
			return result[T]{value: value, err: err}
		}},
	}
}

// Load initializes the value on the first call and returns the memoized
// result (and error) on every subsequent call.
func (v *ValueWithError[T]) Load() (T, error) {
	result := v.inner.Load()
	return result.value, result.err
}
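
ValueWithError composes the package's pre-existing Value type, which is not part of this diff. Judging from how it is used above, Value is presumably a memoized, run-at-most-once lazy initializer; the following is a rough sketch under that assumption, not the repository's actual implementation.

package lazy

import "sync"

// Value is a sketch of the pre-existing lazy type this file builds on: it
// runs fn at most once and caches the result. This is inferred from usage,
// not copied from the repository.
type Value[T any] struct {
	once   sync.Once
	fn     func() T
	cached T
}

// Load runs fn on the first call and returns the cached result afterwards.
func (v *Value[T]) Load() T {
	v.once.Do(func() {
		v.cached = v.fn()
	})
	return v.cached
}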

coderd/util/lazy/valuewitherror_test.go Normal file

@@ -0,0 +1,52 @@
package lazy_test

import (
	"testing"

	"github.com/stretchr/testify/require"
	"golang.org/x/xerrors"

	"github.com/coder/coder/v2/coderd/util/lazy"
)

func TestLazyWithErrorOK(t *testing.T) {
	t.Parallel()

	l := lazy.NewWithError(func() (int, error) {
		return 1, nil
	})

	i, err := l.Load()
	require.NoError(t, err)
	require.Equal(t, 1, i)
}

func TestLazyWithErrorErr(t *testing.T) {
	t.Parallel()

	l := lazy.NewWithError(func() (int, error) {
		return 0, xerrors.New("oh no! everything that could go wrong went horribly wrong!")
	})

	i, err := l.Load()
	require.Error(t, err)
	require.Equal(t, 0, i)
}

func TestLazyWithErrorPointers(t *testing.T) {
	t.Parallel()

	a := 1
	l := lazy.NewWithError(func() (*int, error) {
		return &a, nil
	})

	b, err := l.Load()
	require.NoError(t, err)
	c, err := l.Load()
	require.NoError(t, err)

	// Both loads must return the same memoized pointer to a, so incrementing
	// through b and then c takes a from 1 to 3.
	*b++
	*c++
	require.Equal(t, 3, a)
}