mirror of
https://github.com/coder/coder.git
synced 2025-07-18 14:17:22 +00:00
chore: increase fileCache hit rate in autobuilds lifecycle (#18507)
`wsbuilder` hits the file cache when running validation. This solution is imperfect, but by first sorting workspaces by their template version id, the cache hit rate should improve.
This commit is contained in:
@ -5,6 +5,8 @@ import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
@ -155,6 +157,22 @@ func (e *Executor) runOnce(t time.Time) Stats {
|
||||
return stats
|
||||
}
|
||||
|
||||
// Sort the workspaces by build template version ID so that we can group
|
||||
// identical template versions together. This is a slight (and imperfect)
|
||||
// optimization.
|
||||
//
|
||||
// `wsbuilder` needs to load the terraform files for a given template version
|
||||
// into memory. If 2 workspaces are using the same template version, they will
|
||||
// share the same files in the FileCache. This only happens if the builds happen
|
||||
// in parallel.
|
||||
// TODO: Actually make sure the cache has the files in the cache for the full
|
||||
// set of identical template versions. Then unload the files when the builds
|
||||
// are done. Right now, this relies on luck for the 10 goroutine workers to
|
||||
// overlap and keep the file reference in the cache alive.
|
||||
slices.SortFunc(workspaces, func(a, b database.GetWorkspacesEligibleForTransitionRow) int {
|
||||
return strings.Compare(a.BuildTemplateVersionID.UUID.String(), b.BuildTemplateVersionID.UUID.String())
|
||||
})
|
||||
|
||||
// We only use errgroup here for convenience of API, not for early
|
||||
// cancellation. This means we only return nil errors in th eg.Go.
|
||||
eg := errgroup.Group{}
|
||||
|
@ -19443,7 +19443,8 @@ func (q *sqlQuerier) GetWorkspacesByTemplateID(ctx context.Context, templateID u
|
||||
const getWorkspacesEligibleForTransition = `-- name: GetWorkspacesEligibleForTransition :many
|
||||
SELECT
|
||||
workspaces.id,
|
||||
workspaces.name
|
||||
workspaces.name,
|
||||
workspace_builds.template_version_id as build_template_version_id
|
||||
FROM
|
||||
workspaces
|
||||
LEFT JOIN
|
||||
@ -19562,8 +19563,9 @@ WHERE
|
||||
`
|
||||
|
||||
type GetWorkspacesEligibleForTransitionRow struct {
|
||||
ID uuid.UUID `db:"id" json:"id"`
|
||||
Name string `db:"name" json:"name"`
|
||||
ID uuid.UUID `db:"id" json:"id"`
|
||||
Name string `db:"name" json:"name"`
|
||||
BuildTemplateVersionID uuid.NullUUID `db:"build_template_version_id" json:"build_template_version_id"`
|
||||
}
|
||||
|
||||
func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now time.Time) ([]GetWorkspacesEligibleForTransitionRow, error) {
|
||||
@ -19575,7 +19577,7 @@ func (q *sqlQuerier) GetWorkspacesEligibleForTransition(ctx context.Context, now
|
||||
var items []GetWorkspacesEligibleForTransitionRow
|
||||
for rows.Next() {
|
||||
var i GetWorkspacesEligibleForTransitionRow
|
||||
if err := rows.Scan(&i.ID, &i.Name); err != nil {
|
||||
if err := rows.Scan(&i.ID, &i.Name, &i.BuildTemplateVersionID); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
items = append(items, i)
|
||||
|
@ -642,7 +642,8 @@ FROM pending_workspaces, building_workspaces, running_workspaces, failed_workspa
|
||||
-- name: GetWorkspacesEligibleForTransition :many
|
||||
SELECT
|
||||
workspaces.id,
|
||||
workspaces.name
|
||||
workspaces.name,
|
||||
workspace_builds.template_version_id as build_template_version_id
|
||||
FROM
|
||||
workspaces
|
||||
LEFT JOIN
|
||||
|
@ -71,12 +71,12 @@ func (c *Cache) registerMetrics(registerer prometheus.Registerer) *Cache {
|
||||
Help: "The count of file references currently open in the file cache. Multiple references can be held for the same file.",
|
||||
})
|
||||
|
||||
c.totalOpenFileReferences = f.NewCounter(prometheus.CounterOpts{
|
||||
c.totalOpenFileReferences = f.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "coderd",
|
||||
Subsystem: subsystem,
|
||||
Name: "open_file_refs_total",
|
||||
Help: "The total number of file references ever opened in the file cache.",
|
||||
})
|
||||
Help: "The total number of file references ever opened in the file cache. The 'hit' label indicates if the file was loaded from the cache.",
|
||||
}, []string{"hit"})
|
||||
|
||||
return c
|
||||
}
|
||||
@ -97,7 +97,7 @@ type Cache struct {
|
||||
|
||||
type cacheMetrics struct {
|
||||
currentOpenFileReferences prometheus.Gauge
|
||||
totalOpenFileReferences prometheus.Counter
|
||||
totalOpenFileReferences *prometheus.CounterVec
|
||||
|
||||
currentOpenFiles prometheus.Gauge
|
||||
totalOpenedFiles prometheus.Counter
|
||||
@ -173,6 +173,7 @@ func (c *Cache) prepare(ctx context.Context, db database.Store, fileID uuid.UUID
|
||||
c.lock.Lock()
|
||||
defer c.lock.Unlock()
|
||||
|
||||
hitLabel := "true"
|
||||
entry, ok := c.data[fileID]
|
||||
if !ok {
|
||||
value := lazy.NewWithError(func() (CacheEntryValue, error) {
|
||||
@ -194,10 +195,11 @@ func (c *Cache) prepare(ctx context.Context, db database.Store, fileID uuid.UUID
|
||||
c.data[fileID] = entry
|
||||
c.currentOpenFiles.Inc()
|
||||
c.totalOpenedFiles.Inc()
|
||||
hitLabel = "false"
|
||||
}
|
||||
|
||||
c.currentOpenFileReferences.Inc()
|
||||
c.totalOpenFileReferences.Inc()
|
||||
c.totalOpenFileReferences.WithLabelValues(hitLabel).Inc()
|
||||
entry.refCount++
|
||||
return entry.value
|
||||
}
|
||||
|
@ -161,7 +161,9 @@ func TestConcurrency(t *testing.T) {
|
||||
require.Equal(t, batches, promhelp.GaugeValue(t, reg, cachePromMetricName("open_files_current"), nil))
|
||||
require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil))
|
||||
require.Equal(t, batches*batchSize, promhelp.GaugeValue(t, reg, cachePromMetricName("open_file_refs_current"), nil))
|
||||
require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil))
|
||||
hit, miss := promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), prometheus.Labels{"hit": "false"}),
|
||||
promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), prometheus.Labels{"hit": "true"})
|
||||
require.Equal(t, batches*batchSize, hit+miss)
|
||||
}
|
||||
|
||||
func TestRelease(t *testing.T) {
|
||||
@ -245,7 +247,6 @@ func TestRelease(t *testing.T) {
|
||||
// Total counts remain
|
||||
require.Equal(t, batches*fileSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_size_bytes_total"), nil))
|
||||
require.Equal(t, batches, promhelp.CounterValue(t, reg, cachePromMetricName("open_files_total"), nil))
|
||||
require.Equal(t, batches*batchSize, promhelp.CounterValue(t, reg, cachePromMetricName("open_file_refs_total"), nil))
|
||||
}
|
||||
|
||||
func cacheAuthzSetup(t *testing.T) (database.Store, *files.Cache, *coderdtest.RecordingAuthorizer) {
|
||||
|
Reference in New Issue
Block a user