diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 158406bbe9..2f1402b1c4 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -1977,6 +1977,13 @@ func (q *querier) GetParameterSchemasByJobID(ctx context.Context, jobID uuid.UUI return q.db.GetParameterSchemasByJobID(ctx, jobID) } +func (q *querier) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) { + if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil { + return nil, err + } + return q.db.GetPrebuildsInProgress(ctx) +} + func (q *querier) GetPresetByWorkspaceBuildID(ctx context.Context, workspaceID uuid.UUID) (database.TemplateVersionPreset, error) { if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil { return database.TemplateVersionPreset{}, err @@ -2144,6 +2151,13 @@ func (q *querier) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Ti return q.db.GetReplicasUpdatedAfter(ctx, updatedAt) } +func (q *querier) GetRunningPrebuilds(ctx context.Context) ([]database.GetRunningPrebuildsRow, error) { + if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil { + return nil, err + } + return q.db.GetRunningPrebuilds(ctx) +} + func (q *querier) GetRuntimeConfig(ctx context.Context, key string) (string, error) { if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil { return "", err @@ -2268,12 +2282,11 @@ func (q *querier) GetTemplateParameterInsights(ctx context.Context, arg database return q.db.GetTemplateParameterInsights(ctx, arg) } -func (q *querier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { - // TODO: authz +func (q *querier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]database.GetTemplatePresetsWithPrebuildsRow, error) { if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil { return nil, err } - return q.db.GetTemplatePrebuildState(ctx, templateID) + return q.db.GetTemplatePresetsWithPrebuilds(ctx, templateID) } func (q *querier) GetTemplateUsageStats(ctx context.Context, arg database.GetTemplateUsageStatsParams) ([]database.TemplateUsageStat, error) { diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index e325b04773..c7cc09550f 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -3784,6 +3784,10 @@ func (q *FakeQuerier) GetParameterSchemasByJobID(_ context.Context, jobID uuid.U return parameters, nil } +func (q *FakeQuerier) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) { + panic("not implemented") +} + func (q *FakeQuerier) GetPresetByWorkspaceBuildID(_ context.Context, workspaceBuildID uuid.UUID) (database.TemplateVersionPreset, error) { q.mutex.RLock() defer q.mutex.RUnlock() @@ -4489,6 +4493,10 @@ func (q *FakeQuerier) GetReplicasUpdatedAfter(_ context.Context, updatedAt time. return replicas, nil } +func (q *FakeQuerier) GetRunningPrebuilds(ctx context.Context) ([]database.GetRunningPrebuildsRow, error) { + panic("not implemented") +} + func (q *FakeQuerier) GetRuntimeConfig(_ context.Context, key string) (string, error) { q.mutex.Lock() defer q.mutex.Unlock() @@ -5528,7 +5536,7 @@ func (q *FakeQuerier) GetTemplateParameterInsights(ctx context.Context, arg data return rows, nil } -func (q *FakeQuerier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { +func (q *FakeQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]database.GetTemplatePresetsWithPrebuildsRow, error) { panic("not implemented") } diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 3217d28e96..a971a9f835 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -987,6 +987,13 @@ func (m queryMetricsStore) GetParameterSchemasByJobID(ctx context.Context, jobID return schemas, err } +func (m queryMetricsStore) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) { + start := time.Now() + r0, r1 := m.s.GetPrebuildsInProgress(ctx) + m.queryLatencies.WithLabelValues("GetPrebuildsInProgress").Observe(time.Since(start).Seconds()) + return r0, r1 +} + func (m queryMetricsStore) GetPresetByWorkspaceBuildID(ctx context.Context, workspaceBuildID uuid.UUID) (database.TemplateVersionPreset, error) { start := time.Now() r0, r1 := m.s.GetPresetByWorkspaceBuildID(ctx, workspaceBuildID) @@ -1134,6 +1141,13 @@ func (m queryMetricsStore) GetReplicasUpdatedAfter(ctx context.Context, updatedA return replicas, err } +func (m queryMetricsStore) GetRunningPrebuilds(ctx context.Context) ([]database.GetRunningPrebuildsRow, error) { + start := time.Now() + r0, r1 := m.s.GetRunningPrebuilds(ctx) + m.queryLatencies.WithLabelValues("GetRunningPrebuilds").Observe(time.Since(start).Seconds()) + return r0, r1 +} + func (m queryMetricsStore) GetRuntimeConfig(ctx context.Context, key string) (string, error) { start := time.Now() r0, r1 := m.s.GetRuntimeConfig(ctx, key) @@ -1260,10 +1274,10 @@ func (m queryMetricsStore) GetTemplateParameterInsights(ctx context.Context, arg return r0, r1 } -func (m queryMetricsStore) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { +func (m queryMetricsStore) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]database.GetTemplatePresetsWithPrebuildsRow, error) { start := time.Now() - r0, r1 := m.s.GetTemplatePrebuildState(ctx, templateID) - m.queryLatencies.WithLabelValues("GetTemplatePrebuildState").Observe(time.Since(start).Seconds()) + r0, r1 := m.s.GetTemplatePresetsWithPrebuilds(ctx, templateID) + m.queryLatencies.WithLabelValues("GetTemplatePresetsWithPrebuilds").Observe(time.Since(start).Seconds()) return r0, r1 } diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go index 7f4f3ffa8d..27c84e80f1 100644 --- a/coderd/database/dbmock/dbmock.go +++ b/coderd/database/dbmock/dbmock.go @@ -2031,6 +2031,21 @@ func (mr *MockStoreMockRecorder) GetParameterSchemasByJobID(ctx, jobID any) *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetParameterSchemasByJobID", reflect.TypeOf((*MockStore)(nil).GetParameterSchemasByJobID), ctx, jobID) } +// GetPrebuildsInProgress mocks base method. +func (m *MockStore) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetPrebuildsInProgress", ctx) + ret0, _ := ret[0].([]database.GetPrebuildsInProgressRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetPrebuildsInProgress indicates an expected call of GetPrebuildsInProgress. +func (mr *MockStoreMockRecorder) GetPrebuildsInProgress(ctx any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPrebuildsInProgress", reflect.TypeOf((*MockStore)(nil).GetPrebuildsInProgress), ctx) +} + // GetPresetByWorkspaceBuildID mocks base method. func (m *MockStore) GetPresetByWorkspaceBuildID(ctx context.Context, workspaceBuildID uuid.UUID) (database.TemplateVersionPreset, error) { m.ctrl.T.Helper() @@ -2346,6 +2361,21 @@ func (mr *MockStoreMockRecorder) GetReplicasUpdatedAfter(ctx, updatedAt any) *go return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetReplicasUpdatedAfter", reflect.TypeOf((*MockStore)(nil).GetReplicasUpdatedAfter), ctx, updatedAt) } +// GetRunningPrebuilds mocks base method. +func (m *MockStore) GetRunningPrebuilds(ctx context.Context) ([]database.GetRunningPrebuildsRow, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetRunningPrebuilds", ctx) + ret0, _ := ret[0].([]database.GetRunningPrebuildsRow) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetRunningPrebuilds indicates an expected call of GetRunningPrebuilds. +func (mr *MockStoreMockRecorder) GetRunningPrebuilds(ctx any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetRunningPrebuilds", reflect.TypeOf((*MockStore)(nil).GetRunningPrebuilds), ctx) +} + // GetRuntimeConfig mocks base method. func (m *MockStore) GetRuntimeConfig(ctx context.Context, key string) (string, error) { m.ctrl.T.Helper() @@ -2631,19 +2661,19 @@ func (mr *MockStoreMockRecorder) GetTemplateParameterInsights(ctx, arg any) *gom return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplateParameterInsights", reflect.TypeOf((*MockStore)(nil).GetTemplateParameterInsights), ctx, arg) } -// GetTemplatePrebuildState mocks base method. -func (m *MockStore) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { +// GetTemplatePresetsWithPrebuilds mocks base method. +func (m *MockStore) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]database.GetTemplatePresetsWithPrebuildsRow, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "GetTemplatePrebuildState", ctx, templateID) - ret0, _ := ret[0].([]database.GetTemplatePrebuildStateRow) + ret := m.ctrl.Call(m, "GetTemplatePresetsWithPrebuilds", ctx, templateID) + ret0, _ := ret[0].([]database.GetTemplatePresetsWithPrebuildsRow) ret1, _ := ret[1].(error) return ret0, ret1 } -// GetTemplatePrebuildState indicates an expected call of GetTemplatePrebuildState. -func (mr *MockStoreMockRecorder) GetTemplatePrebuildState(ctx, templateID any) *gomock.Call { +// GetTemplatePresetsWithPrebuilds indicates an expected call of GetTemplatePresetsWithPrebuilds. +func (mr *MockStoreMockRecorder) GetTemplatePresetsWithPrebuilds(ctx, templateID any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplatePrebuildState", reflect.TypeOf((*MockStore)(nil).GetTemplatePrebuildState), ctx, templateID) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetTemplatePresetsWithPrebuilds", reflect.TypeOf((*MockStore)(nil).GetTemplatePresetsWithPrebuilds), ctx, templateID) } // GetTemplateUsageStats mocks base method. diff --git a/coderd/database/querier.go b/coderd/database/querier.go index ee61011a0a..d5a1ae5a67 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -204,6 +204,7 @@ type sqlcQuerier interface { GetOrganizations(ctx context.Context, arg GetOrganizationsParams) ([]Organization, error) GetOrganizationsByUserID(ctx context.Context, userID uuid.UUID) ([]Organization, error) GetParameterSchemasByJobID(ctx context.Context, jobID uuid.UUID) ([]ParameterSchema, error) + GetPrebuildsInProgress(ctx context.Context) ([]GetPrebuildsInProgressRow, error) GetPresetByWorkspaceBuildID(ctx context.Context, workspaceBuildID uuid.UUID) (TemplateVersionPreset, error) GetPresetParametersByTemplateVersionID(ctx context.Context, templateVersionID uuid.UUID) ([]TemplateVersionPresetParameter, error) GetPresetsByTemplateVersionID(ctx context.Context, templateVersionID uuid.UUID) ([]TemplateVersionPreset, error) @@ -227,6 +228,7 @@ type sqlcQuerier interface { GetQuotaConsumedForUser(ctx context.Context, arg GetQuotaConsumedForUserParams) (int64, error) GetReplicaByID(ctx context.Context, id uuid.UUID) (Replica, error) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error) + GetRunningPrebuilds(ctx context.Context) ([]GetRunningPrebuildsRow, error) GetRuntimeConfig(ctx context.Context, key string) (string, error) GetTailnetAgents(ctx context.Context, id uuid.UUID) ([]TailnetAgent, error) GetTailnetClientsForAgent(ctx context.Context, agentID uuid.UUID) ([]TailnetClient, error) @@ -269,7 +271,7 @@ type sqlcQuerier interface { // created in the timeframe and return the aggregate usage counts of parameter // values. GetTemplateParameterInsights(ctx context.Context, arg GetTemplateParameterInsightsParams) ([]GetTemplateParameterInsightsRow, error) - GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]GetTemplatePrebuildStateRow, error) + GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]GetTemplatePresetsWithPrebuildsRow, error) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (TemplateVersion, error) GetTemplateVersionByJobID(ctx context.Context, jobID uuid.UUID) (TemplateVersion, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index 6368ff8946..b8225144f0 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -5442,148 +5442,155 @@ func (q *sqlQuerier) ClaimPrebuild(ctx context.Context, arg ClaimPrebuildParams) return i, err } -const getTemplatePrebuildState = `-- name: GetTemplatePrebuildState :many -WITH - -- All prebuilds currently running - running_prebuilds AS (SELECT p.template_id, - b.template_version_id, - tvp_curr.id AS current_preset_id, - tvp_desired.id AS desired_preset_id, - COUNT(*) AS count, - SUM(CASE - WHEN p.lifecycle_state = 'ready'::workspace_agent_lifecycle_state THEN 1 - ELSE 0 END) AS eligible, - STRING_AGG(p.id::text, ',') AS ids - FROM workspace_prebuilds p - INNER JOIN workspace_latest_build b ON b.workspace_id = p.id - INNER JOIN provisioner_jobs pj ON b.job_id = pj.id - INNER JOIN templates t ON p.template_id = t.id - LEFT JOIN template_version_presets tvp_curr - ON tvp_curr.id = b.template_version_preset_id - LEFT JOIN template_version_presets tvp_desired - ON tvp_desired.template_version_id = t.active_version_id - WHERE (b.transition = 'start'::workspace_transition - -- if a deletion job fails, the workspace will still be running - OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'unknown'::provisioner_job_status)) - AND (tvp_curr.name = tvp_desired.name - OR tvp_desired.id IS NULL) - GROUP BY p.template_id, b.template_version_id, tvp_curr.id, - tvp_desired.id), - -- All templates which have been configured for prebuilds (any version) - templates_with_prebuilds AS (SELECT t.id AS template_id, - tv.id AS template_version_id, - tv.id = t.active_version_id AS using_active_version, - tvpp.preset_id, - tvp.name, - MAX(tvpp.desired_instances) AS desired_instances, - t.deleted, - t.deprecated != '' AS deprecated - FROM templates t - INNER JOIN template_versions tv ON tv.template_id = t.id - INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id - INNER JOIN template_version_preset_prebuilds tvpp ON tvpp.preset_id = tvp.id - WHERE t.id = $1::uuid - GROUP BY t.id, tv.id, tvpp.preset_id, tvp.name), - -- Jobs relating to prebuilds current in-flight - prebuilds_in_progress AS (SELECT wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count - FROM workspace_latest_build wlb - INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id - INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id - WHERE pj.job_status NOT IN - ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'failed'::provisioner_job_status) - GROUP BY wpb.template_version_id, wpb.transition) -SELECT t.template_id, - t.template_version_id, - t.preset_id, - t.using_active_version AS is_active, - MAX(CASE - WHEN p.template_version_id = t.template_version_id THEN p.ids - ELSE '' END)::text AS running_prebuild_ids, - COALESCE(MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END), - 0)::int AS actual, -- running prebuilds for active version - COALESCE(MAX(CASE WHEN t.using_active_version THEN p.eligible ELSE 0 END), - 0)::int AS eligible, -- prebuilds which can be claimed - MAX(CASE WHEN t.using_active_version THEN t.desired_instances ELSE 0 END)::int AS desired, -- we only care about the active version's desired instances - COALESCE(MAX(CASE - WHEN p.template_version_id = t.template_version_id AND - t.using_active_version = false - THEN p.count - ELSE 0 END), - 0)::int AS outdated, -- running prebuilds for inactive version - COALESCE(GREATEST( - (MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END)::int - - - MAX(CASE WHEN t.using_active_version THEN t.desired_instances ELSE 0 END)), - 0), - 0) ::int AS extraneous, -- extra running prebuilds for active version - COALESCE(MAX(CASE - WHEN pip.transition = 'start'::workspace_transition THEN pip.count - ELSE 0 END), - 0)::int AS starting, - COALESCE(MAX(CASE - WHEN pip.transition = 'stop'::workspace_transition THEN pip.count - ELSE 0 END), - 0)::int AS stopping, -- not strictly needed, since prebuilds should never be left if a "stopped" state, but useful to know - COALESCE(MAX(CASE - WHEN pip.transition = 'delete'::workspace_transition THEN pip.count - ELSE 0 END), - 0)::int AS deleting, - t.deleted::bool AS template_deleted, - t.deprecated::bool AS template_deprecated -FROM templates_with_prebuilds t - LEFT JOIN running_prebuilds p - ON (p.template_version_id = t.template_version_id AND p.current_preset_id = t.preset_id) - LEFT JOIN prebuilds_in_progress pip ON pip.template_version_id = t.template_version_id -WHERE (t.using_active_version = TRUE - OR p.count > 0) -GROUP BY t.template_id, t.template_version_id, t.preset_id, t.using_active_version, t.deleted, t.deprecated +const getPrebuildsInProgress = `-- name: GetPrebuildsInProgress :many +SELECT wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count +FROM workspace_latest_build wlb + INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id + INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id +WHERE pj.job_status NOT IN + ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, + 'failed'::provisioner_job_status) +GROUP BY wpb.template_version_id, wpb.transition ` -type GetTemplatePrebuildStateRow struct { - TemplateID uuid.UUID `db:"template_id" json:"template_id"` - TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` - PresetID uuid.UUID `db:"preset_id" json:"preset_id"` - IsActive bool `db:"is_active" json:"is_active"` - RunningPrebuildIds string `db:"running_prebuild_ids" json:"running_prebuild_ids"` - Actual int32 `db:"actual" json:"actual"` - Eligible int32 `db:"eligible" json:"eligible"` - Desired int32 `db:"desired" json:"desired"` - Outdated int32 `db:"outdated" json:"outdated"` - Extraneous int32 `db:"extraneous" json:"extraneous"` - Starting int32 `db:"starting" json:"starting"` - Stopping int32 `db:"stopping" json:"stopping"` - Deleting int32 `db:"deleting" json:"deleting"` - TemplateDeleted bool `db:"template_deleted" json:"template_deleted"` - TemplateDeprecated bool `db:"template_deprecated" json:"template_deprecated"` +type GetPrebuildsInProgressRow struct { + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + Transition WorkspaceTransition `db:"transition" json:"transition"` + Count int64 `db:"count" json:"count"` } -func (q *sqlQuerier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]GetTemplatePrebuildStateRow, error) { - rows, err := q.db.QueryContext(ctx, getTemplatePrebuildState, templateID) +func (q *sqlQuerier) GetPrebuildsInProgress(ctx context.Context) ([]GetPrebuildsInProgressRow, error) { + rows, err := q.db.QueryContext(ctx, getPrebuildsInProgress) if err != nil { return nil, err } defer rows.Close() - var items []GetTemplatePrebuildStateRow + var items []GetPrebuildsInProgressRow for rows.Next() { - var i GetTemplatePrebuildStateRow + var i GetPrebuildsInProgressRow + if err := rows.Scan(&i.TemplateVersionID, &i.Transition, &i.Count); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getRunningPrebuilds = `-- name: GetRunningPrebuilds :many +SELECT p.id AS workspace_id, + p.template_id, + b.template_version_id, + tvp_curr.id AS current_preset_id, + tvp_desired.id AS desired_preset_id, + CASE + WHEN p.lifecycle_state = 'ready'::workspace_agent_lifecycle_state THEN TRUE + ELSE FALSE END AS eligible +FROM workspace_prebuilds p + INNER JOIN workspace_latest_build b ON b.workspace_id = p.id + INNER JOIN provisioner_jobs pj ON b.job_id = pj.id + INNER JOIN templates t ON p.template_id = t.id + LEFT JOIN template_version_presets tvp_curr + ON tvp_curr.id = b.template_version_preset_id + LEFT JOIN template_version_presets tvp_desired + ON tvp_desired.template_version_id = t.active_version_id +WHERE (b.transition = 'start'::workspace_transition + -- if a deletion job fails, the workspace will still be running + OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, + 'unknown'::provisioner_job_status)) + AND (tvp_curr.name = tvp_desired.name + OR tvp_desired.id IS NULL) +` + +type GetRunningPrebuildsRow struct { + WorkspaceID uuid.UUID `db:"workspace_id" json:"workspace_id"` + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + CurrentPresetID uuid.NullUUID `db:"current_preset_id" json:"current_preset_id"` + DesiredPresetID uuid.NullUUID `db:"desired_preset_id" json:"desired_preset_id"` + Eligible bool `db:"eligible" json:"eligible"` +} + +func (q *sqlQuerier) GetRunningPrebuilds(ctx context.Context) ([]GetRunningPrebuildsRow, error) { + rows, err := q.db.QueryContext(ctx, getRunningPrebuilds) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetRunningPrebuildsRow + for rows.Next() { + var i GetRunningPrebuildsRow + if err := rows.Scan( + &i.WorkspaceID, + &i.TemplateID, + &i.TemplateVersionID, + &i.CurrentPresetID, + &i.DesiredPresetID, + &i.Eligible, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + +const getTemplatePresetsWithPrebuilds = `-- name: GetTemplatePresetsWithPrebuilds :many +SELECT t.id AS template_id, + tv.id AS template_version_id, + tv.id = t.active_version_id AS using_active_version, + tvpp.preset_id, + tvp.name, + tvpp.desired_instances AS desired_instances, + t.deleted, + t.deprecated != '' AS deprecated +FROM templates t + INNER JOIN template_versions tv ON tv.template_id = t.id + INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id + INNER JOIN template_version_preset_prebuilds tvpp ON tvpp.preset_id = tvp.id +WHERE (t.id = $1::uuid OR $1 IS NULL) +` + +type GetTemplatePresetsWithPrebuildsRow struct { + TemplateID uuid.UUID `db:"template_id" json:"template_id"` + TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` + UsingActiveVersion bool `db:"using_active_version" json:"using_active_version"` + PresetID uuid.UUID `db:"preset_id" json:"preset_id"` + Name string `db:"name" json:"name"` + DesiredInstances int32 `db:"desired_instances" json:"desired_instances"` + Deleted bool `db:"deleted" json:"deleted"` + Deprecated bool `db:"deprecated" json:"deprecated"` +} + +func (q *sqlQuerier) GetTemplatePresetsWithPrebuilds(ctx context.Context, templateID uuid.NullUUID) ([]GetTemplatePresetsWithPrebuildsRow, error) { + rows, err := q.db.QueryContext(ctx, getTemplatePresetsWithPrebuilds, templateID) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetTemplatePresetsWithPrebuildsRow + for rows.Next() { + var i GetTemplatePresetsWithPrebuildsRow if err := rows.Scan( &i.TemplateID, &i.TemplateVersionID, + &i.UsingActiveVersion, &i.PresetID, - &i.IsActive, - &i.RunningPrebuildIds, - &i.Actual, - &i.Eligible, - &i.Desired, - &i.Outdated, - &i.Extraneous, - &i.Starting, - &i.Stopping, - &i.Deleting, - &i.TemplateDeleted, - &i.TemplateDeprecated, + &i.Name, + &i.DesiredInstances, + &i.Deleted, + &i.Deprecated, ); err != nil { return nil, err } diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index 006ebf6d11..420825bed8 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -1,100 +1,51 @@ --- name: GetTemplatePrebuildState :many -WITH - -- All prebuilds currently running - running_prebuilds AS (SELECT p.template_id, - b.template_version_id, - tvp_curr.id AS current_preset_id, - tvp_desired.id AS desired_preset_id, - COUNT(*) AS count, - SUM(CASE - WHEN p.lifecycle_state = 'ready'::workspace_agent_lifecycle_state THEN 1 - ELSE 0 END) AS eligible, - STRING_AGG(p.id::text, ',') AS ids - FROM workspace_prebuilds p - INNER JOIN workspace_latest_build b ON b.workspace_id = p.id - INNER JOIN provisioner_jobs pj ON b.job_id = pj.id - INNER JOIN templates t ON p.template_id = t.id - LEFT JOIN template_version_presets tvp_curr - ON tvp_curr.id = b.template_version_preset_id - LEFT JOIN template_version_presets tvp_desired - ON tvp_desired.template_version_id = t.active_version_id - WHERE (b.transition = 'start'::workspace_transition - -- if a deletion job fails, the workspace will still be running - OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'unknown'::provisioner_job_status)) - AND (tvp_curr.name = tvp_desired.name - OR tvp_desired.id IS NULL) - GROUP BY p.template_id, b.template_version_id, tvp_curr.id, - tvp_desired.id), - -- All templates which have been configured for prebuilds (any version) - templates_with_prebuilds AS (SELECT t.id AS template_id, - tv.id AS template_version_id, - tv.id = t.active_version_id AS using_active_version, - tvpp.preset_id, - tvp.name, - MAX(tvpp.desired_instances) AS desired_instances, - t.deleted, - t.deprecated != '' AS deprecated - FROM templates t - INNER JOIN template_versions tv ON tv.template_id = t.id - INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id - INNER JOIN template_version_preset_prebuilds tvpp ON tvpp.preset_id = tvp.id - WHERE t.id = @template_id::uuid - GROUP BY t.id, tv.id, tvpp.preset_id, tvp.name), - -- Jobs relating to prebuilds current in-flight - prebuilds_in_progress AS (SELECT wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count - FROM workspace_latest_build wlb - INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id - INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id - WHERE pj.job_status NOT IN - ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'failed'::provisioner_job_status) - GROUP BY wpb.template_version_id, wpb.transition) -SELECT t.template_id, - t.template_version_id, - t.preset_id, - t.using_active_version AS is_active, - MAX(CASE - WHEN p.template_version_id = t.template_version_id THEN p.ids - ELSE '' END)::text AS running_prebuild_ids, - COALESCE(MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END), - 0)::int AS actual, -- running prebuilds for active version - COALESCE(MAX(CASE WHEN t.using_active_version THEN p.eligible ELSE 0 END), - 0)::int AS eligible, -- prebuilds which can be claimed - MAX(CASE WHEN t.using_active_version THEN t.desired_instances ELSE 0 END)::int AS desired, -- we only care about the active version's desired instances - COALESCE(MAX(CASE - WHEN p.template_version_id = t.template_version_id AND - t.using_active_version = false - THEN p.count - ELSE 0 END), - 0)::int AS outdated, -- running prebuilds for inactive version - COALESCE(GREATEST( - (MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END)::int - - - MAX(CASE WHEN t.using_active_version THEN t.desired_instances ELSE 0 END)), - 0), - 0) ::int AS extraneous, -- extra running prebuilds for active version - COALESCE(MAX(CASE - WHEN pip.transition = 'start'::workspace_transition THEN pip.count - ELSE 0 END), - 0)::int AS starting, - COALESCE(MAX(CASE - WHEN pip.transition = 'stop'::workspace_transition THEN pip.count - ELSE 0 END), - 0)::int AS stopping, -- not strictly needed, since prebuilds should never be left if a "stopped" state, but useful to know - COALESCE(MAX(CASE - WHEN pip.transition = 'delete'::workspace_transition THEN pip.count - ELSE 0 END), - 0)::int AS deleting, - t.deleted::bool AS template_deleted, - t.deprecated::bool AS template_deprecated -FROM templates_with_prebuilds t - LEFT JOIN running_prebuilds p - ON (p.template_version_id = t.template_version_id AND p.current_preset_id = t.preset_id) - LEFT JOIN prebuilds_in_progress pip ON pip.template_version_id = t.template_version_id -WHERE (t.using_active_version = TRUE - OR p.count > 0) -GROUP BY t.template_id, t.template_version_id, t.preset_id, t.using_active_version, t.deleted, t.deprecated; +-- name: GetRunningPrebuilds :many +SELECT p.id AS workspace_id, + p.template_id, + b.template_version_id, + tvp_curr.id AS current_preset_id, + tvp_desired.id AS desired_preset_id, + CASE + WHEN p.lifecycle_state = 'ready'::workspace_agent_lifecycle_state THEN TRUE + ELSE FALSE END AS eligible +FROM workspace_prebuilds p + INNER JOIN workspace_latest_build b ON b.workspace_id = p.id + INNER JOIN provisioner_jobs pj ON b.job_id = pj.id + INNER JOIN templates t ON p.template_id = t.id + LEFT JOIN template_version_presets tvp_curr + ON tvp_curr.id = b.template_version_preset_id + LEFT JOIN template_version_presets tvp_desired + ON tvp_desired.template_version_id = t.active_version_id +WHERE (b.transition = 'start'::workspace_transition + -- if a deletion job fails, the workspace will still be running + OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, + 'unknown'::provisioner_job_status)) + AND (tvp_curr.name = tvp_desired.name + OR tvp_desired.id IS NULL); + +-- name: GetTemplatePresetsWithPrebuilds :many +SELECT t.id AS template_id, + tv.id AS template_version_id, + tv.id = t.active_version_id AS using_active_version, + tvpp.preset_id, + tvp.name, + tvpp.desired_instances AS desired_instances, + t.deleted, + t.deprecated != '' AS deprecated +FROM templates t + INNER JOIN template_versions tv ON tv.template_id = t.id + INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id + INNER JOIN template_version_preset_prebuilds tvpp ON tvpp.preset_id = tvp.id +WHERE (t.id = sqlc.narg('template_id')::uuid OR sqlc.narg('template_id') IS NULL); + +-- name: GetPrebuildsInProgress :many +SELECT wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count +FROM workspace_latest_build wlb + INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id + INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id +WHERE pj.job_status NOT IN + ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, + 'failed'::provisioner_job_status) +GROUP BY wpb.template_version_id, wpb.transition; -- name: ClaimPrebuild :one -- TODO: rewrite to use named CTE instead? diff --git a/coderd/util/slice/slice.go b/coderd/util/slice/slice.go index 508827dfaa..b89f1a43ec 100644 --- a/coderd/util/slice/slice.go +++ b/coderd/util/slice/slice.go @@ -77,6 +77,17 @@ func Find[T any](haystack []T, cond func(T) bool) (T, bool) { return empty, false } +// Filter returns all elements that satisfy the condition. +func Filter[T any](haystack []T, cond func(T) bool) []T { + out := make([]T, 0, len(haystack)) + for _, hay := range haystack { + if cond(hay) { + out = append(out, hay) + } + } + return out +} + // Overlap returns if the 2 sets have any overlap (element(s) in common) func Overlap[T comparable](a []T, b []T) bool { return OverlapCompare(a, b, func(a, b T) bool { diff --git a/docs/reference/cli/server.md b/docs/reference/cli/server.md index 98cb2a90c2..8915512703 100644 --- a/docs/reference/cli/server.md +++ b/docs/reference/cli/server.md @@ -1548,3 +1548,14 @@ The endpoint to which to send webhooks. | Default | 5 | The upper limit of attempts to send a notification. + +### --workspace-prebuilds-reconciliation-interval + +| | | +|-------------|-----------------------------------------------------------------| +| Type | duration | +| Environment | $CODER_WORKSPACE_PREBUILDS_RECONCILIATION_INTERVAL | +| YAML | workspace_prebuilds.reconciliation_interval | +| Default | 15s | + +How often to reconcile workspace prebuilds state. diff --git a/enterprise/coderd/prebuilds/controller.go b/enterprise/coderd/prebuilds/controller.go index aea447ee8d..bf3f95cc4f 100644 --- a/enterprise/coderd/prebuilds/controller.go +++ b/enterprise/coderd/prebuilds/controller.go @@ -3,6 +3,7 @@ package prebuilds import ( "context" "crypto/rand" + "database/sql" "encoding/base32" "fmt" "math" @@ -11,7 +12,9 @@ import ( "sync/atomic" "time" + "github.com/coder/coder/v2/coderd/util/slice" "github.com/hashicorp/go-multierror" + "golang.org/x/exp/slices" "github.com/coder/coder/v2/coderd/audit" @@ -113,9 +116,18 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) { logger.Debug(ctx, "starting reconciliation") - // get all templates or specific one requested + // This tx holds a global lock, which prevents any other coderd replica from starting a reconciliation and + // possibly getting an inconsistent view of the state. + // + // The lock MUST be held until ALL modifications have been effected. + // + // It is run with RepeatableRead isolation, so it's effectively snapshotting the data at the start of the tx. + // + // This is a read-only tx, so returning an error (i.e. causing a rollback) has no impact. err := c.store.InTx(func(db database.Store) error { start := time.Now() + + // TODO: give up after some time waiting on this? err := db.AcquireLock(ctx, database.LockIDReconcileTemplatePrebuilds) if err != nil { logger.Warn(ctx, "failed to acquire top-level prebuilds reconciliation lock; likely running on another coderd replica", slog.Error(err)) @@ -127,42 +139,50 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) { innerCtx, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() - var ids []uuid.UUID + var id uuid.NullUUID if templateID != nil { - ids = append(ids, *templateID) + id.UUID = *templateID } - templates, err := db.GetTemplatesWithFilter(innerCtx, database.GetTemplatesWithFilterParams{ - IDs: ids, - }) - if err != nil { - c.logger.Debug(innerCtx, "could not fetch template(s)") - return xerrors.Errorf("fetch template(s): %w", err) - } - - if len(templates) == 0 { - c.logger.Debug(innerCtx, "no templates found") + presetsWithPrebuilds, err := db.GetTemplatePresetsWithPrebuilds(ctx, id) + if len(presetsWithPrebuilds) == 0 { + logger.Debug(innerCtx, "no templates found with prebuilds configured") return nil } + runningPrebuilds, err := db.GetRunningPrebuilds(ctx) + if err != nil { + return xerrors.Errorf("failed to get running prebuilds: %w", err) + } + + prebuildsInProgress, err := db.GetPrebuildsInProgress(ctx) + if err != nil { + return xerrors.Errorf("failed to get prebuilds in progress: %w", err) + } + // TODO: bounded concurrency? probably not but consider var eg errgroup.Group - for _, template := range templates { + for _, preset := range presetsWithPrebuilds { eg.Go(func() error { // Pass outer context. // TODO: name these better to avoid the comment. - return c.reconcileTemplate(ctx, template) + err := c.reconcilePrebuildsForPreset(ctx, preset, runningPrebuilds, prebuildsInProgress) + if err != nil { + logger.Error(ctx, "failed to reconcile prebuilds for preset", slog.Error(err), slog.F("preset_id", preset.PresetID)) + } + // DO NOT return error otherwise the tx will end. + return nil }) } return eg.Wait() }, &database.TxOptions{ - // TODO: isolation + Isolation: sql.LevelRepeatableRead, ReadOnly: true, TxIdentifier: "template_prebuilds", }) if err != nil { - logger.Error(ctx, "failed to acquire database transaction", slog.Error(err)) + logger.Error(ctx, "failed to reconcile", slog.Error(err)) } } @@ -170,47 +190,101 @@ type reconciliationActions struct { deleteIDs []uuid.UUID createIDs []uuid.UUID - meta database.GetTemplatePrebuildStateRow + actual int32 // Running prebuilds for active version. + desired int32 // Active template version's desired instances as defined in preset. + eligible int32 // Prebuilds which can be claimed. + outdated int32 // Prebuilds which no longer match the active template version. + extraneous int32 // Extra running prebuilds for active version (somehow). + starting, stopping, deleting int32 // Prebuilds currently being provisioned up or down. } // calculateActions MUST be called within the context of a transaction (TODO: isolation) // with an advisory lock to prevent TOCTOU races. -func (c *Controller) calculateActions(ctx context.Context, template database.Template, state database.GetTemplatePrebuildStateRow) (*reconciliationActions, error) { +func (c *Controller) calculateActions(ctx context.Context, preset database.GetTemplatePresetsWithPrebuildsRow, running []database.GetRunningPrebuildsRow, inProgress []database.GetPrebuildsInProgressRow) (*reconciliationActions, error) { // TODO: align workspace states with how we represent them on the FE and the CLI // right now there's some slight differences which can lead to additional prebuilds being created // TODO: add mechanism to prevent prebuilds being reconciled from being claimable by users; i.e. if a prebuild is // about to be deleted, it should not be deleted if it has been claimed - beware of TOCTOU races! + var ( + actual int32 // Running prebuilds for active version. + desired int32 // Active template version's desired instances as defined in preset. + eligible int32 // Prebuilds which can be claimed. + outdated int32 // Prebuilds which no longer match the active template version. + extraneous int32 // Extra running prebuilds for active version (somehow). + starting, stopping, deleting int32 // Prebuilds currently being provisioned up or down. + ) + + if preset.UsingActiveVersion { + actual = int32(len(running)) + desired = preset.DesiredInstances + } + + for _, prebuild := range running { + if preset.UsingActiveVersion { + if prebuild.Eligible { + eligible++ + } + + extraneous = int32(math.Max(float64(actual-preset.DesiredInstances), 0)) + } + + if prebuild.TemplateVersionID == preset.TemplateVersionID && !preset.UsingActiveVersion { + outdated++ + } + } + + for _, progress := range inProgress { + switch progress.Transition { + case database.WorkspaceTransitionStart: + starting++ + case database.WorkspaceTransitionStop: + stopping++ + case database.WorkspaceTransitionDelete: + deleting++ + default: + c.logger.Warn(ctx, "unknown transition found in prebuilds in progress result", slog.F("transition", progress.Transition)) + } + } + var ( toCreate = int(math.Max(0, float64( - state.Desired- // The number specified in the preset - (state.Actual+state.Starting)- // The current number of prebuilds (or builds in-flight) - state.Stopping), // The number of prebuilds currently being stopped (should be 0) + desired- // The number specified in the preset + (actual+starting)- // The current number of prebuilds (or builds in-flight) + stopping), // The number of prebuilds currently being stopped (should be 0) )) toDelete = int(math.Max(0, float64( - state.Outdated- // The number of prebuilds running above the desired count for active version - state.Deleting), // The number of prebuilds currently being deleted + outdated- // The number of prebuilds running above the desired count for active version + deleting), // The number of prebuilds currently being deleted )) - actions = &reconciliationActions{meta: state} - runningIDs = strings.Split(state.RunningPrebuildIds, ",") + actions = &reconciliationActions{ + actual: actual, + desired: desired, + eligible: eligible, + outdated: outdated, + extraneous: extraneous, + starting: starting, + stopping: stopping, + deleting: deleting, + } ) // Bail early to avoid scheduling new prebuilds while operations are in progress. - if (toCreate+toDelete) > 0 && (state.Starting+state.Stopping+state.Deleting) > 0 { + if (toCreate+toDelete) > 0 && (starting+stopping+deleting) > 0 { c.logger.Warn(ctx, "prebuild operations in progress, skipping reconciliation", - slog.F("template_id", template.ID), slog.F("starting", state.Starting), - slog.F("stopping", state.Stopping), slog.F("deleting", state.Deleting), + slog.F("template_id", preset.TemplateID.String()), slog.F("starting", starting), + slog.F("stopping", stopping), slog.F("deleting", deleting), slog.F("wanted_to_create", toCreate), slog.F("wanted_to_delete", toDelete)) return actions, nil } // It's possible that an operator could stop/start prebuilds which interfere with the reconciliation loop, so // we check if there are somehow more prebuilds than we expect, and then pick random victims to be deleted. - if len(runningIDs) > 0 && state.Extraneous > 0 { + if extraneous > 0 { // Sort running IDs randomly so we can pick random victims. - slices.SortFunc(runningIDs, func(_, _ string) int { + slices.SortFunc(running, func(_, _ database.GetRunningPrebuildsRow) int { if mrand.Float64() > 0.5 { return -1 } @@ -219,30 +293,22 @@ func (c *Controller) calculateActions(ctx context.Context, template database.Tem }) var victims []uuid.UUID - for i := 0; i < int(state.Extraneous); i++ { - if i >= len(runningIDs) { + for i := 0; i < int(extraneous); i++ { + if i >= len(running) { // This should never happen. c.logger.Warn(ctx, "unexpected reconciliation state; extraneous count exceeds running prebuilds count!", - slog.F("running_count", len(runningIDs)), - slog.F("extraneous", state.Extraneous)) + slog.F("running_count", len(running)), + slog.F("extraneous", extraneous)) continue } - victim := runningIDs[i] - - id, err := uuid.Parse(victim) - if err != nil { - c.logger.Warn(ctx, "invalid prebuild ID", slog.F("template_id", template.ID), - slog.F("id", string(victim)), slog.Error(err)) - } else { - victims = append(victims, id) - } + victims = append(victims, running[i].WorkspaceID) } actions.deleteIDs = append(actions.deleteIDs, victims...) c.logger.Warn(ctx, "found extra prebuilds running, picking random victim(s)", - slog.F("template_id", template.ID), slog.F("desired", state.Desired), slog.F("actual", state.Actual), slog.F("extra", state.Extraneous), + slog.F("template_id", preset.TemplateID.String()), slog.F("desired", desired), slog.F("actual", actual), slog.F("extra", extraneous), slog.F("victims", victims)) // Prevent the rest of the reconciliation from completing @@ -251,133 +317,116 @@ func (c *Controller) calculateActions(ctx context.Context, template database.Tem // If the template has become deleted or deprecated since the last reconciliation, we need to ensure we // scale those prebuilds down to zero. - if state.TemplateDeleted || state.TemplateDeprecated { + if preset.Deleted || preset.Deprecated { toCreate = 0 - toDelete = int(state.Actual + state.Outdated) + toDelete = int(actual + outdated) } for i := 0; i < toCreate; i++ { actions.createIDs = append(actions.createIDs, uuid.New()) } - if toDelete > 0 && len(runningIDs) != toDelete { + if toDelete > 0 && len(running) != toDelete { c.logger.Warn(ctx, "mismatch between running prebuilds and expected deletion count!", - slog.F("template_id", template.ID), slog.F("running", len(runningIDs)), slog.F("to_delete", toDelete)) + slog.F("template_id", preset.TemplateID.String()), slog.F("running", len(running)), slog.F("to_delete", toDelete)) } // TODO: implement lookup to not perform same action on workspace multiple times in $period // i.e. a workspace cannot be deleted for some reason, which continually makes it eligible for deletion for i := 0; i < toDelete; i++ { - if i >= len(runningIDs) { + if i >= len(running) { // Above warning will have already addressed this. continue } - running := runningIDs[i] - id, err := uuid.Parse(running) - if err != nil { - c.logger.Warn(ctx, "invalid prebuild ID", slog.F("template_id", template.ID), - slog.F("id", string(running)), slog.Error(err)) - continue - } - - actions.deleteIDs = append(actions.deleteIDs, id) + actions.deleteIDs = append(actions.deleteIDs, running[i].WorkspaceID) } return actions, nil } -func (c *Controller) reconcileTemplate(ctx context.Context, template database.Template) error { - logger := c.logger.With(slog.F("template_id", template.ID.String())) +func (c *Controller) reconcilePrebuildsForPreset(ctx context.Context, preset database.GetTemplatePresetsWithPrebuildsRow, + allRunning []database.GetRunningPrebuildsRow, allInProgress []database.GetPrebuildsInProgressRow, +) error { + logger := c.logger.With(slog.F("template_id", preset.TemplateID.String())) - // get number of desired vs actual prebuild instances - err := c.store.InTx(func(db database.Store) error { - err := db.AcquireLock(ctx, database.GenLockID(fmt.Sprintf("template:%s", template.ID.String()))) - if err != nil { - logger.Warn(ctx, "failed to acquire template prebuilds lock; likely running on another coderd replica", slog.Error(err)) - return nil + var lastErr multierror.Error + vlogger := logger.With(slog.F("template_version_id", preset.TemplateVersionID), slog.F("preset_id", preset.PresetID)) + + running := slice.Filter(allRunning, func(prebuild database.GetRunningPrebuildsRow) bool { + if !prebuild.DesiredPresetID.Valid && !prebuild.CurrentPresetID.Valid { + return false } - - innerCtx, cancel := context.WithTimeout(ctx, time.Second*30) - defer cancel() - - versionStates, err := db.GetTemplatePrebuildState(ctx, template.ID) - if err != nil { - return xerrors.Errorf("failed to retrieve template's prebuild states: %w", err) - } - - var lastErr multierror.Error - for _, state := range versionStates { - vlogger := logger.With(slog.F("template_version_id", state.TemplateVersionID), slog.F("preset_id", state.PresetID)) - - actions, err := c.calculateActions(innerCtx, template, state) - if err != nil { - vlogger.Error(ctx, "failed to calculate reconciliation actions", slog.Error(err)) - continue - } - - // TODO: authz // Can't use existing profiles (i.e. AsSystemRestricted) because of dbauthz rules - ownerCtx := dbauthz.As(ctx, rbac.Subject{ - ID: "owner", - Roles: rbac.RoleIdentifiers{rbac.RoleOwner()}, - Groups: []string{}, - Scope: rbac.ExpandableScope(rbac.ScopeAll), - }) - - levelFn := vlogger.Debug - if len(actions.createIDs) > 0 || len(actions.deleteIDs) > 0 { - // Only log with info level when there's a change that needs to be effected. - levelFn = vlogger.Info - } - levelFn(innerCtx, "template prebuild state retrieved", - slog.F("to_create", len(actions.createIDs)), slog.F("to_delete", len(actions.deleteIDs)), - slog.F("desired", actions.meta.Desired), slog.F("actual", actions.meta.Actual), - slog.F("outdated", actions.meta.Outdated), slog.F("extraneous", actions.meta.Extraneous), - slog.F("starting", actions.meta.Starting), slog.F("stopping", actions.meta.Stopping), - slog.F("deleting", actions.meta.Deleting), slog.F("eligible", actions.meta.Eligible)) - - // Provision workspaces within the same tx so we don't get any timing issues here. - // i.e. we hold the advisory lock until all reconciliatory actions have been taken. - // TODO: max per reconciliation iteration? - - // TODO: probably need to split these to have a transaction each... rolling back would lead to an - // inconsistent state if 1 of n creations/deletions fail. - for _, id := range actions.createIDs { - if err := c.createPrebuild(ownerCtx, db, id, template, state.PresetID); err != nil { - vlogger.Error(ctx, "failed to create prebuild", slog.Error(err)) - lastErr.Errors = append(lastErr.Errors, err) - } - } - - for _, id := range actions.deleteIDs { - if err := c.deletePrebuild(ownerCtx, db, id, template, state.PresetID); err != nil { - vlogger.Error(ctx, "failed to delete prebuild", slog.Error(err)) - lastErr.Errors = append(lastErr.Errors, err) - } - } - } - - return lastErr.ErrorOrNil() - }, &database.TxOptions{ - // TODO: isolation - TxIdentifier: "template_prebuilds", + return prebuild.CurrentPresetID.UUID == preset.PresetID && + prebuild.TemplateVersionID == preset.TemplateVersionID // Not strictly necessary since presets are 1:1 with template versions, but no harm in being extra safe. }) + + inProgress := slice.Filter(allInProgress, func(prebuild database.GetPrebuildsInProgressRow) bool { + return prebuild.TemplateVersionID == preset.TemplateVersionID + }) + + actions, err := c.calculateActions(ctx, preset, running, inProgress) if err != nil { - logger.Error(ctx, "failed to acquire database transaction", slog.Error(err)) + vlogger.Error(ctx, "failed to calculate reconciliation actions", slog.Error(err)) + return xerrors.Errorf("failed to calculate reconciliation actions: %w", err) } - return nil + // TODO: authz // Can't use existing profiles (i.e. AsSystemRestricted) because of dbauthz rules + ownerCtx := dbauthz.As(ctx, rbac.Subject{ + ID: "owner", + Roles: rbac.RoleIdentifiers{rbac.RoleOwner()}, + Groups: []string{}, + Scope: rbac.ExpandableScope(rbac.ScopeAll), + }) + + levelFn := vlogger.Debug + if len(actions.createIDs) > 0 || len(actions.deleteIDs) > 0 { + // Only log with info level when there's a change that needs to be effected. + levelFn = vlogger.Info + } + levelFn(ctx, "template prebuild state retrieved", + slog.F("to_create", len(actions.createIDs)), slog.F("to_delete", len(actions.deleteIDs)), + slog.F("desired", actions.desired), slog.F("actual", actions.actual), + slog.F("outdated", actions.outdated), slog.F("extraneous", actions.extraneous), + slog.F("starting", actions.starting), slog.F("stopping", actions.stopping), + slog.F("deleting", actions.deleting), slog.F("eligible", actions.eligible)) + + // Provision workspaces within the same tx so we don't get any timing issues here. + // i.e. we hold the advisory lock until all reconciliatory actions have been taken. + // TODO: max per reconciliation iteration? + + // TODO: i've removed the surrounding tx, but if we restore it then we need to pass down the store to these funcs. + for _, id := range actions.createIDs { + if err := c.createPrebuild(ownerCtx, id, preset.TemplateID, preset.PresetID); err != nil { + vlogger.Error(ctx, "failed to create prebuild", slog.Error(err)) + lastErr.Errors = append(lastErr.Errors, err) + } + } + + for _, id := range actions.deleteIDs { + if err := c.deletePrebuild(ownerCtx, id, preset.TemplateID, preset.PresetID); err != nil { + vlogger.Error(ctx, "failed to delete prebuild", slog.Error(err)) + lastErr.Errors = append(lastErr.Errors, err) + } + } + + return lastErr.ErrorOrNil() } -func (c *Controller) createPrebuild(ctx context.Context, db database.Store, prebuildID uuid.UUID, template database.Template, presetID uuid.UUID) error { +func (c *Controller) createPrebuild(ctx context.Context, prebuildID uuid.UUID, templateID uuid.UUID, presetID uuid.UUID) error { name, err := generateName() if err != nil { return xerrors.Errorf("failed to generate unique prebuild ID: %w", err) } + template, err := c.store.GetTemplateByID(ctx, templateID) + if err != nil { + return xerrors.Errorf("failed to get template: %w", err) + } + now := dbtime.Now() // Workspaces are created without any versions. - minimumWorkspace, err := db.InsertWorkspace(ctx, database.InsertWorkspaceParams{ + minimumWorkspace, err := c.store.InsertWorkspace(ctx, database.InsertWorkspaceParams{ ID: prebuildID, CreatedAt: now, UpdatedAt: now, @@ -393,7 +442,7 @@ func (c *Controller) createPrebuild(ctx context.Context, db database.Store, preb } // We have to refetch the workspace for the joined in fields. - workspace, err := db.GetWorkspaceByID(ctx, minimumWorkspace.ID) + workspace, err := c.store.GetWorkspaceByID(ctx, minimumWorkspace.ID) if err != nil { return xerrors.Errorf("get workspace by ID: %w", err) } @@ -401,23 +450,28 @@ func (c *Controller) createPrebuild(ctx context.Context, db database.Store, preb c.logger.Info(ctx, "attempting to create prebuild", slog.F("name", name), slog.F("workspace_id", prebuildID.String()), slog.F("preset_id", presetID.String())) - return c.provision(ctx, db, prebuildID, template, presetID, database.WorkspaceTransitionStart, workspace) + return c.provision(ctx, prebuildID, template, presetID, database.WorkspaceTransitionStart, workspace) } -func (c *Controller) deletePrebuild(ctx context.Context, db database.Store, prebuildID uuid.UUID, template database.Template, presetID uuid.UUID) error { - workspace, err := db.GetWorkspaceByID(ctx, prebuildID) +func (c *Controller) deletePrebuild(ctx context.Context, prebuildID uuid.UUID, templateID uuid.UUID, presetID uuid.UUID) error { + workspace, err := c.store.GetWorkspaceByID(ctx, prebuildID) if err != nil { return xerrors.Errorf("get workspace by ID: %w", err) } + template, err := c.store.GetTemplateByID(ctx, templateID) + if err != nil { + return xerrors.Errorf("failed to get template: %w", err) + } + c.logger.Info(ctx, "attempting to delete prebuild", slog.F("workspace_id", prebuildID.String()), slog.F("preset_id", presetID.String())) - return c.provision(ctx, db, prebuildID, template, presetID, database.WorkspaceTransitionDelete, workspace) + return c.provision(ctx, prebuildID, template, presetID, database.WorkspaceTransitionDelete, workspace) } -func (c *Controller) provision(ctx context.Context, db database.Store, prebuildID uuid.UUID, template database.Template, presetID uuid.UUID, transition database.WorkspaceTransition, workspace database.Workspace) error { - tvp, err := db.GetPresetParametersByTemplateVersionID(ctx, template.ActiveVersionID) +func (c *Controller) provision(ctx context.Context, prebuildID uuid.UUID, template database.Template, presetID uuid.UUID, transition database.WorkspaceTransition, workspace database.Workspace) error { + tvp, err := c.store.GetPresetParametersByTemplateVersionID(ctx, template.ActiveVersionID) if err != nil { return xerrors.Errorf("fetch preset details: %w", err) } @@ -451,7 +505,7 @@ func (c *Controller) provision(ctx context.Context, db database.Store, prebuildI _, provisionerJob, _, err := builder.Build( ctx, - db, + c.store, func(action policy.Action, object rbac.Objecter) bool { return true // TODO: harden? },