diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go index 405f449157..edc175ada5 100644 --- a/coderd/database/dbauthz/dbauthz.go +++ b/coderd/database/dbauthz/dbauthz.go @@ -2221,10 +2221,10 @@ func (q *querier) GetTemplateParameterInsights(ctx context.Context, arg database return q.db.GetTemplateParameterInsights(ctx, arg) } -func (q *querier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) (database.GetTemplatePrebuildStateRow, error) { +func (q *querier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { // TODO: authz if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil { - return database.GetTemplatePrebuildStateRow{}, err + return nil, err } return q.db.GetTemplatePrebuildState(ctx, templateID) } diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index a093dc2df9..95a8f312f8 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -5464,7 +5464,7 @@ func (q *FakeQuerier) GetTemplateParameterInsights(ctx context.Context, arg data return rows, nil } -func (q *FakeQuerier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) (database.GetTemplatePrebuildStateRow, error) { +func (q *FakeQuerier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { panic("not implemented") } diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go index 02a81fca88..82c1f09456 100644 --- a/coderd/database/dbmetrics/querymetrics.go +++ b/coderd/database/dbmetrics/querymetrics.go @@ -1253,7 +1253,7 @@ func (m queryMetricsStore) GetTemplateParameterInsights(ctx context.Context, arg return r0, r1 } -func (m queryMetricsStore) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) (database.GetTemplatePrebuildStateRow, error) { +func (m queryMetricsStore) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]database.GetTemplatePrebuildStateRow, error) { start := time.Now() r0, r1 := m.s.GetTemplatePrebuildState(ctx, templateID) m.queryLatencies.WithLabelValues("GetTemplatePrebuildState").Observe(time.Since(start).Seconds()) diff --git a/coderd/database/querier.go b/coderd/database/querier.go index 7c4ce87a0b..cbd242a223 100644 --- a/coderd/database/querier.go +++ b/coderd/database/querier.go @@ -265,7 +265,7 @@ type sqlcQuerier interface { // created in the timeframe and return the aggregate usage counts of parameter // values. GetTemplateParameterInsights(ctx context.Context, arg GetTemplateParameterInsightsParams) ([]GetTemplateParameterInsightsRow, error) - GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) (GetTemplatePrebuildStateRow, error) + GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]GetTemplatePrebuildStateRow, error) GetTemplateUsageStats(ctx context.Context, arg GetTemplateUsageStatsParams) ([]TemplateUsageStat, error) GetTemplateVersionByID(ctx context.Context, id uuid.UUID) (TemplateVersion, error) GetTemplateVersionByJobID(ctx context.Context, jobID uuid.UUID) (TemplateVersion, error) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index d21854cc27..e51357855a 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -5395,7 +5395,7 @@ func (q *sqlQuerier) GetParameterSchemasByJobID(ctx context.Context, jobID uuid. return items, nil } -const getTemplatePrebuildState = `-- name: GetTemplatePrebuildState :one +const getTemplatePrebuildState = `-- name: GetTemplatePrebuildState :many WITH -- All prebuilds currently running running_prebuilds AS (SELECT p.template_id, @@ -5405,6 +5405,7 @@ WITH FROM workspace_prebuilds p INNER JOIN workspace_latest_build b ON b.workspace_id = p.id INNER JOIN provisioner_jobs pj ON b.job_id = pj.id + INNER JOIN templates t ON p.template_id = t.id WHERE (b.transition = 'start'::workspace_transition -- if a deletion job fails, the workspace will still be running OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, @@ -5421,51 +5422,54 @@ WITH INNER JOIN template_versions tv ON tv.template_id = t.id INNER JOIN template_version_presets tvp ON tvp.template_version_id = tv.id INNER JOIN template_version_preset_prebuilds tvpp ON tvpp.preset_id = tvp.id - WHERE t.id = $1::uuid + WHERE t.id = $1::uuid GROUP BY t.id, tv.id, tvpp.id), -- Jobs relating to prebuilds current in-flight prebuilds_in_progress AS (SELECT wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count - FROM workspace_prebuild_builds wpb - INNER JOIN workspace_latest_build wlb ON wpb.workspace_id = wlb.workspace_id + FROM workspace_latest_build wlb INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id + INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id WHERE pj.job_status NOT IN ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, 'failed'::provisioner_job_status) GROUP BY wpb.template_version_id, wpb.transition) SELECT t.template_id, t.template_version_id, - t.using_active_version AS is_active, - p.ids AS running_prebuild_ids, - CAST(COALESCE( - MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END), - 0) AS INT) AS actual, -- running prebuilds for active version + t.using_active_version AS is_active, + CAST(COALESCE(MAX(CASE WHEN p.template_version_id = t.template_version_id THEN p.ids END), + '') AS TEXT) AS running_prebuild_ids, + CAST(COALESCE(MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END), + 0) AS INT) AS actual, -- running prebuilds for active version CAST(COALESCE(MAX(CASE WHEN t.using_active_version THEN t.desired_instances ELSE 0 END), - 0) AS INT) AS desired, -- we only care about the active version's desired instances - CAST(COALESCE(MAX(CASE WHEN t.using_active_version THEN 0 ELSE p.count END), - 0) AS INT) AS extraneous, -- running prebuilds for inactive version + 0) AS INT) AS desired, -- we only care about the active version's desired instances + CAST(COALESCE(MAX(CASE WHEN p.template_version_id = t.template_version_id AND t.using_active_version = false THEN p.count END), + 0) AS INT) AS extraneous, -- running prebuilds for inactive version CAST(COALESCE(MAX(CASE WHEN pip.transition = 'start'::workspace_transition THEN pip.count - ELSE 0 END), 0) AS INT) AS starting, + ELSE 0 END), + 0) AS INT) AS starting, CAST(COALESCE(MAX(CASE WHEN pip.transition = 'stop'::workspace_transition THEN pip.count ELSE 0 END), - 0) AS INT) AS stopping, -- not strictly needed, since prebuilds should never be left if a "stopped" state, but useful to know + 0) AS INT) AS stopping, -- not strictly needed, since prebuilds should never be left if a "stopped" state, but useful to know CAST(COALESCE(MAX(CASE WHEN pip.transition = 'delete'::workspace_transition THEN pip.count - ELSE 0 END), 0) AS INT) AS deleting, - t.deleted AS template_deleted, - t.deprecated AS template_deprecated + ELSE 0 END), + 0) AS INT) AS deleting, + t.deleted AS template_deleted, + t.deprecated AS template_deprecated FROM templates_with_prebuilds t - LEFT JOIN running_prebuilds p ON p.template_id = t.template_id + LEFT JOIN running_prebuilds p ON p.template_version_id = t.template_version_id LEFT JOIN prebuilds_in_progress pip ON pip.template_version_id = t.template_version_id -GROUP BY t.using_active_version, t.template_id, t.template_version_id, p.count, p.ids, t.deleted, t.deprecated +GROUP BY t.using_active_version, t.template_id, t.template_version_id, p.count, p.ids, + p.template_version_id, t.deleted, t.deprecated ` type GetTemplatePrebuildStateRow struct { TemplateID uuid.UUID `db:"template_id" json:"template_id"` TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"` IsActive bool `db:"is_active" json:"is_active"` - RunningPrebuildIds []byte `db:"running_prebuild_ids" json:"running_prebuild_ids"` + RunningPrebuildIds string `db:"running_prebuild_ids" json:"running_prebuild_ids"` Actual int32 `db:"actual" json:"actual"` Desired int32 `db:"desired" json:"desired"` Extraneous int32 `db:"extraneous" json:"extraneous"` @@ -5476,24 +5480,40 @@ type GetTemplatePrebuildStateRow struct { TemplateDeprecated bool `db:"template_deprecated" json:"template_deprecated"` } -func (q *sqlQuerier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) (GetTemplatePrebuildStateRow, error) { - row := q.db.QueryRowContext(ctx, getTemplatePrebuildState, templateID) - var i GetTemplatePrebuildStateRow - err := row.Scan( - &i.TemplateID, - &i.TemplateVersionID, - &i.IsActive, - &i.RunningPrebuildIds, - &i.Actual, - &i.Desired, - &i.Extraneous, - &i.Starting, - &i.Stopping, - &i.Deleting, - &i.TemplateDeleted, - &i.TemplateDeprecated, - ) - return i, err +func (q *sqlQuerier) GetTemplatePrebuildState(ctx context.Context, templateID uuid.UUID) ([]GetTemplatePrebuildStateRow, error) { + rows, err := q.db.QueryContext(ctx, getTemplatePrebuildState, templateID) + if err != nil { + return nil, err + } + defer rows.Close() + var items []GetTemplatePrebuildStateRow + for rows.Next() { + var i GetTemplatePrebuildStateRow + if err := rows.Scan( + &i.TemplateID, + &i.TemplateVersionID, + &i.IsActive, + &i.RunningPrebuildIds, + &i.Actual, + &i.Desired, + &i.Extraneous, + &i.Starting, + &i.Stopping, + &i.Deleting, + &i.TemplateDeleted, + &i.TemplateDeprecated, + ); err != nil { + return nil, err + } + items = append(items, i) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil } const getPresetByWorkspaceBuildID = `-- name: GetPresetByWorkspaceBuildID :one diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index dce2590354..2291371fe2 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -1,4 +1,4 @@ --- name: GetTemplatePrebuildState :one +-- name: GetTemplatePrebuildState :many WITH -- All prebuilds currently running running_prebuilds AS (SELECT p.template_id, @@ -8,6 +8,7 @@ WITH FROM workspace_prebuilds p INNER JOIN workspace_latest_build b ON b.workspace_id = p.id INNER JOIN provisioner_jobs pj ON b.job_id = pj.id + INNER JOIN templates t ON p.template_id = t.id WHERE (b.transition = 'start'::workspace_transition -- if a deletion job fails, the workspace will still be running OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, @@ -28,37 +29,36 @@ WITH GROUP BY t.id, tv.id, tvpp.id), -- Jobs relating to prebuilds current in-flight prebuilds_in_progress AS (SELECT wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count - FROM workspace_prebuild_builds wpb - INNER JOIN workspace_latest_build wlb ON wpb.workspace_id = wlb.workspace_id + FROM workspace_latest_build wlb INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id + INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id WHERE pj.job_status NOT IN ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, 'failed'::provisioner_job_status) GROUP BY wpb.template_version_id, wpb.transition) SELECT t.template_id, t.template_version_id, - t.using_active_version AS is_active, - p.ids AS running_prebuild_ids, - CAST(COALESCE( - MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END), - 0) AS INT) AS actual, -- running prebuilds for active version + t.using_active_version AS is_active, + CAST(COALESCE(MAX(CASE WHEN p.template_version_id = t.template_version_id THEN p.ids END), + '') AS TEXT) AS running_prebuild_ids, + CAST(COALESCE(MAX(CASE WHEN t.using_active_version THEN p.count ELSE 0 END), + 0) AS INT) AS actual, -- running prebuilds for active version CAST(COALESCE(MAX(CASE WHEN t.using_active_version THEN t.desired_instances ELSE 0 END), - 0) AS INT) AS desired, -- we only care about the active version's desired instances - CAST(COALESCE(MAX(CASE WHEN t.using_active_version THEN 0 ELSE p.count END), - 0) AS INT) AS extraneous, -- running prebuilds for inactive version + 0) AS INT) AS desired, -- we only care about the active version's desired instances CAST(COALESCE(MAX(CASE - WHEN pip.transition = 'start'::workspace_transition THEN pip.count - ELSE 0 END), 0) AS INT) AS starting, - CAST(COALESCE(MAX(CASE - WHEN pip.transition = 'stop'::workspace_transition THEN pip.count - ELSE 0 END), - 0) AS INT) AS stopping, -- not strictly needed, since prebuilds should never be left if a "stopped" state, but useful to know - CAST(COALESCE(MAX(CASE - WHEN pip.transition = 'delete'::workspace_transition THEN pip.count - ELSE 0 END), 0) AS INT) AS deleting, - t.deleted AS template_deleted, - t.deprecated AS template_deprecated + WHEN p.template_version_id = t.template_version_id AND t.using_active_version = false + THEN p.count END), + 0) AS INT) AS extraneous, -- running prebuilds for inactive version + CAST(COALESCE(MAX(CASE WHEN pip.transition = 'start'::workspace_transition THEN pip.count ELSE 0 END), + 0) AS INT) AS starting, + CAST(COALESCE(MAX(CASE WHEN pip.transition = 'stop'::workspace_transition THEN pip.count ELSE 0 END), + 0) AS INT) AS stopping, -- not strictly needed, since prebuilds should never be left if a "stopped" state, but useful to know + CAST(COALESCE(MAX(CASE WHEN pip.transition = 'delete'::workspace_transition THEN pip.count ELSE 0 END), + 0) AS INT) AS deleting, + t.deleted AS template_deleted, + t.deprecated AS template_deprecated FROM templates_with_prebuilds t - LEFT JOIN running_prebuilds p ON p.template_id = t.template_id + LEFT JOIN running_prebuilds p ON p.template_version_id = t.template_version_id LEFT JOIN prebuilds_in_progress pip ON pip.template_version_id = t.template_version_id -GROUP BY t.using_active_version, t.template_id, t.template_version_id, p.count, p.ids, t.deleted, t.deprecated; +GROUP BY t.using_active_version, t.template_id, t.template_version_id, p.count, p.ids, + p.template_version_id, t.deleted, t.deprecated; diff --git a/coderd/prebuilds/controller.go b/coderd/prebuilds/controller.go index 2c16981863..003b0f1f99 100644 --- a/coderd/prebuilds/controller.go +++ b/coderd/prebuilds/controller.go @@ -1,7 +1,6 @@ package prebuilds import ( - "bytes" "context" "fmt" "github.com/coder/coder/v2/coderd/audit" @@ -12,6 +11,7 @@ import ( "github.com/coder/coder/v2/coderd/rbac/policy" "github.com/coder/coder/v2/coderd/wsbuilder" "math" + "strings" "time" "cdr.dev/slog" @@ -175,10 +175,10 @@ func (c Controller) calculateActions(ctx context.Context, template database.Temp actions.createIDs = append(actions.createIDs, uuid.New()) } - runningIDs := bytes.Split(state.RunningPrebuildIds, []byte{','}) + runningIDs := strings.Split(state.RunningPrebuildIds, ",") if toDelete > 0 && len(runningIDs) != toDelete { c.logger.Warn(ctx, "mismatch between running prebuilds and expected deletion count!", - slog.F("template_id", template.ID), slog.F("running", len(runningIDs)), slog.F("to_destroy", toDelete)) + slog.F("template_id", template.ID), slog.F("running", len(runningIDs)), slog.F("to_delete", toDelete)) } // TODO: implement lookup to not perform same action on workspace multiple times in $period @@ -190,7 +190,7 @@ func (c Controller) calculateActions(ctx context.Context, template database.Temp } running := runningIDs[i] - id, err := uuid.ParseBytes(running) + id, err := uuid.Parse(running) if err != nil { c.logger.Warn(ctx, "invalid prebuild ID", slog.F("template_id", template.ID), slog.F("id", string(running)), slog.Error(err)) @@ -217,63 +217,64 @@ func (c Controller) reconcileTemplate(ctx context.Context, template database.Tem innerCtx, cancel := context.WithTimeout(ctx, time.Second*30) defer cancel() - // TODO: change to "many" and return 1 or more rows, but only one should be returned - // more than 1 response is indicative of a query problem - state, err := db.GetTemplatePrebuildState(ctx, template.ID) + versionStates, err := db.GetTemplatePrebuildState(ctx, template.ID) if err != nil { return xerrors.Errorf("failed to retrieve template's prebuild states: %w", err) } - actions, err := c.calculateActions(innerCtx, template, state) - if err != nil { - logger.Error(ctx, "failed to calculate reconciliation actions", slog.Error(err)) - return nil - } + for _, state := range versionStates { + vlogger := logger.With(slog.F("template_version_id", state.TemplateVersionID)) - // TODO: authz // Can't use existing profiles (i.e. AsSystemRestricted) because of dbauthz rules - var ownerCtx = dbauthz.As(ctx, rbac.Subject{ - ID: "owner", - Roles: rbac.RoleIdentifiers{rbac.RoleOwner()}, - Groups: []string{}, - Scope: rbac.ExpandableScope(rbac.ScopeAll), - }) - - levelFn := logger.Debug - if len(actions.createIDs) > 0 || len(actions.deleteIDs) > 0 { - // Only log with info level when there's a change that needs to be effected. - levelFn = logger.Info - } - levelFn(innerCtx, "template prebuild state retrieved", - slog.F("to_create", len(actions.createIDs)), slog.F("to_destroy", len(actions.deleteIDs)), - slog.F("desired", actions.meta.Desired), slog.F("actual", actions.meta.Actual), slog.F("extraneous", actions.meta.Extraneous), - slog.F("starting", actions.meta.Starting), slog.F("stopping", actions.meta.Stopping), slog.F("deleting", actions.meta.Deleting)) - - // Provision workspaces within the same tx so we don't get any timing issues here. - // i.e. we hold the advisory lock until all reconciliatory actions have been taken. - // TODO: max per reconciliation iteration? - - for _, id := range actions.createIDs { - if err := c.createPrebuild(ownerCtx, db, id, template); err != nil { - logger.Error(ctx, "failed to create prebuild", slog.Error(err)) + actions, err := c.calculateActions(innerCtx, template, state) + if err != nil { + vlogger.Error(ctx, "failed to calculate reconciliation actions", slog.Error(err)) + continue } - } - for _, id := range actions.deleteIDs { - if err := c.deletePrebuild(ownerCtx, db, id, template); err != nil { - logger.Error(ctx, "failed to delete prebuild", slog.Error(err)) + // TODO: authz // Can't use existing profiles (i.e. AsSystemRestricted) because of dbauthz rules + var ownerCtx = dbauthz.As(ctx, rbac.Subject{ + ID: "owner", + Roles: rbac.RoleIdentifiers{rbac.RoleOwner()}, + Groups: []string{}, + Scope: rbac.ExpandableScope(rbac.ScopeAll), + }) + + levelFn := vlogger.Debug + if len(actions.createIDs) > 0 || len(actions.deleteIDs) > 0 { + // Only log with info level when there's a change that needs to be effected. + levelFn = vlogger.Info + } + levelFn(innerCtx, "template prebuild state retrieved", + slog.F("to_create", len(actions.createIDs)), slog.F("to_delete", len(actions.deleteIDs)), + slog.F("desired", actions.meta.Desired), slog.F("actual", actions.meta.Actual), slog.F("extraneous", actions.meta.Extraneous), + slog.F("starting", actions.meta.Starting), slog.F("stopping", actions.meta.Stopping), slog.F("deleting", actions.meta.Deleting)) + + // Provision workspaces within the same tx so we don't get any timing issues here. + // i.e. we hold the advisory lock until all reconciliatory actions have been taken. + // TODO: max per reconciliation iteration? + + for _, id := range actions.createIDs { + if err := c.createPrebuild(ownerCtx, db, id, template); err != nil { + vlogger.Error(ctx, "failed to create prebuild", slog.Error(err)) + } + } + + for _, id := range actions.deleteIDs { + if err := c.deletePrebuild(ownerCtx, db, id, template); err != nil { + vlogger.Error(ctx, "failed to delete prebuild", slog.Error(err)) + } } } return nil }, &database.TxOptions{ // TODO: isolation - TxIdentifier: "tempdlate_prebuilds", + TxIdentifier: "template_prebuilds", }) if err != nil { logger.Error(ctx, "failed to acquire database transaction", slog.Error(err)) } - // trigger n InsertProvisionerJob calls to scale up or down instances return nil }