From e1df5368a5797bc251eedf15d82d3cc225aa61da Mon Sep 17 00:00:00 2001 From: Sas Swart Date: Mon, 3 Mar 2025 18:43:27 +0000 Subject: [PATCH] add tests for prebuilds finalise database integration tests for prebuilds reintegrate with danny's latest changes add back assertions for deletion integration tests of prebuilds tidy up prebuilds tests --- coderd/database/queries.sql.go | 8 +- coderd/database/queries/prebuilds.sql | 7 +- enterprise/coderd/prebuilds/claim_test.go | 3 +- .../coderd/prebuilds/controller_test.go | 461 ++++++++++++------ enterprise/coderd/prebuilds/reconciliation.go | 5 +- 5 files changed, 309 insertions(+), 175 deletions(-) diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go index cf34699378..f2c674a5ee 100644 --- a/coderd/database/queries.sql.go +++ b/coderd/database/queries.sql.go @@ -5800,9 +5800,8 @@ FROM workspace_latest_build wlb INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id INNER JOIN templates t ON t.active_version_id = wlb.template_version_id -WHERE pj.job_status NOT IN -- Jobs that are not in terminal states. - ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'failed'::provisioner_job_status) +WHERE wlb.transition = 'start'::workspace_transition + AND pj.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) GROUP BY t.id, wpb.template_version_id, wpb.transition ` @@ -5861,8 +5860,7 @@ FROM workspace_prebuilds p ON tvp_curr.id = p.current_preset_id -- See https://github.com/coder/internal/issues/398. WHERE (b.transition = 'start'::workspace_transition -- Jobs that are not in terminal states. - AND pj.job_status NOT IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'unknown'::provisioner_job_status)) + AND pj.job_status = 'succeeded'::provisioner_job_status) ` type GetRunningPrebuildsRow struct { diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql index e98999935f..c40c312470 100644 --- a/coderd/database/queries/prebuilds.sql +++ b/coderd/database/queries/prebuilds.sql @@ -18,8 +18,7 @@ FROM workspace_prebuilds p ON tvp_curr.id = p.current_preset_id -- See https://github.com/coder/internal/issues/398. WHERE (b.transition = 'start'::workspace_transition -- Jobs that are not in terminal states. - AND pj.job_status NOT IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'unknown'::provisioner_job_status)); + AND pj.job_status = 'succeeded'::provisioner_job_status); -- name: GetTemplatePresetsWithPrebuilds :many SELECT t.id AS template_id, @@ -42,9 +41,7 @@ FROM workspace_latest_build wlb INNER JOIN provisioner_jobs pj ON wlb.job_id = pj.id INNER JOIN workspace_prebuild_builds wpb ON wpb.id = wlb.id INNER JOIN templates t ON t.active_version_id = wlb.template_version_id -WHERE pj.job_status NOT IN -- Jobs that are not in terminal states. - ('succeeded'::provisioner_job_status, 'canceled'::provisioner_job_status, - 'failed'::provisioner_job_status) +WHERE pj.job_status IN ('pending'::provisioner_job_status, 'running'::provisioner_job_status) GROUP BY t.id, wpb.template_version_id, wpb.transition; -- name: ClaimPrebuild :one diff --git a/enterprise/coderd/prebuilds/claim_test.go b/enterprise/coderd/prebuilds/claim_test.go index 30c15f5bb8..5d79dac5e3 100644 --- a/enterprise/coderd/prebuilds/claim_test.go +++ b/enterprise/coderd/prebuilds/claim_test.go @@ -8,10 +8,11 @@ import ( "testing" "time" - "github.com/coder/serpent" "github.com/google/uuid" "github.com/stretchr/testify/require" + "github.com/coder/serpent" + "github.com/coder/coder/v2/coderd/coderdtest" "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbauthz" diff --git a/enterprise/coderd/prebuilds/controller_test.go b/enterprise/coderd/prebuilds/controller_test.go index 0b699c7cd1..8e8447636a 100644 --- a/enterprise/coderd/prebuilds/controller_test.go +++ b/enterprise/coderd/prebuilds/controller_test.go @@ -3,13 +3,17 @@ package prebuilds_test import ( "context" "database/sql" + "fmt" "testing" "time" - "github.com/coder/serpent" "github.com/google/uuid" "github.com/stretchr/testify/require" + "github.com/coder/serpent" + + "tailscale.com/types/ptr" + "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbtestutil" @@ -119,6 +123,260 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) { require.Empty(t, jobs) } +func TestPrebuildReconciliation(t *testing.T) { + t.Parallel() + + if !dbtestutil.WillUsePostgres() { + t.Skip("This test requires postgres") + } + + type testCase struct { + name string + prebuildLatestTransitions []database.WorkspaceTransition + prebuildJobStatuses []database.ProvisionerJobStatus + templateVersionActive []bool + shouldCreateNewPrebuild *bool + shouldDeleteOldPrebuild *bool + } + + testCases := []testCase{ + { + name: "never create prebuilds for inactive template versions", + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + database.WorkspaceTransitionStop, + database.WorkspaceTransitionDelete, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusSucceeded, + database.ProvisionerJobStatusCanceled, + database.ProvisionerJobStatusFailed, + database.ProvisionerJobStatusPending, + database.ProvisionerJobStatusRunning, + database.ProvisionerJobStatusCanceling, + }, + templateVersionActive: []bool{false}, + shouldCreateNewPrebuild: ptr.To(false), + }, + { + name: "no need to create a new prebuild if one is already running", + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusSucceeded, + }, + templateVersionActive: []bool{true}, + shouldCreateNewPrebuild: ptr.To(false), + }, + { + name: "don't create a new prebuild if one is queued to build or already building", + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusPending, + database.ProvisionerJobStatusRunning, + }, + templateVersionActive: []bool{true}, + shouldCreateNewPrebuild: ptr.To(false), + }, + { + name: "create a new prebuild if one is in a state that disqualifies it from ever being claimed", + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStop, + database.WorkspaceTransitionDelete, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusPending, + database.ProvisionerJobStatusRunning, + database.ProvisionerJobStatusCanceling, + database.ProvisionerJobStatusSucceeded, + }, + templateVersionActive: []bool{true}, + shouldCreateNewPrebuild: ptr.To(true), + }, + { + name: "create a new prebuild if one is in any kind of exceptional state", + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + database.WorkspaceTransitionStop, + database.WorkspaceTransitionDelete, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusCanceled, + database.ProvisionerJobStatusFailed, + }, + templateVersionActive: []bool{true}, + shouldCreateNewPrebuild: ptr.To(true), + }, + { + name: "never attempt to interfere with active builds", + // The workspace builder does not allow scheduling a new build if there is already a build + // pending, running, or canceling. As such, we should never attempt to start, stop or delete + // such prebuilds. Rather, we should wait for the existing build to complete and reconcile + // again in the next cycle. + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + database.WorkspaceTransitionStop, + database.WorkspaceTransitionDelete, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusPending, + database.ProvisionerJobStatusRunning, + database.ProvisionerJobStatusCanceling, + }, + templateVersionActive: []bool{true, false}, + shouldDeleteOldPrebuild: ptr.To(false), + }, + { + name: "never delete prebuilds in an exceptional state", + // We don't want to destroy evidence that might be useful to operators + // when troubleshooting issues. So we leave these prebuilds in place. + // Operators are expected to manually delete these prebuilds. + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + database.WorkspaceTransitionStop, + database.WorkspaceTransitionDelete, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusCanceled, + database.ProvisionerJobStatusFailed, + }, + templateVersionActive: []bool{true, false}, + shouldDeleteOldPrebuild: ptr.To(false), + }, + { + name: "delete running prebuilds for inactive template versions", + // We only support prebuilds for active template versions. + // If a template version is inactive, we should delete any prebuilds + // that are running. + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusSucceeded, + }, + templateVersionActive: []bool{false}, + shouldDeleteOldPrebuild: ptr.To(true), + }, + { + name: "don't delete running prebuilds for active template versions", + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStart, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusSucceeded, + }, + templateVersionActive: []bool{true}, + shouldDeleteOldPrebuild: ptr.To(false), + }, + { + name: "don't delete stopped or already deleted prebuilds", + // We don't ever stop prebuilds. A stopped prebuild is an exceptional state. + // As such we keep it, to allow operators to investigate the cause. + prebuildLatestTransitions: []database.WorkspaceTransition{ + database.WorkspaceTransitionStop, + database.WorkspaceTransitionDelete, + }, + prebuildJobStatuses: []database.ProvisionerJobStatus{ + database.ProvisionerJobStatusSucceeded, + }, + templateVersionActive: []bool{true, false}, + shouldDeleteOldPrebuild: ptr.To(false), + }, + } + for _, tc := range testCases { + tc := tc + for _, templateVersionActive := range tc.templateVersionActive { + templateVersionActive := templateVersionActive + for _, prebuildLatestTransition := range tc.prebuildLatestTransitions { + prebuildLatestTransition := prebuildLatestTransition + for _, prebuildJobStatus := range tc.prebuildJobStatuses { + t.Run(fmt.Sprintf("%s - %s - %s", tc.name, prebuildLatestTransition, prebuildJobStatus), func(t *testing.T) { + t.Parallel() + ctx := testutil.Context(t, testutil.WaitShort) + cfg := codersdk.PrebuildsConfig{} + logger := testutil.Logger(t) + db, pubsub := dbtestutil.NewDB(t) + controller := prebuilds.NewStoreReconciler(db, pubsub, cfg, logger) + + orgID, userID, templateID := setupTestDBTemplate(t, db) + templateVersionID := setupTestDBTemplateVersion( + t, + ctx, + db, + pubsub, + orgID, + userID, + templateID, + ) + _, prebuildID := setupTestDBPrebuild( + t, + ctx, + db, + pubsub, + prebuildLatestTransition, + prebuildJobStatus, + orgID, + templateID, + templateVersionID, + ) + + if !templateVersionActive { + // Create a new template version and mark it as active + // This marks the template version that we care about as inactive + setupTestDBTemplateVersion( + t, + ctx, + db, + pubsub, + orgID, + userID, + templateID, + ) + } + + // Run the reconciliation multiple times to ensure idempotency + // 8 was arbitrary, but large enough to reasonably trust the result + for range 8 { + controller.ReconcileAll(ctx) + + if tc.shouldCreateNewPrebuild != nil { + newPrebuildCount := 0 + workspaces, err := db.GetWorkspacesByTemplateID(ctx, templateID) + require.NoError(t, err) + for _, workspace := range workspaces { + if workspace.ID != prebuildID { + newPrebuildCount++ + } + } + // This test configures a preset that desires one prebuild. + // In cases where new prebuilds should be created, there should be exactly one. + require.Equal(t, *tc.shouldCreateNewPrebuild, newPrebuildCount == 1) + } + + if tc.shouldDeleteOldPrebuild != nil { + builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{ + WorkspaceID: prebuildID, + }) + require.NoError(t, err) + if *tc.shouldDeleteOldPrebuild { + require.Equal(t, 2, len(builds)) + require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition) + } else { + require.Equal(t, 1, len(builds)) + require.Equal(t, prebuildLatestTransition, builds[0].Transition) + } + } + } + }) + } + } + } + } +} + func setupTestDBTemplate( t *testing.T, db database.Store, @@ -138,20 +396,17 @@ func setupTestDBTemplate( return org.ID, user.ID, template.ID } -func setupTestDBPrebuild( + +func setupTestDBTemplateVersion( t *testing.T, ctx context.Context, db database.Store, pubsub pubsub.Pubsub, - prebuildStatus database.WorkspaceStatus, orgID uuid.UUID, userID uuid.UUID, templateID uuid.UUID, -) ( - templateVersionID uuid.UUID, - presetID uuid.UUID, - prebuildID uuid.UUID, -) { +) uuid.UUID { + t.Helper() templateVersionJob := dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{ ID: uuid.New(), CreatedAt: time.Now().Add(-2 * time.Hour), @@ -169,8 +424,26 @@ func setupTestDBPrebuild( ID: templateID, ActiveVersionID: templateVersion.ID, }) + return templateVersion.ID +} + +func setupTestDBPrebuild( + t *testing.T, + ctx context.Context, + db database.Store, + pubsub pubsub.Pubsub, + transition database.WorkspaceTransition, + prebuildStatus database.ProvisionerJobStatus, + orgID uuid.UUID, + templateID uuid.UUID, + templateVersionID uuid.UUID, +) ( + presetID uuid.UUID, + prebuildID uuid.UUID, +) { + t.Helper() preset, err := db.InsertPreset(ctx, database.InsertPresetParams{ - TemplateVersionID: templateVersion.ID, + TemplateVersionID: templateVersionID, Name: "test", }) require.NoError(t, err) @@ -187,30 +460,29 @@ func setupTestDBPrebuild( }) require.NoError(t, err) - completedAt := sql.NullTime{} cancelledAt := sql.NullTime{} - transition := database.WorkspaceTransitionStart - deleted := false + completedAt := sql.NullTime{} + + startedAt := sql.NullTime{} + if prebuildStatus != database.ProvisionerJobStatusPending { + startedAt = sql.NullTime{Time: time.Now().Add(-2 * time.Hour), Valid: true} + } + buildError := sql.NullString{} - switch prebuildStatus { - case database.WorkspaceStatusRunning: - completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} - case database.WorkspaceStatusStopped: - completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} - transition = database.WorkspaceTransitionStop - case database.WorkspaceStatusFailed: + if prebuildStatus == database.ProvisionerJobStatusFailed { completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} buildError = sql.NullString{String: "build failed", Valid: true} - case database.WorkspaceStatusCanceled: + } + + deleted := false + switch prebuildStatus { + case database.ProvisionerJobStatusCanceling: + cancelledAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} + case database.ProvisionerJobStatusCanceled: completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} cancelledAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} - case database.WorkspaceStatusDeleted: + case database.ProvisionerJobStatusSucceeded: completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true} - transition = database.WorkspaceTransitionDelete - deleted = true - case database.WorkspaceStatusPending: - completedAt = sql.NullTime{} - transition = database.WorkspaceTransitionStart default: } @@ -223,6 +495,7 @@ func setupTestDBPrebuild( job := dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{ InitiatorID: prebuilds.OwnerID, CreatedAt: time.Now().Add(-2 * time.Hour), + StartedAt: startedAt, CompletedAt: completedAt, CanceledAt: cancelledAt, OrganizationID: orgID, @@ -231,145 +504,13 @@ func setupTestDBPrebuild( dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{ WorkspaceID: workspace.ID, InitiatorID: prebuilds.OwnerID, - TemplateVersionID: templateVersion.ID, + TemplateVersionID: templateVersionID, JobID: job.ID, TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true}, Transition: transition, }) - return templateVersion.ID, preset.ID, workspace.ID + return preset.ID, workspace.ID } -func TestActiveTemplateVersionPrebuilds(t *testing.T) { - if !dbtestutil.WillUsePostgres() { - t.Skip("This test requires postgres") - } - - t.Parallel() - - type testCase struct { - name string - prebuildStatus database.WorkspaceStatus - shouldCreateNewPrebuild bool - shouldDeleteOldPrebuild bool - } - - testCases := []testCase{ - { - name: "running prebuild", - prebuildStatus: database.WorkspaceStatusRunning, - shouldCreateNewPrebuild: false, - shouldDeleteOldPrebuild: false, - }, - { - name: "stopped prebuild", - prebuildStatus: database.WorkspaceStatusStopped, - shouldCreateNewPrebuild: true, - shouldDeleteOldPrebuild: false, - }, - { - name: "failed prebuild", - prebuildStatus: database.WorkspaceStatusFailed, - shouldCreateNewPrebuild: true, - shouldDeleteOldPrebuild: false, - }, - { - name: "canceled prebuild", - prebuildStatus: database.WorkspaceStatusCanceled, - shouldCreateNewPrebuild: true, - shouldDeleteOldPrebuild: false, - }, - // { - // name: "deleted prebuild", - // prebuildStatus: database.WorkspaceStatusDeleted, - // shouldConsiderPrebuildRunning: false, - // shouldConsiderPrebuildInProgress: false, - // shouldCreateNewPrebuild: true, - // shouldDeleteOldPrebuild: false, - // }, - { - name: "pending prebuild", - prebuildStatus: database.WorkspaceStatusPending, - shouldCreateNewPrebuild: false, - shouldDeleteOldPrebuild: false, - }, - } - for _, tc := range testCases { - tc := tc - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - ctx := testutil.Context(t, testutil.WaitShort) - db, pubsub := dbtestutil.NewDB(t) - cfg := codersdk.PrebuildsConfig{} - logger := testutil.Logger(t) - controller := prebuilds.NewStoreReconciler(db, pubsub, cfg, logger) - - orgID, userID, templateID := setupTestDBTemplate(t, db) - _, _, prebuildID := setupTestDBPrebuild( - t, - ctx, - db, - pubsub, - tc.prebuildStatus, - orgID, - userID, - templateID, - ) - - require.NoError(t, controller.ReconcileAll(ctx)) - - createdNewPrebuild := false - deletedOldPrebuild := true - workspaces, err := db.GetWorkspacesByTemplateID(ctx, templateID) - require.NoError(t, err) - for _, workspace := range workspaces { - if workspace.ID == prebuildID { - deletedOldPrebuild = false - } - - if workspace.ID != prebuildID { - createdNewPrebuild = true - } - } - require.Equal(t, tc.shouldCreateNewPrebuild, createdNewPrebuild) - require.Equal(t, tc.shouldDeleteOldPrebuild, deletedOldPrebuild) - }) - } -} - -func TestInactiveTemplateVersionPrebuilds(t *testing.T) { - // Scenario: Prebuilds are never created and always deleted if the template version is inactive - t.Parallel() - t.Skip("todo") - - ctx := testutil.Context(t, testutil.WaitShort) - db, pubsub := dbtestutil.NewDB(t) - cfg := codersdk.PrebuildsConfig{} - logger := testutil.Logger(t) - controller := prebuilds.NewStoreReconciler(db, pubsub, cfg, logger) - - // when does a prebuild get deleted? - // * when it is in some way permanently ineligible to be claimed - // * this could be because the build failed or was canceled - // * or it belongs to a template version that is no longer active - // * or it belongs to a template version that is deprecated - // * when there are more prebuilds than the preset desires - // * someone could have manually created a workspace for the prebuild user - // * any workspaces that were created for the prebuilds user and don't match a preset should be deleted - deferred - - // given a preset that desires 2 prebuilds - // and there are 3 running prebuilds for the preset - // and there are 4 non-running prebuilds for the preset - // * one is not running because its latest build was a stop transition - // * another is not running because its latest build was a delete transition - // * a third is not running because its latest build was a start transition but the build failed - // * a fourth is not running because its latest build was a start transition but the build was canceled - // when we trigger the reconciliation loop for all templates - require.NoError(t, controller.ReconcileAll(ctx)) - // then the four non running prebuilds are deleted - // and 1 of the running prebuilds is deleted - // because stopped, deleted and failed builds are not considered running in terms of the definition of "running" above. -} - -// TODO (sasswart): test idempotency of reconciliation // TODO (sasswart): test mutual exclusion diff --git a/enterprise/coderd/prebuilds/reconciliation.go b/enterprise/coderd/prebuilds/reconciliation.go index e0efd8339f..3523e49e02 100644 --- a/enterprise/coderd/prebuilds/reconciliation.go +++ b/enterprise/coderd/prebuilds/reconciliation.go @@ -176,7 +176,6 @@ func (p presetState) calculateActions() (*reconciliationActions, error) { return 0 }) - var victims []uuid.UUID for i := 0; i < int(extraneous); i++ { if i >= len(p.running) { // This should never happen. @@ -187,11 +186,9 @@ func (p presetState) calculateActions() (*reconciliationActions, error) { continue } - victims = append(victims, p.running[i].WorkspaceID) + actions.deleteIDs = append(actions.deleteIDs, p.running[i].WorkspaceID) } - actions.deleteIDs = append(actions.deleteIDs, victims...) - // TODO: move up // c.logger.Warn(ctx, "found extra prebuilds running, picking random victim(s)", // slog.F("template_id", p.preset.TemplateID.String()), slog.F("desired", desired), slog.F("actual", actual), slog.F("extra", extraneous),