Merge branch 'dk/prebuilds' of github.com:/coder/coder into dk/prebuilds-tests

This commit is contained in:
Danny Kopping
2025-03-03 12:46:44 +00:00
7 changed files with 178 additions and 143 deletions

View File

@ -5624,7 +5624,7 @@ FROM workspace_prebuilds p
ON tvp_curr.id = p.current_preset_id -- See https://github.com/coder/internal/issues/398. ON tvp_curr.id = p.current_preset_id -- See https://github.com/coder/internal/issues/398.
WHERE (b.transition = 'start'::workspace_transition WHERE (b.transition = 'start'::workspace_transition
-- Jobs that are not in terminal states. -- Jobs that are not in terminal states.
OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, AND pj.job_status NOT IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status,
'unknown'::provisioner_job_status)) 'unknown'::provisioner_job_status))
` `

View File

@ -18,7 +18,7 @@ FROM workspace_prebuilds p
ON tvp_curr.id = p.current_preset_id -- See https://github.com/coder/internal/issues/398. ON tvp_curr.id = p.current_preset_id -- See https://github.com/coder/internal/issues/398.
WHERE (b.transition = 'start'::workspace_transition WHERE (b.transition = 'start'::workspace_transition
-- Jobs that are not in terminal states. -- Jobs that are not in terminal states.
OR pj.job_status IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status, AND pj.job_status NOT IN ('failed'::provisioner_job_status, 'canceled'::provisioner_job_status,
'unknown'::provisioner_job_status)); 'unknown'::provisioner_job_status));
-- name: GetTemplatePresetsWithPrebuilds :many -- name: GetTemplatePresetsWithPrebuilds :many

View File

@ -20,8 +20,8 @@ func (e EnterpriseClaimer) Claim(ctx context.Context, store database.Store, user
err := store.InTx(func(db database.Store) error { err := store.InTx(func(db database.Store) error {
// TODO: do we need this? // TODO: do we need this?
//// Ensure no other replica can claim a prebuild for this user simultaneously. //// Ensure no other replica can claim a prebuild for this user simultaneously.
//err := store.AcquireLock(ctx, database.GenLockID(fmt.Sprintf("prebuild-user-claim-%s", userID.String()))) // err := store.AcquireLock(ctx, database.GenLockID(fmt.Sprintf("prebuild-user-claim-%s", userID.String())))
//if err != nil { // if err != nil {
// return xerrors.Errorf("acquire claim lock for user %q: %w", userID.String(), err) // return xerrors.Errorf("acquire claim lock for user %q: %w", userID.String(), err)
//} //}

View File

@ -223,6 +223,9 @@ func (c *Controller) determineState(ctx context.Context, store database.Store, t
} }
presetsWithPrebuilds, err := db.GetTemplatePresetsWithPrebuilds(ctx, dbTemplateID) presetsWithPrebuilds, err := db.GetTemplatePresetsWithPrebuilds(ctx, dbTemplateID)
if err != nil {
return xerrors.Errorf("failed to get template presets with prebuilds: %w", err)
}
if len(presetsWithPrebuilds) == 0 { if len(presetsWithPrebuilds) == 0 {
return nil return nil
} }

View File

@ -17,6 +17,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbauthz" "github.com/coder/coder/v2/coderd/database/dbauthz"
"github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/rbac" "github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/provisioner/echo" "github.com/coder/coder/v2/provisioner/echo"
@ -124,18 +125,139 @@ func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
require.Empty(t, jobs) require.Empty(t, jobs)
} }
func TestPrebuildCreation(t *testing.T) { func setupTestDBTemplate(
t *testing.T,
db database.Store,
) (
orgID uuid.UUID,
userID uuid.UUID,
templateID uuid.UUID,
) {
t.Helper()
org := dbgen.Organization(t, db, database.Organization{})
user := dbgen.User(t, db, database.User{})
template := dbgen.Template(t, db, database.Template{
CreatedBy: user.ID,
OrganizationID: org.ID,
})
return org.ID, user.ID, template.ID
}
func setupTestDBPrebuild(
t *testing.T,
ctx context.Context,
db database.Store,
pubsub pubsub.Pubsub,
prebuildStatus database.WorkspaceStatus,
orgID uuid.UUID,
userID uuid.UUID,
templateID uuid.UUID,
) (
templateVersionID uuid.UUID,
presetID uuid.UUID,
prebuildID uuid.UUID,
) {
templateVersionJob := dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{
ID: uuid.New(),
CreatedAt: time.Now().Add(-2 * time.Hour),
CompletedAt: sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true},
OrganizationID: orgID,
InitiatorID: userID,
})
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: templateID, Valid: true},
OrganizationID: orgID,
CreatedBy: userID,
JobID: templateVersionJob.ID,
})
db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: templateID,
ActiveVersionID: templateVersion.ID,
})
preset, err := db.InsertPreset(ctx, database.InsertPresetParams{
TemplateVersionID: templateVersion.ID,
Name: "test",
})
require.NoError(t, err)
_, err = db.InsertPresetParameters(ctx, database.InsertPresetParametersParams{
TemplateVersionPresetID: preset.ID,
Names: []string{"test"},
Values: []string{"test"},
})
require.NoError(t, err)
_, err = db.InsertPresetPrebuild(ctx, database.InsertPresetPrebuildParams{
ID: uuid.New(),
PresetID: preset.ID,
DesiredInstances: 1,
})
require.NoError(t, err)
completedAt := sql.NullTime{}
cancelledAt := sql.NullTime{}
transition := database.WorkspaceTransitionStart
deleted := false
buildError := sql.NullString{}
switch prebuildStatus {
case database.WorkspaceStatusRunning:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
case database.WorkspaceStatusStopped:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
transition = database.WorkspaceTransitionStop
case database.WorkspaceStatusFailed:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
buildError = sql.NullString{String: "build failed", Valid: true}
case database.WorkspaceStatusCanceled:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
cancelledAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
case database.WorkspaceStatusDeleted:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
transition = database.WorkspaceTransitionDelete
deleted = true
case database.WorkspaceStatusPending:
completedAt = sql.NullTime{}
transition = database.WorkspaceTransitionStart
default:
}
workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
TemplateID: templateID,
OrganizationID: orgID,
OwnerID: OwnerID,
Deleted: deleted,
})
job := dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{
InitiatorID: OwnerID,
CreatedAt: time.Now().Add(-2 * time.Hour),
CompletedAt: completedAt,
CanceledAt: cancelledAt,
OrganizationID: orgID,
Error: buildError,
})
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: workspace.ID,
InitiatorID: OwnerID,
TemplateVersionID: templateVersion.ID,
JobID: job.ID,
TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
Transition: transition,
})
return templateVersion.ID, preset.ID, workspace.ID
}
func TestActiveTemplateVersionPrebuilds(t *testing.T) {
if !dbtestutil.WillUsePostgres() { if !dbtestutil.WillUsePostgres() {
t.Skip("This test requires postgres") t.Skip("This test requires postgres")
} }
t.Parallel() t.Parallel()
// Scenario: Prebuilds are created if and only if they are needed
type testCase struct { type testCase struct {
name string name string
prebuildStatus database.WorkspaceStatus prebuildStatus database.WorkspaceStatus
shouldCreateNewPrebuild bool shouldCreateNewPrebuild bool
shouldDeleteOldPrebuild bool
} }
testCases := []testCase{ testCases := []testCase{
@ -143,34 +265,43 @@ func TestPrebuildCreation(t *testing.T) {
name: "running prebuild", name: "running prebuild",
prebuildStatus: database.WorkspaceStatusRunning, prebuildStatus: database.WorkspaceStatusRunning,
shouldCreateNewPrebuild: false, shouldCreateNewPrebuild: false,
shouldDeleteOldPrebuild: false,
}, },
{ {
name: "stopped prebuild", name: "stopped prebuild",
prebuildStatus: database.WorkspaceStatusStopped, prebuildStatus: database.WorkspaceStatusStopped,
shouldCreateNewPrebuild: true, shouldCreateNewPrebuild: true,
shouldDeleteOldPrebuild: false,
}, },
{ {
name: "failed prebuild", name: "failed prebuild",
prebuildStatus: database.WorkspaceStatusFailed, prebuildStatus: database.WorkspaceStatusFailed,
shouldCreateNewPrebuild: true, shouldCreateNewPrebuild: true,
shouldDeleteOldPrebuild: false,
}, },
{ {
name: "canceled prebuild", name: "canceled prebuild",
prebuildStatus: database.WorkspaceStatusCanceled, prebuildStatus: database.WorkspaceStatusCanceled,
shouldCreateNewPrebuild: true, shouldCreateNewPrebuild: true,
shouldDeleteOldPrebuild: false,
}, },
{ // {
name: "deleted prebuild", // name: "deleted prebuild",
prebuildStatus: database.WorkspaceStatusDeleted, // prebuildStatus: database.WorkspaceStatusDeleted,
shouldCreateNewPrebuild: true, // shouldConsiderPrebuildRunning: false,
}, // shouldConsiderPrebuildInProgress: false,
// shouldCreateNewPrebuild: true,
// shouldDeleteOldPrebuild: false,
// },
{ {
name: "pending prebuild", name: "pending prebuild",
prebuildStatus: database.WorkspaceStatusPending, prebuildStatus: database.WorkspaceStatusPending,
shouldCreateNewPrebuild: false, shouldCreateNewPrebuild: false,
shouldDeleteOldPrebuild: false,
}, },
} }
for _, tc := range testCases { for _, tc := range testCases {
tc := tc
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
t.Parallel() t.Parallel()
ctx := testutil.Context(t, testutil.WaitShort) ctx := testutil.Context(t, testutil.WaitShort)
@ -179,141 +310,43 @@ func TestPrebuildCreation(t *testing.T) {
logger := testutil.Logger(t) logger := testutil.Logger(t)
controller := NewController(db, pubsub, cfg, logger) controller := NewController(db, pubsub, cfg, logger)
// given a user orgID, userID, templateID := setupTestDBTemplate(t, db)
org := dbgen.Organization(t, db, database.Organization{}) _, _, prebuildID := setupTestDBPrebuild(
user := dbgen.User(t, db, database.User{}) t,
ctx,
template := dbgen.Template(t, db, database.Template{ db,
CreatedBy: user.ID, pubsub,
OrganizationID: org.ID, tc.prebuildStatus,
}) orgID,
templateVersionJob := dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{ userID,
ID: uuid.New(), templateID,
CreatedAt: time.Now().Add(-2 * time.Hour), )
CompletedAt: sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true},
OrganizationID: org.ID,
InitiatorID: user.ID,
})
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
OrganizationID: org.ID,
CreatedBy: user.ID,
JobID: templateVersionJob.ID,
})
db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
ID: template.ID,
ActiveVersionID: templateVersion.ID,
})
preset, err := db.InsertPreset(ctx, database.InsertPresetParams{
TemplateVersionID: templateVersion.ID,
Name: "test",
})
require.NoError(t, err)
_, err = db.InsertPresetParameters(ctx, database.InsertPresetParametersParams{
TemplateVersionPresetID: preset.ID,
Names: []string{"test"},
Values: []string{"test"},
})
require.NoError(t, err)
_, err = db.InsertPresetPrebuild(ctx, database.InsertPresetPrebuildParams{
ID: uuid.New(),
PresetID: preset.ID,
DesiredInstances: 1,
})
require.NoError(t, err)
completedAt := sql.NullTime{}
cancelledAt := sql.NullTime{}
transition := database.WorkspaceTransitionStart
deleted := false
buildError := sql.NullString{}
switch tc.prebuildStatus {
case database.WorkspaceStatusRunning:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
case database.WorkspaceStatusStopped:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
transition = database.WorkspaceTransitionStop
case database.WorkspaceStatusFailed:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
buildError = sql.NullString{String: "build failed", Valid: true}
case database.WorkspaceStatusCanceled:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
cancelledAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
case database.WorkspaceStatusDeleted:
completedAt = sql.NullTime{Time: time.Now().Add(-time.Hour), Valid: true}
transition = database.WorkspaceTransitionDelete
deleted = true
case database.WorkspaceStatusPending:
completedAt = sql.NullTime{}
transition = database.WorkspaceTransitionStart
default:
}
workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
TemplateID: template.ID,
OrganizationID: org.ID,
OwnerID: OwnerID,
Deleted: deleted,
})
job := dbgen.ProvisionerJob(t, db, pubsub, database.ProvisionerJob{
InitiatorID: OwnerID,
CreatedAt: time.Now().Add(-2 * time.Hour),
CompletedAt: completedAt,
CanceledAt: cancelledAt,
OrganizationID: org.ID,
Error: buildError,
})
dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
WorkspaceID: workspace.ID,
InitiatorID: OwnerID,
TemplateVersionID: templateVersion.ID,
JobID: job.ID,
TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
Transition: transition,
})
controller.reconcile(ctx, nil) controller.reconcile(ctx, nil)
jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, time.Now().Add(-time.Hour))
createdNewPrebuild := false
deletedOldPrebuild := true
workspaces, err := db.GetWorkspacesByTemplateID(ctx, templateID)
require.NoError(t, err) require.NoError(t, err)
require.Equal(t, tc.shouldCreateNewPrebuild, len(jobs) == 1) for _, workspace := range workspaces {
if workspace.ID == prebuildID {
deletedOldPrebuild = false
}
if workspace.ID != prebuildID {
createdNewPrebuild = true
}
}
require.Equal(t, tc.shouldCreateNewPrebuild, createdNewPrebuild)
require.Equal(t, tc.shouldDeleteOldPrebuild, deletedOldPrebuild)
}) })
} }
} }
func TestDeleteUnwantedPrebuilds(t *testing.T) { func TestInactiveTemplateVersionPrebuilds(t *testing.T) {
if !dbtestutil.WillUsePostgres() { // Scenario: Prebuilds are never created and always deleted if the template version is inactive
t.Skip("This test requires postgres")
}
// Scenario: Prebuilds are deleted if and only if they are extraneous
t.Parallel() t.Parallel()
t.Skip("todo")
ctx := testutil.Context(t, testutil.WaitShort)
db, pubsub := dbtestutil.NewDB(t)
cfg := codersdk.PrebuildsConfig{}
logger := testutil.Logger(t)
controller := NewController(db, pubsub, cfg, logger)
// when does a prebuild get deleted?
// * when it is in some way permanently ineligible to be claimed
// * this could be because the build failed or was canceled
// * or it belongs to a template version that is no longer active
// * or it belongs to a template version that is deprecated
// * when there are more prebuilds than the preset desires
// * someone could have manually created a workspace for the prebuild user
// * any workspaces that were created for the prebuilds user and don't match a preset should be deleted - deferred
// given a preset that desires 2 prebuilds
// and there are 3 running prebuilds for the preset
// and there are 4 non-running prebuilds for the preset
// * one is not running because its latest build was a stop transition
// * another is not running because its latest build was a delete transition
// * a third is not running because its latest build was a start transition but the build failed
// * a fourth is not running because its latest build was a start transition but the build was canceled
// when we trigger the reconciliation loop for all templates
controller.reconcile(ctx, nil)
// then the four non running prebuilds are deleted
// and 1 of the running prebuilds is deleted
// because stopped, deleted and failed builds are not considered running in terms of the definition of "running" above.
} }
type partiallyMockedDB struct { type partiallyMockedDB struct {

View File

@ -11,7 +11,6 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"cdr.dev/slog/sloggers/slogtest" "cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbgen" "github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil" "github.com/coder/coder/v2/coderd/database/dbtestutil"

View File

@ -153,7 +153,7 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
// TODO: optimization: we should probably be able to create prebuilds while others are deleting for a given preset. // TODO: optimization: we should probably be able to create prebuilds while others are deleting for a given preset.
if (toCreate+toDelete) > 0 && (starting+stopping+deleting) > 0 { if (toCreate+toDelete) > 0 && (starting+stopping+deleting) > 0 {
// TODO: move up // TODO: move up
//c.logger.Warn(ctx, "prebuild operations in progress, skipping reconciliation", // c.logger.Warn(ctx, "prebuild operations in progress, skipping reconciliation",
// slog.F("template_id", p.preset.TemplateID.String()), slog.F("starting", starting), // slog.F("template_id", p.preset.TemplateID.String()), slog.F("starting", starting),
// slog.F("stopping", stopping), slog.F("deleting", deleting), // slog.F("stopping", stopping), slog.F("deleting", deleting),
// slog.F("wanted_to_create", create), slog.F("wanted_to_delete", toDelete)) // slog.F("wanted_to_create", create), slog.F("wanted_to_delete", toDelete))
@ -181,7 +181,7 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
if i >= len(p.running) { if i >= len(p.running) {
// This should never happen. // This should never happen.
// TODO: move up // TODO: move up
//c.logger.Warn(ctx, "unexpected reconciliation state; extraneous count exceeds running prebuilds count!", // c.logger.Warn(ctx, "unexpected reconciliation state; extraneous count exceeds running prebuilds count!",
// slog.F("running_count", len(p.running)), // slog.F("running_count", len(p.running)),
// slog.F("extraneous", extraneous)) // slog.F("extraneous", extraneous))
continue continue
@ -193,7 +193,7 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
actions.deleteIDs = append(actions.deleteIDs, victims...) actions.deleteIDs = append(actions.deleteIDs, victims...)
// TODO: move up // TODO: move up
//c.logger.Warn(ctx, "found extra prebuilds running, picking random victim(s)", // c.logger.Warn(ctx, "found extra prebuilds running, picking random victim(s)",
// slog.F("template_id", p.preset.TemplateID.String()), slog.F("desired", desired), slog.F("actual", actual), slog.F("extra", extraneous), // slog.F("template_id", p.preset.TemplateID.String()), slog.F("desired", desired), slog.F("actual", actual), slog.F("extra", extraneous),
// slog.F("victims", victims)) // slog.F("victims", victims))
@ -205,7 +205,7 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
if toDelete > 0 && len(p.running) != toDelete { if toDelete > 0 && len(p.running) != toDelete {
// TODO: move up // TODO: move up
//c.logger.Warn(ctx, "mismatch between running prebuilds and expected deletion count!", // c.logger.Warn(ctx, "mismatch between running prebuilds and expected deletion count!",
// slog.F("template_id", s.preset.TemplateID.String()), slog.F("running", len(p.running)), slog.F("to_delete", toDelete)) // slog.F("template_id", s.preset.TemplateID.String()), slog.F("running", len(p.running)), slog.F("to_delete", toDelete))
} }