Files
coder/enterprise/coderd/prebuilds/reconcile_test.go

1700 lines
58 KiB
Go

package prebuilds_test
import (
"context"
"database/sql"
"fmt"
"sync"
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
"github.com/coder/coder/v2/coderd/util/slice"
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
"tailscale.com/types/ptr"
"cdr.dev/slog"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/quartz"
"github.com/coder/serpent"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/pubsub"
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
"github.com/coder/coder/v2/testutil"
)
// TestNoReconciliationActionsIfNoPresets checks that ReconcileAll creates no
// provisioner jobs when the only template version in the database has no
// presets attached to it.
func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
	// Scenario: No reconciliation actions are taken if there are no presets
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("dbmem times out on nesting transactions, postgres ignores the inner ones")
	}

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitLong)
	db, ps := dbtestutil.NewDB(t)
	cfg := codersdk.PrebuildsConfig{
		ReconciliationInterval: serpent.Duration(testutil.WaitLong),
	}
	logger := testutil.Logger(t)
	// The reconciler is given the same mock clock that the final assertion
	// uses to compute its "created after" cutoff, so both sides agree on time.
	controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock, prometheus.NewRegistry(), newNoopEnqueuer())

	// given a template version with no presets
	org := dbgen.Organization(t, db, database.Organization{})
	user := dbgen.User(t, db, database.User{})
	template := dbgen.Template(t, db, database.Template{
		CreatedBy:      user.ID,
		OrganizationID: org.ID,
	})
	templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
		TemplateID:     uuid.NullUUID{UUID: template.ID, Valid: true},
		OrganizationID: org.ID,
		CreatedBy:      user.ID,
	})

	// verify that the db state is correct
	gotTemplateVersion, err := db.GetTemplateVersionByID(ctx, templateVersion.ID)
	require.NoError(t, err)
	require.Equal(t, templateVersion, gotTemplateVersion)

	// when we trigger the reconciliation loop for all templates
	require.NoError(t, controller.ReconcileAll(ctx))

	// then no reconciliation actions are taken
	// because without presets, there are no prebuilds
	// and without prebuilds, there is nothing to reconcile
	jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, clock.Now().Add(earlier))
	require.NoError(t, err)
	require.Empty(t, jobs)
}
// TestNoReconciliationActionsIfNoPrebuilds checks that ReconcileAll creates no
// provisioner jobs for a template version whose preset was inserted without
// any prebuild configuration (only a name and one parameter).
func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
	// Scenario: No reconciliation actions are taken if there are no prebuilds
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("dbmem times out on nesting transactions, postgres ignores the inner ones")
	}

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitLong)
	db, ps := dbtestutil.NewDB(t)
	cfg := codersdk.PrebuildsConfig{
		ReconciliationInterval: serpent.Duration(testutil.WaitLong),
	}
	logger := testutil.Logger(t)
	// The reconciler is given the same mock clock that the final assertion
	// uses to compute its "created after" cutoff, so both sides agree on time.
	controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock, prometheus.NewRegistry(), newNoopEnqueuer())

	// given there are presets, but no prebuilds
	org := dbgen.Organization(t, db, database.Organization{})
	user := dbgen.User(t, db, database.User{})
	template := dbgen.Template(t, db, database.Template{
		CreatedBy:      user.ID,
		OrganizationID: org.ID,
	})
	templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
		TemplateID:     uuid.NullUUID{UUID: template.ID, Valid: true},
		OrganizationID: org.ID,
		CreatedBy:      user.ID,
	})
	preset, err := db.InsertPreset(ctx, database.InsertPresetParams{
		TemplateVersionID: templateVersion.ID,
		Name:              "test",
	})
	require.NoError(t, err)
	_, err = db.InsertPresetParameters(ctx, database.InsertPresetParametersParams{
		TemplateVersionPresetID: preset.ID,
		Names:                   []string{"test"},
		Values:                  []string{"test"},
	})
	require.NoError(t, err)

	// verify that the db state is correct
	presetParameters, err := db.GetPresetParametersByTemplateVersionID(ctx, templateVersion.ID)
	require.NoError(t, err)
	require.NotEmpty(t, presetParameters)

	// when we trigger the reconciliation loop for all templates
	require.NoError(t, controller.ReconcileAll(ctx))

	// then no reconciliation actions are taken
	// because without prebuilds, there is nothing to reconcile
	// even if there are presets
	jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, clock.Now().Add(earlier))
	require.NoError(t, err)
	require.Empty(t, jobs)
}
// TestPrebuildReconciliation seeds a single prebuild in a given
// (transition, job status) state and runs ReconcileAll repeatedly against it.
//
// Each test case is expanded into a matrix over: whether the prebuild's
// template version is active, the prebuild's latest transition, its job
// status, whether the template is soft-deleted, and whether pubsub publishing
// is broken. Depending on which expectations a case sets, the subtest asserts
// whether exactly one new workspace appeared for the template and/or whether a
// delete build was added to the original prebuild.
func TestPrebuildReconciliation(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	type testCase struct {
		name                      string
		prebuildLatestTransitions []database.WorkspaceTransition
		prebuildJobStatuses       []database.ProvisionerJobStatus
		templateVersionActive     []bool
		templateDeleted           []bool
		// A nil expectation means the corresponding assertion is skipped.
		shouldCreateNewPrebuild *bool
		shouldDeleteOldPrebuild *bool
	}

	testCases := []testCase{
		{
			name:                      "never create prebuilds for inactive template versions",
			prebuildLatestTransitions: allTransitions,
			prebuildJobStatuses:       allJobStatuses,
			templateVersionActive:     []bool{false},
			shouldCreateNewPrebuild:   ptr.To(false),
			templateDeleted:           []bool{false},
		},
		{
			name: "no need to create a new prebuild if one is already running",
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStart,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusSucceeded,
			},
			templateVersionActive:   []bool{true},
			shouldCreateNewPrebuild: ptr.To(false),
			templateDeleted:         []bool{false},
		},
		{
			name: "don't create a new prebuild if one is queued to build or already building",
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStart,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusPending,
				database.ProvisionerJobStatusRunning,
			},
			templateVersionActive:   []bool{true},
			shouldCreateNewPrebuild: ptr.To(false),
			templateDeleted:         []bool{false},
		},
		{
			name: "create a new prebuild if one is in a state that disqualifies it from ever being claimed",
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStop,
				database.WorkspaceTransitionDelete,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusPending,
				database.ProvisionerJobStatusRunning,
				database.ProvisionerJobStatusCanceling,
				database.ProvisionerJobStatusSucceeded,
			},
			templateVersionActive:   []bool{true},
			shouldCreateNewPrebuild: ptr.To(true),
			templateDeleted:         []bool{false},
		},
		{
			// See TestFailedBuildBackoff for the start/failed case.
			name: "create a new prebuild if one is in any kind of exceptional state",
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStop,
				database.WorkspaceTransitionDelete,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusCanceled,
			},
			templateVersionActive:   []bool{true},
			shouldCreateNewPrebuild: ptr.To(true),
			templateDeleted:         []bool{false},
		},
		{
			name: "never attempt to interfere with active builds",
			// The workspace builder does not allow scheduling a new build if there is already a build
			// pending, running, or canceling. As such, we should never attempt to start, stop or delete
			// such prebuilds. Rather, we should wait for the existing build to complete and reconcile
			// again in the next cycle.
			prebuildLatestTransitions: allTransitions,
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusPending,
				database.ProvisionerJobStatusRunning,
				database.ProvisionerJobStatusCanceling,
			},
			templateVersionActive:   []bool{true, false},
			shouldDeleteOldPrebuild: ptr.To(false),
			templateDeleted:         []bool{false},
		},
		{
			name: "never delete prebuilds in an exceptional state",
			// We don't want to destroy evidence that might be useful to operators
			// when troubleshooting issues. So we leave these prebuilds in place.
			// Operators are expected to manually delete these prebuilds.
			prebuildLatestTransitions: allTransitions,
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusCanceled,
				database.ProvisionerJobStatusFailed,
			},
			templateVersionActive:   []bool{true, false},
			shouldDeleteOldPrebuild: ptr.To(false),
			templateDeleted:         []bool{false},
		},
		{
			name: "delete running prebuilds for inactive template versions",
			// We only support prebuilds for active template versions.
			// If a template version is inactive, we should delete any prebuilds
			// that are running.
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStart,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusSucceeded,
			},
			templateVersionActive:   []bool{false},
			shouldDeleteOldPrebuild: ptr.To(true),
			templateDeleted:         []bool{false},
		},
		{
			name: "don't delete running prebuilds for active template versions",
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStart,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusSucceeded,
			},
			templateVersionActive:   []bool{true},
			shouldDeleteOldPrebuild: ptr.To(false),
			templateDeleted:         []bool{false},
		},
		{
			name: "don't delete stopped or already deleted prebuilds",
			// We don't ever stop prebuilds. A stopped prebuild is an exceptional state.
			// As such we keep it, to allow operators to investigate the cause.
			prebuildLatestTransitions: []database.WorkspaceTransition{
				database.WorkspaceTransitionStop,
				database.WorkspaceTransitionDelete,
			},
			prebuildJobStatuses: []database.ProvisionerJobStatus{
				database.ProvisionerJobStatusSucceeded,
			},
			templateVersionActive:   []bool{true, false},
			shouldDeleteOldPrebuild: ptr.To(false),
			templateDeleted:         []bool{false},
		},
		{
			// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed).
			// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow
			// the template was soft-deleted any running prebuilds will be removed.
			// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template.
			name:                      "soft-deleted templates MAY have prebuilds",
			prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
			prebuildJobStatuses:       []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
			templateVersionActive:     []bool{true, false},
			shouldCreateNewPrebuild:   ptr.To(false),
			shouldDeleteOldPrebuild:   ptr.To(true),
			templateDeleted:           []bool{true},
		},
	}

	for _, tc := range testCases {
		tc := tc // capture for parallel
		for _, templateVersionActive := range tc.templateVersionActive {
			for _, prebuildLatestTransition := range tc.prebuildLatestTransitions {
				for _, prebuildJobStatus := range tc.prebuildJobStatuses {
					for _, templateDeleted := range tc.templateDeleted {
						for _, useBrokenPubsub := range []bool{true, false} {
							// Every matrix dimension is part of the subtest name so that
							// each combination gets a unique, individually runnable name
							// instead of relying on Go's "#01" de-duplication suffixes.
							subtestName := fmt.Sprintf(
								"%s - %s - %s - version_active=%v - template_deleted=%v - pubsub_broken=%v",
								tc.name,
								prebuildLatestTransition,
								prebuildJobStatus,
								templateVersionActive,
								templateDeleted,
								useBrokenPubsub,
							)
							t.Run(subtestName, func(t *testing.T) {
								t.Parallel()

								// On failure, dump the full parameter combination.
								t.Cleanup(func() {
									if t.Failed() {
										t.Logf("failed to run test: %s", tc.name)
										t.Logf("templateVersionActive: %t", templateVersionActive)
										t.Logf("prebuildLatestTransition: %s", prebuildLatestTransition)
										t.Logf("prebuildJobStatus: %s", prebuildJobStatus)
										t.Logf("templateDeleted: %t", templateDeleted)
										t.Logf("useBrokenPubsub: %t", useBrokenPubsub)
									}
								})

								clock := quartz.NewMock(t)
								ctx := testutil.Context(t, testutil.WaitShort)
								cfg := codersdk.PrebuildsConfig{}
								logger := slogtest.Make(
									t, &slogtest.Options{IgnoreErrors: true},
								).Leveled(slog.LevelDebug)
								db, pubSub := dbtestutil.NewDB(t)

								ownerID := uuid.New()
								dbgen.User(t, db, database.User{
									ID: ownerID,
								})
								org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
								templateVersionID := setupTestDBTemplateVersion(
									ctx,
									t,
									clock,
									db,
									pubSub,
									org.ID,
									ownerID,
									template.ID,
								)
								preset := setupTestDBPreset(
									t,
									db,
									templateVersionID,
									1,
									uuid.New().String(),
								)
								prebuild, _ := setupTestDBPrebuild(
									t,
									clock,
									db,
									pubSub,
									prebuildLatestTransition,
									prebuildJobStatus,
									org.ID,
									preset,
									template.ID,
									templateVersionID,
								)

								if !templateVersionActive {
									// Create a new template version and mark it as active
									// This marks the template version that we care about as inactive
									setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
								}

								// The pubsub is swapped only after setup, so only the
								// reconciler sees the failing publisher.
								if useBrokenPubsub {
									pubSub = &brokenPublisher{Pubsub: pubSub}
								}
								controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())

								// Run the reconciliation multiple times to ensure idempotency
								// 8 was arbitrary, but large enough to reasonably trust the result
								for i := 1; i <= 8; i++ {
									require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)

									if tc.shouldCreateNewPrebuild != nil {
										newPrebuildCount := 0
										workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
										require.NoError(t, err)
										for _, workspace := range workspaces {
											if workspace.ID != prebuild.ID {
												newPrebuildCount++
											}
										}
										// This test configures a preset that desires one prebuild.
										// In cases where new prebuilds should be created, there should be exactly one.
										require.Equal(t, *tc.shouldCreateNewPrebuild, newPrebuildCount == 1)
									}

									if tc.shouldDeleteOldPrebuild != nil {
										builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
											WorkspaceID: prebuild.ID,
										})
										require.NoError(t, err)
										if *tc.shouldDeleteOldPrebuild {
											require.Equal(t, 2, len(builds))
											require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
										} else {
											require.Equal(t, 1, len(builds))
											require.Equal(t, prebuildLatestTransition, builds[0].Transition)
										}
									}
								}
							})
						}
					}
				}
			}
		}
	}
}
// brokenPublisher wraps a real pubsub.Pubsub and overrides Publish so that it
// always returns an error. Tests use it to confirm that the reconciler behaves
// the same when publishing fails, because the published messages are advisory
// rather than essential.
type brokenPublisher struct {
	pubsub.Pubsub
}

// Compile-time check that the wrapper still satisfies pubsub.Pubsub.
var _ pubsub.Pubsub = (*brokenPublisher)(nil)

// Publish never succeeds: it waits briefly and then reports a failure for the
// given event.
//
// It intentionally does not special-case EventJobPosted
// (coderd/database/provisionerjobs/provisionerjobs.go), since doing so would
// require too much knowledge of the underlying implementation.
func (*brokenPublisher) Publish(event string, _ []byte) error {
	// Mimic some work being done before failing.
	time.Sleep(testutil.IntervalFast)
	return xerrors.Errorf("failed to publish %q", event)
}
// TestMultiplePresetsPerTemplateVersion attaches two presets to one template
// version (set up with counts 4 and 10), seeds running prebuilds for the first
// preset only, and then checks after every reconciliation pass that the number
// of workspaces not seeded by the test equals the second preset's
// DesiredInstances.
func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	// Reconciliation is repeated to ensure idempotency. 8 was arbitrary, but
	// large enough to reasonably trust the result.
	const reconcilePasses = 8

	mockClock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	reconciler := prebuilds.NewStoreReconciler(
		db,
		pubSub,
		codersdk.PrebuildsConfig{},
		logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{ID: ownerID})
	org, template := setupTestDBTemplate(t, db, ownerID, false)
	versionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)

	firstPreset := setupTestDBPreset(t, db, versionID, 4, uuid.New().String())
	secondPreset := setupTestDBPreset(t, db, versionID, 10, uuid.New().String())

	// Seed as many running prebuilds as the first preset desires and remember
	// their IDs so they can be excluded from the count below.
	var seededIDs []uuid.UUID
	for n := int32(0); n < firstPreset.DesiredInstances.Int32; n++ {
		seeded, _ := setupTestDBPrebuild(
			t, mockClock, db, pubSub,
			database.WorkspaceTransitionStart,
			database.ProvisionerJobStatusRunning,
			org.ID, firstPreset, template.ID, versionID,
		)
		seededIDs = append(seededIDs, seeded.ID)
	}

	for pass := 1; pass <= reconcilePasses; pass++ {
		require.NoErrorf(t, reconciler.ReconcileAll(ctx), "failed on iteration %d", pass)

		workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
		require.NoError(t, err)

		created := 0
		for _, ws := range workspaces {
			if !slice.Contains(seededIDs, ws.ID) {
				created++
			}
		}

		// NOTE: preset1 doesn't block creation of instances in preset2
		require.Equal(t, secondPreset.DesiredInstances.Int32, int32(created)) // nolint:gosec
	}
}
// TestInvalidPreset gives a template version a required parameter with no
// default, adds a preset that does not set it, and checks after every
// reconciliation pass that ReconcileAll returns no error and the template still
// has zero workspaces.
func TestInvalidPreset(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	// Reconciliation is repeated to ensure idempotency. 8 was arbitrary, but
	// large enough to reasonably trust the result.
	const reconcilePasses = 8

	mockClock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	reconciler := prebuilds.NewStoreReconciler(
		db,
		pubSub,
		codersdk.PrebuildsConfig{},
		logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{ID: ownerID})
	org, template := setupTestDBTemplate(t, db, ownerID, false)
	versionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)

	// The version requires a parameter that the preset below does not set, so
	// every attempt to create a prebuild is expected to fail.
	dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
		TemplateVersionID: versionID,
		Name:              "required-param",
		Description:       "required param to make sure creating prebuild will fail",
		Type:              "bool",
		DefaultValue:      "",
		Required:          true,
	})
	setupTestDBPreset(t, db, versionID, 1, uuid.New().String())

	for pass := 1; pass <= reconcilePasses; pass++ {
		require.NoErrorf(t, reconciler.ReconcileAll(ctx), "failed on iteration %d", pass)

		workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
		require.NoError(t, err)

		// NOTE: we don't have any new prebuilds, because their creation constantly fails.
		require.Equal(t, int32(0), int32(len(workspaces))) // nolint:gosec
	}
}
// TestDeletionOfPrebuiltWorkspaceWithInvalidPreset seeds one successfully
// started prebuilt workspace, then adds a newer template version that carries
// a required parameter with no default. After a single ReconcileAll it expects
// the original workspace to have a second build whose transition is delete.
func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	mockClock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	reconciler := prebuilds.NewStoreReconciler(
		db,
		pubSub,
		codersdk.PrebuildsConfig{},
		logger,
		quartz.NewMock(t),
		prometheus.NewRegistry(),
		newNoopEnqueuer(),
	)

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{ID: ownerID})
	org, template := setupTestDBTemplate(t, db, ownerID, false)
	oldVersionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
	preset := setupTestDBPreset(t, db, oldVersionID, 1, uuid.New().String())
	oldPrebuild, _ := setupTestDBPrebuild(
		t, mockClock, db, pubSub,
		database.WorkspaceTransitionStart,
		database.ProvisionerJobStatusSucceeded,
		org.ID, preset, template.ID, oldVersionID,
	)

	// make sure we have only one workspace
	workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
	require.NoError(t, err)
	require.Len(t, workspaces, 1)

	// Create a new template version and mark it as active.
	// This marks the previous template version as inactive.
	newVersionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)

	// Add required param, which is not set in preset.
	// Creating a new prebuilt workspace will therefore fail, but cleaning up
	// old prebuilt workspaces should still be possible.
	dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
		TemplateVersionID: newVersionID,
		Name:              "required-param",
		Description:       "required param which isn't set in preset",
		Type:              "bool",
		DefaultValue:      "",
		Required:          true,
	})

	// Old prebuilt workspace should be deleted.
	require.NoError(t, reconciler.ReconcileAll(ctx))

	builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
		WorkspaceID: oldPrebuild.ID,
	})
	require.NoError(t, err)

	// The old prebuilt workspace must have received a delete build even though
	// the new version has a required parameter that the preset does not set.
	require.Len(t, builds, 2)
	require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
}
// TestSkippingHardLimitedPresets seeds one failed prebuild and reconciles with
// a configured FailureHardLimit of either 1 or 2.
//
// With limit 1 it expects no new workspace, a preset status of hard-limited,
// exactly one notification to the template admin, and the hard-limited gauge
// set to 1. With limit 2 it expects a second workspace, a healthy preset
// status, and no hard-limited gauge.
func TestSkippingHardLimitedPresets(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	// Each scenario varies only the configured failure limit.
	scenarios := []struct {
		name           string
		hardLimit      int64
		isHardLimitHit bool
	}{
		{name: "hard limit is hit - skip creation of prebuilt workspace", hardLimit: 1, isHardLimitHit: true},
		{name: "hard limit is not hit - try to create prebuilt workspace again", hardLimit: 2, isHardLimitHit: false},
	}

	for _, tc := range scenarios {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			mockClock := quartz.NewMock(t)
			ctx := testutil.Context(t, testutil.WaitShort)
			cfg := codersdk.PrebuildsConfig{
				FailureHardLimit:              serpent.Int64(tc.hardLimit),
				ReconciliationBackoffInterval: 0,
			}
			logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
			db, pubSub := dbtestutil.NewDB(t)
			enqueuer := newFakeEnqueuer()
			registry := prometheus.NewRegistry()
			reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, mockClock, registry, enqueuer)

			// Template admin to receive a notification.
			templateAdmin := dbgen.User(t, db, database.User{
				RBACRoles: []string{codersdk.RoleTemplateAdmin},
			})

			// Template, version, and preset under test.
			ownerID := uuid.New()
			dbgen.User(t, db, database.User{ID: ownerID})
			org, template := setupTestDBTemplate(t, db, ownerID, false)
			versionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
			preset := setupTestDBPreset(t, db, versionID, 1, uuid.New().String())

			// Labels identifying this preset's hard-limited gauge; a fresh map
			// is built for every lookup.
			gaugeLabels := func() map[string]string {
				return map[string]string{
					"template_name": template.Name,
					"preset_name":   preset.Name,
					"org_name":      org.Name,
				}
			}

			// A failed prebuild that counts toward the hard failure limit.
			setupTestDBPrebuild(
				t, mockClock, db, pubSub,
				database.WorkspaceTransitionStart,
				database.ProvisionerJobStatusFailed,
				org.ID, preset, template.ID, versionID,
			)

			// Initial state: exactly one (failed) workspace exists.
			workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 1)

			// Initial state: the gauge is absent, i.e. the preset is not hard limited.
			require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
			families, err := registry.Gather()
			require.NoError(t, err)
			gauge := findMetric(families, prebuilds.MetricPresetHardLimitedGauge, gaugeLabels())
			require.Nil(t, gauge)

			// The seeded failure triggers the backoff mechanism in ReconcileAll.
			// Even though ReconciliationBackoffInterval is zero, the clock still
			// has to move forward by at least one nanosecond.
			mockClock.Advance(time.Nanosecond).MustWait(ctx)

			// First pass attempts to create a new prebuild; the outcome depends
			// on whether the hard limit has been reached.
			require.NoError(t, reconciler.ReconcileAll(ctx))
			// Two more passes: these must not produce additional notifications,
			// since a notification is only sent once.
			require.NoError(t, reconciler.ReconcileAll(ctx))
			require.NoError(t, reconciler.ReconcileAll(ctx))

			// Final state after reconciliation.
			workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
			require.NoError(t, err)

			if tc.isHardLimitHit {
				// No new workspace, and the preset is flagged as hard limited.
				require.Len(t, workspaces, 1)
				require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)

				// Exactly one notification, addressed to the template admin.
				matching := enqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
					return assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") &&
						assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver")
				})
				require.Len(t, matching, 1)

				// The gauge is now present and set to 1.
				families, err = registry.Gather()
				require.NoError(t, err)
				gauge = findMetric(families, prebuilds.MetricPresetHardLimitedGauge, gaugeLabels())
				require.NotNil(t, gauge)
				require.NotNil(t, gauge.GetGauge())
				require.EqualValues(t, 1, gauge.GetGauge().GetValue())
				return
			}

			// Limit not reached: a new workspace was created and the preset is healthy.
			require.Len(t, workspaces, 2)
			require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus)

			// Limit not reached: the gauge is still absent.
			families, err = registry.Gather()
			require.NoError(t, err)
			gauge = findMetric(families, prebuilds.MetricPresetHardLimitedGauge, gaugeLabels())
			require.Nil(t, gauge)
		})
	}
}
// TestHardLimitedPresetShouldNotBlockDeletion seeds one successful and one
// failed prebuild for a preset with a failure hard limit of 1, reconciles until
// the preset is hard limited, and then makes the prebuilds outdated by either
// creating a new template version or soft-deleting the template.
//
// After one more reconciliation it expects a pending delete build on the
// successful workspace and the hard-limited gauge to be gone.
func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	// Both scenarios hit the hard limit; they differ in how the existing
	// prebuilt workspace becomes eligible for deletion.
	scenarios := []struct {
		name                     string
		hardLimit                int64
		createNewTemplateVersion bool
		deleteTemplate           bool
	}{
		{
			// hard limit is hit - but we allow deletion of prebuilt workspace because it's outdated (new template version was created)
			name:                     "new template version is created",
			hardLimit:                1,
			createNewTemplateVersion: true,
			deleteTemplate:           false,
		},
		{
			// hard limit is hit - but we allow deletion of prebuilt workspace because template is deleted
			name:                     "template is deleted",
			hardLimit:                1,
			createNewTemplateVersion: false,
			deleteTemplate:           true,
		},
	}

	for _, tc := range scenarios {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			mockClock := quartz.NewMock(t)
			ctx := testutil.Context(t, testutil.WaitShort)
			cfg := codersdk.PrebuildsConfig{
				FailureHardLimit:              serpent.Int64(tc.hardLimit),
				ReconciliationBackoffInterval: 0,
			}
			logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
			db, pubSub := dbtestutil.NewDB(t)
			enqueuer := newFakeEnqueuer()
			registry := prometheus.NewRegistry()
			reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, mockClock, registry, enqueuer)

			// Template admin to receive a notification.
			templateAdmin := dbgen.User(t, db, database.User{
				RBACRoles: []string{codersdk.RoleTemplateAdmin},
			})

			// Template, version, and preset under test.
			ownerID := uuid.New()
			dbgen.User(t, db, database.User{ID: ownerID})
			org, template := setupTestDBTemplate(t, db, ownerID, false)
			versionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
			preset := setupTestDBPreset(t, db, versionID, 2, uuid.New().String())

			// Labels identifying this preset's hard-limited gauge; a fresh map
			// is built for every lookup.
			gaugeLabels := func() map[string]string {
				return map[string]string{
					"template_name": template.Name,
					"preset_name":   preset.Name,
					"org_name":      org.Name,
				}
			}

			// A successful prebuilt workspace.
			successfulWorkspace, _ := setupTestDBPrebuild(
				t, mockClock, db, pubSub,
				database.WorkspaceTransitionStart,
				database.ProvisionerJobStatusSucceeded,
				org.ID, preset, template.ID, versionID,
			)

			// Advance the clock so the prebuilt workspaces are created in the
			// order [successful, failed].
			mockClock.Advance(time.Second).MustWait(ctx)

			// A failed prebuilt workspace that counts toward the hard failure limit.
			setupTestDBPrebuild(
				t, mockClock, db, pubSub,
				database.WorkspaceTransitionStart,
				database.ProvisionerJobStatusFailed,
				org.ID, preset, template.ID, versionID,
			)

			// countJobStatuses tallies, across every build of the given
			// workspaces, how many provisioner jobs are in each status.
			countJobStatuses := func(list []database.WorkspaceTable) map[database.ProvisionerJobStatus]int {
				counts := make(map[database.ProvisionerJobStatus]int)
				for _, ws := range list {
					wsBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
						WorkspaceID: ws.ID,
					})
					require.NoError(t, err)

					for _, build := range wsBuilds {
						job, err := db.GetProvisionerJobByID(ctx, build.JobID)
						require.NoError(t, err)
						counts[job.JobStatus]++
					}
				}
				return counts
			}

			// Initial state: two workspaces exist, one successful, one failed.
			workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 2)
			statusCounts := countJobStatuses(workspaces)
			require.Len(t, statusCounts, 2)
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusSucceeded])
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusFailed])

			// Initial state: the gauge is absent, i.e. the preset is not hard limited.
			require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
			families, err := registry.Gather()
			require.NoError(t, err)
			gauge := findMetric(families, prebuilds.MetricPresetHardLimitedGauge, gaugeLabels())
			require.Nil(t, gauge)

			// The seeded failure triggers the backoff mechanism in ReconcileAll.
			// Even though ReconciliationBackoffInterval is zero, the clock still
			// has to move forward by at least one nanosecond.
			mockClock.Advance(time.Nanosecond).MustWait(ctx)

			// First pass attempts to create a new prebuild.
			require.NoError(t, reconciler.ReconcileAll(ctx))
			// Two more passes: these must not produce additional notifications,
			// since a notification is only sent once.
			require.NoError(t, reconciler.ReconcileAll(ctx))
			require.NoError(t, reconciler.ReconcileAll(ctx))

			// With the hard limit reached, no new workspace has been created
			// and the job tallies are unchanged.
			workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 2)
			statusCounts = countJobStatuses(workspaces)
			require.Len(t, statusCounts, 2)
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusSucceeded])
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusFailed])

			updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
			require.NoError(t, err)
			require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)

			// Exactly one notification, addressed to the template admin.
			matching := enqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
				return assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") &&
					assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver")
			})
			require.Len(t, matching, 1)

			// The gauge is now present and set to 1.
			families, err = registry.Gather()
			require.NoError(t, err)
			gauge = findMetric(families, prebuilds.MetricPresetHardLimitedGauge, gaugeLabels())
			require.NotNil(t, gauge)
			require.NotNil(t, gauge.GetGauge())
			require.EqualValues(t, 1, gauge.GetGauge().GetValue())

			if tc.createNewTemplateVersion {
				// Create a new template version and mark it as active
				// This marks the template version that we care about as inactive
				setupTestDBTemplateVersion(ctx, t, mockClock, db, pubSub, org.ID, ownerID, template.ID)
			}

			if tc.deleteTemplate {
				// Soft-delete the template.
				require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
					ID:        template.ID,
					Deleted:   true,
					UpdatedAt: dbtime.Now(),
				}))
			}

			// Reconcile once more: the successful but now outdated prebuilt
			// workspace should be scheduled for deletion.
			require.NoError(t, reconciler.ReconcileAll(ctx))

			workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
			require.NoError(t, err)
			require.Len(t, workspaces, 2)

			statusCounts = countJobStatuses(workspaces)
			require.Len(t, statusCounts, 3)
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusSucceeded])
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusFailed])
			// Pending job should be the job that deletes successful, but outdated prebuilt workspace.
			// Prebuilt workspace MUST be deleted, despite the fact that preset is marked as hard limited.
			require.Equal(t, 1, statusCounts[database.ProvisionerJobStatusPending])

			successfulBuilds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
				WorkspaceID: successfulWorkspace.ID,
			})
			require.NoError(t, err)
			require.Len(t, successfulBuilds, 2)
			// The successful workspace now has a delete build ahead of its
			// original start build.
			require.Equal(t, database.WorkspaceTransitionDelete, successfulBuilds[0].Transition)
			require.Equal(t, database.WorkspaceTransitionStart, successfulBuilds[1].Transition)

			// Metric is deleted after preset became outdated.
			families, err = registry.Gather()
			require.NoError(t, err)
			gauge = findMetric(families, prebuilds.MetricPresetHardLimitedGauge, gaugeLabels())
			require.Nil(t, gauge)
		})
	}
}
// TestRunLoop drives the reconciliation loop started by Run with a mock clock and
// checks that each tick triggers exactly one reconciliation pass: presets that exist
// when a tick fires get their prebuilds created, and a preset added between ticks is
// only picked up by the following tick.
func TestRunLoop(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	prebuildLatestTransition := database.WorkspaceTransitionStart
	prebuildJobStatus := database.ProvisionerJobStatusRunning
	templateDeleted := false

	clock := quartz.NewMock(t)
	ctx := testutil.Context(t, testutil.WaitShort)
	backoffInterval := time.Minute
	cfg := codersdk.PrebuildsConfig{
		// Given: explicitly defined backoff configuration to validate timings.
		ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
		ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
		ReconciliationInterval:        serpent.Duration(time.Second),
	}
	logger := slogtest.Make(
		t, &slogtest.Options{IgnoreErrors: true},
	).Leveled(slog.LevelDebug)
	db, pubSub := dbtestutil.NewDB(t)
	reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry(), newNoopEnqueuer())

	ownerID := uuid.New()
	dbgen.User(t, db, database.User{
		ID: ownerID,
	})
	org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
	templateVersionID := setupTestDBTemplateVersion(
		ctx,
		t,
		clock,
		db,
		pubSub,
		org.ID,
		ownerID,
		template.ID,
	)
	preset := setupTestDBPreset(
		t,
		db,
		templateVersionID,
		4,
		uuid.New().String(),
	)
	preset2 := setupTestDBPreset(
		t,
		db,
		templateVersionID,
		10,
		uuid.New().String(),
	)

	// Seed the first preset with as many running prebuilds as it desires and remember
	// their workspace IDs, so that workspaces created by the loop can be told apart.
	prebuildIDs := make([]uuid.UUID, 0, preset.DesiredInstances.Int32)
	for i := 0; i < int(preset.DesiredInstances.Int32); i++ {
		prebuild, _ := setupTestDBPrebuild(
			t,
			clock,
			db,
			pubSub,
			prebuildLatestTransition,
			prebuildJobStatus,
			org.ID,
			preset,
			template.ID,
			templateVersionID,
		)
		prebuildIDs = append(prebuildIDs, prebuild.ID)
	}

	// getNewPrebuildCount returns how many workspaces of the template were not seeded
	// above, or -1 if the lookup failed.
	getNewPrebuildCount := func() int32 {
		workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
		// assert, not require: this closure is also evaluated by require.Eventually on a
		// separate goroutine, where FailNow must not be called. The -1 sentinel can never
		// equal a desired count, so every caller still fails on a lookup error.
		if !assert.NoError(t, err) {
			return -1
		}

		var newPrebuildCount int32
		for _, workspace := range workspaces {
			if slice.Contains(prebuildIDs, workspace.ID) {
				continue
			}
			newPrebuildCount++
		}
		return newPrebuildCount
	}

	// we need to wait until ticker is initialized, and only then use clock.Advance()
	// otherwise clock.Advance() will be ignored
	trap := clock.Trap().NewTicker()
	go reconciler.Run(ctx)
	// wait until ticker is initialized
	trap.MustWait(ctx).MustRelease(ctx)

	// start 1st iteration of ReconciliationLoop
	// NOTE: at this point MustWait waits that iteration is started (ReconcileAll is called), but it doesn't wait until it completes
	clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)

	// wait until ReconcileAll is completed
	// TODO: is it possible to avoid Eventually and replace it with quartz?
	// Ideally to have all control on test-level, and be able to advance loop iterations from the test.
	require.Eventually(t, func() bool {
		newPrebuildCount := getNewPrebuildCount()

		// NOTE: preset1 doesn't block creation of instances in preset2
		return preset2.DesiredInstances.Int32 == newPrebuildCount
	}, testutil.WaitShort, testutil.IntervalFast)

	// setup one more preset with 5 prebuilds
	preset3 := setupTestDBPreset(
		t,
		db,
		templateVersionID,
		5,
		uuid.New().String(),
	)
	newPrebuildCount := getNewPrebuildCount()
	// nothing changed, because we didn't trigger a new iteration of a loop
	require.Equal(t, preset2.DesiredInstances.Int32, newPrebuildCount)

	// start 2nd iteration of ReconciliationLoop
	// NOTE: at this point MustWait waits that iteration is started (ReconcileAll is called), but it doesn't wait until it completes
	clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)

	// wait until ReconcileAll is completed
	require.Eventually(t, func() bool {
		newPrebuildCount := getNewPrebuildCount()

		// both prebuilds for preset2 and preset3 were created
		return preset2.DesiredInstances.Int32+preset3.DesiredInstances.Int32 == newPrebuildCount
	}, testutil.WaitShort, testutil.IntervalFast)

	// gracefully stop the reconciliation loop
	reconciler.Stop(ctx, nil)
}
// TestFailedBuildBackoff checks the backoff applied after failed prebuilds: while the
// backoff deadline lies in the future nothing new is created and the deadline stays
// put; once the clock passes it the missing prebuilds are requested again; and further
// failures lengthen the backoff in proportion to the number of failed builds.
func TestFailedBuildBackoff(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	ctx := testutil.Context(t, testutil.WaitSuperLong)

	// Setup.
	mockClock := quartz.NewMock(t)
	backoffInterval := time.Minute
	cfg := codersdk.PrebuildsConfig{
		// Given: an explicit backoff configuration so that timings can be asserted.
		ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
		ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
		ReconciliationInterval:        serpent.Duration(time.Second),
	}
	logger := slogtest.Make(
		t, &slogtest.Options{IgnoreErrors: true},
	).Leveled(slog.LevelDebug)
	db, ps := dbtestutil.NewDB(t)
	reconciler := prebuilds.NewStoreReconciler(db, ps, cfg, logger, mockClock, prometheus.NewRegistry(), newNoopEnqueuer())

	// Given: an active template version with a preset whose prebuilds have all failed.
	const desiredInstances = 2
	userID := uuid.New()
	dbgen.User(t, db, database.User{
		ID: userID,
	})
	org, template := setupTestDBTemplate(t, db, userID, false)
	templateVersionID := setupTestDBTemplateVersion(ctx, t, mockClock, db, ps, org.ID, userID, template.ID)
	preset := setupTestDBPreset(t, db, templateVersionID, desiredInstances, "test")
	for range desiredInstances {
		_, _ = setupTestDBPrebuild(t, mockClock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusFailed, org.ID, preset, template.ID, templateVersionID)
	}

	// When: the next actions are calculated while every prebuild is in a failed state.
	snapshot, err := reconciler.SnapshotState(ctx, db)
	require.NoError(t, err)
	require.Len(t, snapshot.Presets, 1)
	presetState, err := snapshot.FilterByPreset(preset.ID)
	require.NoError(t, err)
	state := presetState.CalculateState()
	actions, err := reconciler.CalculateActions(ctx, *presetState)
	require.NoError(t, err)
	require.Len(t, actions, 1)

	// Then: nothing is running, nothing gets created, and the backoff deadline is in the future.
	require.EqualValues(t, 0, state.Actual)
	require.EqualValues(t, 0, actions[0].Create)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.True(t, mockClock.Now().Before(actions[0].BackoffUntil))

	// Then: the backoff spans one interval per failed build.
	require.NotNil(t, presetState.Backoff)
	require.EqualValues(t, desiredInstances, presetState.Backoff.NumFailed)
	require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), mockClock.Until(actions[0].BackoffUntil).Truncate(backoffInterval))

	// When: the clock moves one reconciliation interval ahead, still inside the backoff window.
	mockClock.Advance(cfg.ReconciliationInterval.Value())

	// Then: the backoff deadline is unchanged.
	snapshot, err = reconciler.SnapshotState(ctx, db)
	require.NoError(t, err)
	presetState, err = snapshot.FilterByPreset(preset.ID)
	require.NoError(t, err)
	newState := presetState.CalculateState()
	newActions, err := reconciler.CalculateActions(ctx, *presetState)
	require.NoError(t, err)
	require.Len(t, newActions, 1)
	require.EqualValues(t, 0, newState.Actual)
	require.EqualValues(t, 0, newActions[0].Create)
	require.EqualValues(t, desiredInstances, newState.Desired)
	require.EqualValues(t, actions[0].BackoffUntil, newActions[0].BackoffUntil)

	// When: the clock moves one second past the backoff deadline.
	mockClock.Advance(mockClock.Until(actions[0].BackoffUntil.Add(time.Second)))

	// Then: the full set of desired prebuilds is scheduled for creation.
	snapshot, err = reconciler.SnapshotState(ctx, db)
	require.NoError(t, err)
	presetState, err = snapshot.FilterByPreset(preset.ID)
	require.NoError(t, err)
	state = presetState.CalculateState()
	actions, err = reconciler.CalculateActions(ctx, *presetState)
	require.NoError(t, err)
	require.Len(t, actions, 1)
	require.EqualValues(t, 0, state.Actual)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.EqualValues(t, desiredInstances, actions[0].Create)

	// When: the desired number of prebuilds is provisioned again; only the second one succeeds.
	for i := range desiredInstances {
		status := database.ProvisionerJobStatusSucceeded
		if i != 1 {
			status = database.ProvisionerJobStatusFailed
		}
		_, _ = setupTestDBPrebuild(t, mockClock, db, ps, database.WorkspaceTransitionStart, status, org.ID, preset, template.ID, templateVersionID)
	}

	// Then: three builds have failed in total, so the backoff now spans three intervals,
	// and no creation is attempted even though one instance is still missing.
	snapshot, err = reconciler.SnapshotState(ctx, db)
	require.NoError(t, err)
	presetState, err = snapshot.FilterByPreset(preset.ID)
	require.NoError(t, err)
	state = presetState.CalculateState()
	actions, err = reconciler.CalculateActions(ctx, *presetState)
	require.NoError(t, err)
	require.Len(t, actions, 1)
	require.EqualValues(t, 1, state.Actual)
	require.EqualValues(t, desiredInstances, state.Desired)
	require.EqualValues(t, 0, actions[0].Create)
	require.EqualValues(t, 3, presetState.Backoff.NumFailed)
	require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), mockClock.Until(actions[0].BackoffUntil).Truncate(backoffInterval))
}
// TestReconciliationLock starts several reconcilers concurrently against the same
// database and checks that the callbacks passed to WithReconciliationLock never run
// at the same time. An in-process mutex acts as the overlap detector: if a callback
// finds it already locked, another callback is running concurrently.
func TestReconciliationLock(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	ctx := testutil.Context(t, testutil.WaitSuperLong)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	db, ps := dbtestutil.NewDB(t)

	wg := sync.WaitGroup{}
	mutex := sync.Mutex{}
	for range 5 {
		wg.Add(1)
		go func() {
			defer wg.Done()
			reconciler := prebuilds.NewStoreReconciler(
				db,
				ps,
				codersdk.PrebuildsConfig{},
				slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug),
				quartz.NewMock(t),
				prometheus.NewRegistry(),
				newNoopEnqueuer())
			reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
				lockObtained := mutex.TryLock()
				// As long as the postgres lock is held, this mutex should always be unlocked when we get here.
				// If this mutex is ever locked at this point, then that means that the postgres lock is not being held while we're
				// inside WithReconciliationLock, which is meant to hold the lock.
				//
				// assert, not require: this callback runs on a goroutine other than the test's,
				// where FailNow must not be called.
				if !assert.True(t, lockObtained) {
					// The mutex belongs to another callback; do not unlock it.
					return nil
				}
				defer mutex.Unlock()

				// Sleep a bit to give reconcilers more time to contend for the lock
				time.Sleep(time.Second)
				return nil
			})
		}()
	}

	wg.Wait()
}
// TestTrackResourceReplacement checks the two observable effects of
// TrackResourceReplacement for a prebuilt workspace: a notification is enqueued for
// the template admin, and the resource-replacement counter metric for the
// template/preset/org appears with a value of 1.
func TestTrackResourceReplacement(t *testing.T) {
	t.Parallel()

	if !dbtestutil.WillUsePostgres() {
		t.Skip("This test requires postgres")
	}

	ctx := testutil.Context(t, testutil.WaitSuperLong)

	// Setup.
	clock := quartz.NewMock(t)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
	db, ps := dbtestutil.NewDB(t)

	fakeEnqueuer := newFakeEnqueuer()
	registry := prometheus.NewRegistry()
	reconciler := prebuilds.NewStoreReconciler(db, ps, codersdk.PrebuildsConfig{}, logger, clock, registry, fakeEnqueuer)

	// Given: a template admin to receive a notification.
	templateAdmin := dbgen.User(t, db, database.User{
		RBACRoles: []string{codersdk.RoleTemplateAdmin},
	})

	// Given: a prebuilt workspace.
	userID := uuid.New()
	dbgen.User(t, db, database.User{ID: userID})
	org, template := setupTestDBTemplate(t, db, userID, false)
	templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, userID, template.ID)
	preset := setupTestDBPreset(t, db, templateVersionID, 1, "b0rked")
	prebuiltWorkspace, prebuild := setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusSucceeded, org.ID, preset, template.ID, templateVersionID)

	// Given: no replacement has been tracked yet, we should not see a metric for it yet.
	require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
	mf, err := registry.Gather()
	require.NoError(t, err)
	require.Nil(t, findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
		"template_name": template.Name,
		"preset_name":   preset.Name,
		"org_name":      org.Name,
	}))

	// When: a claim occurred and resource replacements are detected (_how_ is out of scope of this test).
	reconciler.TrackResourceReplacement(ctx, prebuiltWorkspace.ID, prebuild.ID, []*sdkproto.ResourceReplacement{
		{
			Resource: "docker_container[0]",
			Paths:    []string{"env", "image"},
		},
		{
			Resource: "docker_volume[0]",
			Paths:    []string{"name"},
		},
	})

	// Then: a notification will be sent detailing the replacement(s).
	matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
		// This is not an exhaustive check of the expected labels/data in the notification. This would tie the implementations
		// too tightly together.
		// All we need to validate is that a template of the right kind was sent, to the expected user, with some replacements.
		// Expected value goes first so that a failure reports expected/actual the right way round.
		if !assert.Equal(t, notifications.TemplateWorkspaceResourceReplaced, notification.TemplateID, "unexpected template") {
			return false
		}

		if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
			return false
		}

		if !assert.Len(t, notification.Data["replacements"], 2, "unexpected replacements count") {
			return false
		}

		return true
	})
	require.Len(t, matching, 1)

	// Then: the metric will be incremented.
	mf, err = registry.Gather()
	require.NoError(t, err)
	metric := findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
		"template_name": template.Name,
		"preset_name":   preset.Name,
		"org_name":      org.Name,
	})
	require.NotNil(t, metric)
	require.NotNil(t, metric.GetCounter())
	require.EqualValues(t, 1, metric.GetCounter().GetValue())
}
// newNoopEnqueuer returns the enqueuer built by notifications.NewNoopEnqueuer.
// Tests such as TestRunLoop hand it to the reconciler when they make no assertions
// about notifications.
func newNoopEnqueuer() *notifications.NoopEnqueuer {
return notifications.NewNoopEnqueuer()
}
// newFakeEnqueuer returns the enqueuer built by notificationstest.NewFakeEnqueuer.
// Tests use its Sent method to inspect which notifications were enqueued.
func newFakeEnqueuer() *notificationstest.FakeEnqueuer {
return notificationstest.NewFakeEnqueuer()
}
// setupTestDBTemplate inserts a new organization and a template created by userID in
// that organization, and returns both. The template's CreatedAt is backdated by
// muchEarlier relative to the wall clock. When templateDeleted is true the template is
// additionally flagged as deleted before returning.
//
// nolint:revive // It's a control flag, but this is a test.
func setupTestDBTemplate(
	t *testing.T,
	db database.Store,
	userID uuid.UUID,
	templateDeleted bool,
) (
	database.Organization,
	database.Template,
) {
	t.Helper()

	org := dbgen.Organization(t, db, database.Organization{})
	tpl := dbgen.Template(t, db, database.Template{
		OrganizationID: org.ID,
		CreatedBy:      userID,
		CreatedAt:      time.Now().Add(muchEarlier),
	})
	if !templateDeleted {
		return org, tpl
	}

	// Flag the freshly inserted template as deleted, using a short-lived context.
	deleteParams := database.UpdateTemplateDeletedByIDParams{
		ID:      tpl.ID,
		Deleted: true,
	}
	err := db.UpdateTemplateDeletedByID(testutil.Context(t, testutil.WaitShort), deleteParams)
	require.NoError(t, err)
	return org, tpl
}
// setupTestDBTemplateWithinOrg inserts a template named templateName, created by
// userID, into the given existing organization and returns it. The template's
// CreatedAt is backdated by muchEarlier relative to the wall clock. When
// templateDeleted is true the template is flagged as deleted before returning.
//
// nolint:revive // It's a control flag, but this is a test.
func setupTestDBTemplateWithinOrg(
	t *testing.T,
	db database.Store,
	userID uuid.UUID,
	templateDeleted bool,
	templateName string,
	org database.Organization,
) database.Template {
	t.Helper()

	tpl := dbgen.Template(t, db, database.Template{
		OrganizationID: org.ID,
		Name:           templateName,
		CreatedBy:      userID,
		CreatedAt:      time.Now().Add(muchEarlier),
	})
	if !templateDeleted {
		return tpl
	}

	// Flag the freshly inserted template as deleted, using a short-lived context.
	deleteParams := database.UpdateTemplateDeletedByIDParams{
		ID:      tpl.ID,
		Deleted: true,
	}
	err := db.UpdateTemplateDeletedByID(testutil.Context(t, testutil.WaitShort), deleteParams)
	require.NoError(t, err)
	return tpl
}
// Negative offsets that the test helpers add to a "now" timestamp in order to
// backdate the records they insert.
const (
// earlier is an offset of one hour into the past.
earlier = -time.Hour
// muchEarlier is an offset of two hours into the past.
muchEarlier = -time.Hour * 2
)
// setupTestDBTemplateVersion inserts a provisioner job and a template version backed
// by it for templateID, makes that version the template's active version, attaches a
// required, immutable "test" parameter to it, and returns the new version's ID.
//
// The job timestamps are derived from the injected clock, whereas the version's
// CreatedAt is derived from the wall clock.
func setupTestDBTemplateVersion(
	ctx context.Context,
	t *testing.T,
	clock quartz.Clock,
	db database.Store,
	ps pubsub.Pubsub,
	orgID uuid.UUID,
	userID uuid.UUID,
	templateID uuid.UUID,
) uuid.UUID {
	t.Helper()

	// The job backing the version is given a completion time in the past.
	importJob := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
		OrganizationID: orgID,
		InitiatorID:    userID,
		CreatedAt:      clock.Now().Add(muchEarlier),
		CompletedAt:    sql.NullTime{Valid: true, Time: clock.Now().Add(earlier)},
	})

	version := dbgen.TemplateVersion(t, db, database.TemplateVersion{
		OrganizationID: orgID,
		TemplateID:     uuid.NullUUID{Valid: true, UUID: templateID},
		JobID:          importJob.ID,
		CreatedBy:      userID,
		CreatedAt:      time.Now().Add(muchEarlier),
	})

	activate := database.UpdateTemplateActiveVersionByIDParams{
		ID:              templateID,
		ActiveVersionID: version.ID,
	}
	require.NoError(t, db.UpdateTemplateActiveVersionByID(ctx, activate))

	// Make sure immutable params don't break prebuilt workspace deletion logic
	dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
		TemplateVersionID: version.ID,
		Name:              "test",
		Description:       "required & immutable param",
		Type:              "string",
		DefaultValue:      "",
		Required:          true,
		Mutable:           false,
	})

	return version.ID
}
// setupTestDBPreset inserts a preset named presetName for templateVersionID with the
// given number of desired prebuild instances, gives it a single parameter
// "test" = "test", and returns the inserted preset.
func setupTestDBPreset(
	t *testing.T,
	db database.Store,
	templateVersionID uuid.UUID,
	desiredInstances int32,
	presetName string,
) database.TemplateVersionPreset {
	t.Helper()

	desired := sql.NullInt32{Int32: desiredInstances, Valid: true}
	created := dbgen.Preset(t, db, database.InsertPresetParams{
		Name:              presetName,
		TemplateVersionID: templateVersionID,
		DesiredInstances:  desired,
	})

	// Same name/value pair as the build parameter inserted by setupTestDBWorkspace.
	dbgen.PresetParameter(t, db, database.InsertPresetParametersParams{
		TemplateVersionPresetID: created.ID,
		Names:                   []string{"test"},
		Values:                  []string{"test"},
	})

	return created
}
// setupTestDBPrebuild creates a workspace and workspace build via setupTestDBWorkspace,
// passing agplprebuilds.SystemUserID as both the build initiator and the workspace owner.
// All other arguments are forwarded unchanged. It returns the workspace and its build.
func setupTestDBPrebuild(
t *testing.T,
clock quartz.Clock,
db database.Store,
ps pubsub.Pubsub,
transition database.WorkspaceTransition,
prebuildStatus database.ProvisionerJobStatus,
orgID uuid.UUID,
preset database.TemplateVersionPreset,
templateID uuid.UUID,
templateVersionID uuid.UUID,
) (database.WorkspaceTable, database.WorkspaceBuild) {
t.Helper()
return setupTestDBWorkspace(t, clock, db, ps, transition, prebuildStatus, orgID, preset, templateID, templateVersionID, agplprebuilds.SystemUserID, agplprebuilds.SystemUserID)
}
// setupTestDBWorkspace inserts a workspace owned by ownerID, a provisioner job, a
// workspace build (with the given transition and preset) that references that job, and
// a single build parameter "test" = "test". It returns the workspace and the build.
//
// prebuildStatus is not stored directly; instead it selects which of the job's
// started/completed/canceled timestamps and error are populated:
//   - every status except pending: started
//   - failed: completed + error "build failed"
//   - succeeded: completed
//   - canceled: completed + canceled
//   - canceling: canceled
func setupTestDBWorkspace(
	t *testing.T,
	clock quartz.Clock,
	db database.Store,
	ps pubsub.Pubsub,
	transition database.WorkspaceTransition,
	prebuildStatus database.ProvisionerJobStatus,
	orgID uuid.UUID,
	preset database.TemplateVersionPreset,
	templateID uuid.UUID,
	templateVersionID uuid.UUID,
	initiatorID uuid.UUID,
	ownerID uuid.UUID,
) (database.WorkspaceTable, database.WorkspaceBuild) {
	t.Helper()

	var (
		startedAt   sql.NullTime
		completedAt sql.NullTime
		cancelledAt sql.NullTime
		buildError  sql.NullString
	)

	// past yields a valid timestamp shifted from the injected clock's current time.
	past := func(offset time.Duration) sql.NullTime {
		return sql.NullTime{Time: clock.Now().Add(offset), Valid: true}
	}

	if prebuildStatus != database.ProvisionerJobStatusPending {
		startedAt = past(muchEarlier)
	}

	switch prebuildStatus {
	case database.ProvisionerJobStatusFailed:
		completedAt = past(earlier)
		buildError = sql.NullString{String: "build failed", Valid: true}
	case database.ProvisionerJobStatusSucceeded:
		completedAt = past(earlier)
	case database.ProvisionerJobStatusCanceled:
		completedAt = past(earlier)
		cancelledAt = past(earlier)
	case database.ProvisionerJobStatusCanceling:
		cancelledAt = past(earlier)
	}

	ws := dbgen.Workspace(t, db, database.WorkspaceTable{
		OwnerID:        ownerID,
		OrganizationID: orgID,
		TemplateID:     templateID,
		Deleted:        false,
	})

	provisionerJob := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
		OrganizationID: orgID,
		InitiatorID:    initiatorID,
		CreatedAt:      clock.Now().Add(muchEarlier),
		StartedAt:      startedAt,
		CompletedAt:    completedAt,
		CanceledAt:     cancelledAt,
		Error:          buildError,
	})

	build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
		WorkspaceID:             ws.ID,
		JobID:                   provisionerJob.ID,
		InitiatorID:             initiatorID,
		TemplateVersionID:       templateVersionID,
		TemplateVersionPresetID: uuid.NullUUID{Valid: true, UUID: preset.ID},
		Transition:              transition,
		CreatedAt:               clock.Now(),
	})

	buildParams := []database.WorkspaceBuildParameter{
		{WorkspaceBuildID: build.ID, Name: "test", Value: "test"},
	}
	dbgen.WorkspaceBuildParameters(t, db, buildParams)

	return ws, build
}
// setupTestDBWorkspaceAgent inserts a workspace resource attached to the job of the
// workspace's latest build, plus an agent on that resource, and returns the agent.
// When eligible is true the agent's lifecycle state is updated to "ready", with
// StartedAt one minute before ReadyAt.
//
// nolint:revive // It's a control flag, but this is a test.
func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent {
	// Like the other setup helpers, report failures at the caller's line.
	t.Helper()

	build, err := db.GetLatestWorkspaceBuildByWorkspaceID(t.Context(), workspaceID)
	require.NoError(t, err)

	res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID})
	agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
		ResourceID: res.ID,
	})

	// A prebuilt workspace is considered eligible when its agent is in a "ready" lifecycle state.
	// i.e. connected to the control plane and all startup scripts have run.
	if eligible {
		require.NoError(t, db.UpdateWorkspaceAgentLifecycleStateByID(t.Context(), database.UpdateWorkspaceAgentLifecycleStateByIDParams{
			ID:             agent.ID,
			LifecycleState: database.WorkspaceAgentLifecycleStateReady,
			StartedAt:      sql.NullTime{Time: dbtime.Now().Add(-time.Minute), Valid: true},
			ReadyAt:        sql.NullTime{Time: dbtime.Now(), Valid: true},
		}))
	}

	return agent
}
// allTransitions lists the workspace transitions enumerated by the tests:
// start, stop and delete.
var allTransitions = []database.WorkspaceTransition{
database.WorkspaceTransitionStart,
database.WorkspaceTransitionStop,
database.WorkspaceTransitionDelete,
}
// allJobStatuses lists the provisioner job statuses enumerated by the tests:
// pending, running, succeeded, failed, canceled and canceling.
// allJobStatusesExcept reads this list.
var allJobStatuses = []database.ProvisionerJobStatus{
database.ProvisionerJobStatusPending,
database.ProvisionerJobStatusRunning,
database.ProvisionerJobStatusSucceeded,
database.ProvisionerJobStatusFailed,
database.ProvisionerJobStatusCanceled,
database.ProvisionerJobStatusCanceling,
}
// allJobStatusesExcept returns the entries of allJobStatuses that are not listed in
// except, in the order they appear in allJobStatuses. With no arguments it returns
// every entry of allJobStatuses.
func allJobStatusesExcept(except ...database.ProvisionerJobStatus) []database.ProvisionerJobStatus {
	// Filter the full list by the exclusions. Filtering `except` against the full list
	// instead would yield an empty result for any valid status.
	return slice.Filter(allJobStatuses, func(status database.ProvisionerJobStatus) bool {
		return !slice.Contains(except, status)
	})
}