mirror of
https://github.com/coder/coder.git
synced 2025-07-12 00:14:10 +00:00
Relates to https://github.com/coder/coder/issues/17432 ### Part 1: Notes: - `GetPresetsAtFailureLimit` SQL query is added. It is similar to `GetPresetsBackoff`: both use the same CTEs (`filtered_builds`, `time_sorted_builds`), but the queries are still different. - The query is executed on every loop iteration. We could consider marking a specific preset as permanently failed as an optimization to avoid executing the query on every loop iteration, but I decided not to do it for now. - By default `FailureHardLimit` is set to 3. - `FailureHardLimit` is configurable. Setting it to zero means that the hard limit is disabled. ### Part 2 Notes: - `PrebuildFailureLimitReached` notification is added. - The notification is sent to template admins. - The notification is sent only the first time the hard limit is reached, but the reconciler will `log.Warn` on every loop iteration. - I introduced this enum: ```sql CREATE TYPE prebuild_status AS ENUM ( 'normal', -- Prebuilds are working as expected; this is the default, healthy state. 'hard_limited', -- Prebuilds have failed repeatedly and hit the configured hard failure limit; won't be retried anymore. 'validation_failed' -- Prebuilds failed due to a non-retryable validation error (e.g. template misconfiguration); won't be retried. ); ``` `validation_failed` is not used in this PR, but I think it will be used in the next one, so I wanted to save us an extra migration. - The notification looks like this: <img width="472" alt="image" src="https://github.com/user-attachments/assets/e10efea0-1790-4e7f-a65c-f94c40fced27" /> ### Latest notification views: <img width="463" alt="image" src="https://github.com/user-attachments/assets/11310c58-68d1-4075-a497-f76d854633fe" /> <img width="725" alt="image" src="https://github.com/user-attachments/assets/6bbfe21a-91ac-47c3-a9d1-21807bb0c53a" />
1425 lines
48 KiB
Go
1425 lines
48 KiB
Go
package prebuilds_test
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"fmt"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
"github.com/stretchr/testify/assert"
|
|
"golang.org/x/xerrors"
|
|
|
|
"github.com/coder/coder/v2/coderd/database/dbtime"
|
|
"github.com/coder/coder/v2/coderd/notifications"
|
|
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
|
|
"github.com/coder/coder/v2/coderd/util/slice"
|
|
sdkproto "github.com/coder/coder/v2/provisionersdk/proto"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/stretchr/testify/require"
|
|
"tailscale.com/types/ptr"
|
|
|
|
"cdr.dev/slog"
|
|
"cdr.dev/slog/sloggers/slogtest"
|
|
"github.com/coder/quartz"
|
|
|
|
"github.com/coder/serpent"
|
|
|
|
"github.com/coder/coder/v2/coderd/database"
|
|
"github.com/coder/coder/v2/coderd/database/dbgen"
|
|
"github.com/coder/coder/v2/coderd/database/dbtestutil"
|
|
"github.com/coder/coder/v2/coderd/database/pubsub"
|
|
agplprebuilds "github.com/coder/coder/v2/coderd/prebuilds"
|
|
"github.com/coder/coder/v2/codersdk"
|
|
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
|
|
"github.com/coder/coder/v2/testutil"
|
|
)
|
|
|
|
func TestNoReconciliationActionsIfNoPresets(t *testing.T) {
|
|
// Scenario: No reconciliation actions are taken if there are no presets
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitLong)
|
|
db, ps := dbtestutil.NewDB(t)
|
|
cfg := codersdk.PrebuildsConfig{
|
|
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
|
}
|
|
logger := testutil.Logger(t)
|
|
controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
// given a template version with no presets
|
|
org := dbgen.Organization(t, db, database.Organization{})
|
|
user := dbgen.User(t, db, database.User{})
|
|
template := dbgen.Template(t, db, database.Template{
|
|
CreatedBy: user.ID,
|
|
OrganizationID: org.ID,
|
|
})
|
|
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
|
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
|
OrganizationID: org.ID,
|
|
CreatedBy: user.ID,
|
|
})
|
|
// verify that the db state is correct
|
|
gotTemplateVersion, err := db.GetTemplateVersionByID(ctx, templateVersion.ID)
|
|
require.NoError(t, err)
|
|
require.Equal(t, templateVersion, gotTemplateVersion)
|
|
|
|
// when we trigger the reconciliation loop for all templates
|
|
require.NoError(t, controller.ReconcileAll(ctx))
|
|
|
|
// then no reconciliation actions are taken
|
|
// because without presets, there are no prebuilds
|
|
// and without prebuilds, there is nothing to reconcile
|
|
jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, clock.Now().Add(earlier))
|
|
require.NoError(t, err)
|
|
require.Empty(t, jobs)
|
|
}
|
|
|
|
func TestNoReconciliationActionsIfNoPrebuilds(t *testing.T) {
|
|
// Scenario: No reconciliation actions are taken if there are no prebuilds
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitLong)
|
|
db, ps := dbtestutil.NewDB(t)
|
|
cfg := codersdk.PrebuildsConfig{
|
|
ReconciliationInterval: serpent.Duration(testutil.WaitLong),
|
|
}
|
|
logger := testutil.Logger(t)
|
|
controller := prebuilds.NewStoreReconciler(db, ps, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
// given there are presets, but no prebuilds
|
|
org := dbgen.Organization(t, db, database.Organization{})
|
|
user := dbgen.User(t, db, database.User{})
|
|
template := dbgen.Template(t, db, database.Template{
|
|
CreatedBy: user.ID,
|
|
OrganizationID: org.ID,
|
|
})
|
|
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
|
TemplateID: uuid.NullUUID{UUID: template.ID, Valid: true},
|
|
OrganizationID: org.ID,
|
|
CreatedBy: user.ID,
|
|
})
|
|
preset, err := db.InsertPreset(ctx, database.InsertPresetParams{
|
|
TemplateVersionID: templateVersion.ID,
|
|
Name: "test",
|
|
})
|
|
require.NoError(t, err)
|
|
_, err = db.InsertPresetParameters(ctx, database.InsertPresetParametersParams{
|
|
TemplateVersionPresetID: preset.ID,
|
|
Names: []string{"test"},
|
|
Values: []string{"test"},
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
// verify that the db state is correct
|
|
presetParameters, err := db.GetPresetParametersByTemplateVersionID(ctx, templateVersion.ID)
|
|
require.NoError(t, err)
|
|
require.NotEmpty(t, presetParameters)
|
|
|
|
// when we trigger the reconciliation loop for all templates
|
|
require.NoError(t, controller.ReconcileAll(ctx))
|
|
|
|
// then no reconciliation actions are taken
|
|
// because without prebuilds, there is nothing to reconcile
|
|
// even if there are presets
|
|
jobs, err := db.GetProvisionerJobsCreatedAfter(ctx, clock.Now().Add(earlier))
|
|
require.NoError(t, err)
|
|
require.Empty(t, jobs)
|
|
}
|
|
|
|
func TestPrebuildReconciliation(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
type testCase struct {
|
|
name string
|
|
prebuildLatestTransitions []database.WorkspaceTransition
|
|
prebuildJobStatuses []database.ProvisionerJobStatus
|
|
templateVersionActive []bool
|
|
templateDeleted []bool
|
|
shouldCreateNewPrebuild *bool
|
|
shouldDeleteOldPrebuild *bool
|
|
}
|
|
|
|
testCases := []testCase{
|
|
{
|
|
name: "never create prebuilds for inactive template versions",
|
|
prebuildLatestTransitions: allTransitions,
|
|
prebuildJobStatuses: allJobStatuses,
|
|
templateVersionActive: []bool{false},
|
|
shouldCreateNewPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "no need to create a new prebuild if one is already running",
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStart,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusSucceeded,
|
|
},
|
|
templateVersionActive: []bool{true},
|
|
shouldCreateNewPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "don't create a new prebuild if one is queued to build or already building",
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStart,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusPending,
|
|
database.ProvisionerJobStatusRunning,
|
|
},
|
|
templateVersionActive: []bool{true},
|
|
shouldCreateNewPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "create a new prebuild if one is in a state that disqualifies it from ever being claimed",
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStop,
|
|
database.WorkspaceTransitionDelete,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusPending,
|
|
database.ProvisionerJobStatusRunning,
|
|
database.ProvisionerJobStatusCanceling,
|
|
database.ProvisionerJobStatusSucceeded,
|
|
},
|
|
templateVersionActive: []bool{true},
|
|
shouldCreateNewPrebuild: ptr.To(true),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
// See TestFailedBuildBackoff for the start/failed case.
|
|
name: "create a new prebuild if one is in any kind of exceptional state",
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStop,
|
|
database.WorkspaceTransitionDelete,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusCanceled,
|
|
},
|
|
templateVersionActive: []bool{true},
|
|
shouldCreateNewPrebuild: ptr.To(true),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "never attempt to interfere with active builds",
|
|
// The workspace builder does not allow scheduling a new build if there is already a build
|
|
// pending, running, or canceling. As such, we should never attempt to start, stop or delete
|
|
// such prebuilds. Rather, we should wait for the existing build to complete and reconcile
|
|
// again in the next cycle.
|
|
prebuildLatestTransitions: allTransitions,
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusPending,
|
|
database.ProvisionerJobStatusRunning,
|
|
database.ProvisionerJobStatusCanceling,
|
|
},
|
|
templateVersionActive: []bool{true, false},
|
|
shouldDeleteOldPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "never delete prebuilds in an exceptional state",
|
|
// We don't want to destroy evidence that might be useful to operators
|
|
// when troubleshooting issues. So we leave these prebuilds in place.
|
|
// Operators are expected to manually delete these prebuilds.
|
|
prebuildLatestTransitions: allTransitions,
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusCanceled,
|
|
database.ProvisionerJobStatusFailed,
|
|
},
|
|
templateVersionActive: []bool{true, false},
|
|
shouldDeleteOldPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "delete running prebuilds for inactive template versions",
|
|
// We only support prebuilds for active template versions.
|
|
// If a template version is inactive, we should delete any prebuilds
|
|
// that are running.
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStart,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusSucceeded,
|
|
},
|
|
templateVersionActive: []bool{false},
|
|
shouldDeleteOldPrebuild: ptr.To(true),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "don't delete running prebuilds for active template versions",
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStart,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusSucceeded,
|
|
},
|
|
templateVersionActive: []bool{true},
|
|
shouldDeleteOldPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
name: "don't delete stopped or already deleted prebuilds",
|
|
// We don't ever stop prebuilds. A stopped prebuild is an exceptional state.
|
|
// As such we keep it, to allow operators to investigate the cause.
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStop,
|
|
database.WorkspaceTransitionDelete,
|
|
},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusSucceeded,
|
|
},
|
|
templateVersionActive: []bool{true, false},
|
|
shouldDeleteOldPrebuild: ptr.To(false),
|
|
templateDeleted: []bool{false},
|
|
},
|
|
{
|
|
// Templates can be soft-deleted (`deleted=true`) or hard-deleted (row is removed).
|
|
// On the former there is *no* DB constraint to prevent soft deletion, so we have to ensure that if somehow
|
|
// the template was soft-deleted any running prebuilds will be removed.
|
|
// On the latter there is a DB constraint to prevent row deletion if any workspaces reference the deleting template.
|
|
name: "soft-deleted templates MAY have prebuilds",
|
|
prebuildLatestTransitions: []database.WorkspaceTransition{database.WorkspaceTransitionStart},
|
|
prebuildJobStatuses: []database.ProvisionerJobStatus{database.ProvisionerJobStatusSucceeded},
|
|
templateVersionActive: []bool{true, false},
|
|
shouldCreateNewPrebuild: ptr.To(false),
|
|
shouldDeleteOldPrebuild: ptr.To(true),
|
|
templateDeleted: []bool{true},
|
|
},
|
|
}
|
|
for _, tc := range testCases {
|
|
tc := tc // capture for parallel
|
|
for _, templateVersionActive := range tc.templateVersionActive {
|
|
for _, prebuildLatestTransition := range tc.prebuildLatestTransitions {
|
|
for _, prebuildJobStatus := range tc.prebuildJobStatuses {
|
|
for _, templateDeleted := range tc.templateDeleted {
|
|
for _, useBrokenPubsub := range []bool{true, false} {
|
|
t.Run(fmt.Sprintf("%s - %s - %s - pubsub_broken=%v", tc.name, prebuildLatestTransition, prebuildJobStatus, useBrokenPubsub), func(t *testing.T) {
|
|
t.Parallel()
|
|
t.Cleanup(func() {
|
|
if t.Failed() {
|
|
t.Logf("failed to run test: %s", tc.name)
|
|
t.Logf("templateVersionActive: %t", templateVersionActive)
|
|
t.Logf("prebuildLatestTransition: %s", prebuildLatestTransition)
|
|
t.Logf("prebuildJobStatus: %s", prebuildJobStatus)
|
|
}
|
|
})
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
cfg := codersdk.PrebuildsConfig{}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, pubSub := dbtestutil.NewDB(t)
|
|
|
|
ownerID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: ownerID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
|
templateVersionID := setupTestDBTemplateVersion(
|
|
ctx,
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
org.ID,
|
|
ownerID,
|
|
template.ID,
|
|
)
|
|
preset := setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
1,
|
|
uuid.New().String(),
|
|
)
|
|
prebuild, _ := setupTestDBPrebuild(
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
prebuildLatestTransition,
|
|
prebuildJobStatus,
|
|
org.ID,
|
|
preset,
|
|
template.ID,
|
|
templateVersionID,
|
|
)
|
|
|
|
if !templateVersionActive {
|
|
// Create a new template version and mark it as active
|
|
// This marks the template version that we care about as inactive
|
|
setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
|
}
|
|
|
|
if useBrokenPubsub {
|
|
pubSub = &brokenPublisher{Pubsub: pubSub}
|
|
}
|
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
// Run the reconciliation multiple times to ensure idempotency
|
|
// 8 was arbitrary, but large enough to reasonably trust the result
|
|
for i := 1; i <= 8; i++ {
|
|
require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)
|
|
|
|
if tc.shouldCreateNewPrebuild != nil {
|
|
newPrebuildCount := 0
|
|
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
for _, workspace := range workspaces {
|
|
if workspace.ID != prebuild.ID {
|
|
newPrebuildCount++
|
|
}
|
|
}
|
|
// This test configures a preset that desires one prebuild.
|
|
// In cases where new prebuilds should be created, there should be exactly one.
|
|
require.Equal(t, *tc.shouldCreateNewPrebuild, newPrebuildCount == 1)
|
|
}
|
|
|
|
if tc.shouldDeleteOldPrebuild != nil {
|
|
builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
|
WorkspaceID: prebuild.ID,
|
|
})
|
|
require.NoError(t, err)
|
|
if *tc.shouldDeleteOldPrebuild {
|
|
require.Equal(t, 2, len(builds))
|
|
require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
|
|
} else {
|
|
require.Equal(t, 1, len(builds))
|
|
require.Equal(t, prebuildLatestTransition, builds[0].Transition)
|
|
}
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// brokenPublisher is used to validate that Publish() calls which always fail do not affect the reconciler's behavior,
|
|
// since the messages published are not essential but merely advisory.
|
|
type brokenPublisher struct {
|
|
pubsub.Pubsub
|
|
}
|
|
|
|
// Publish deliberately fails.
|
|
// I'm explicitly _not_ checking for EventJobPosted (coderd/database/provisionerjobs/provisionerjobs.go) since that
|
|
// requires too much knowledge of the underlying implementation.
|
|
func (*brokenPublisher) Publish(event string, _ []byte) error {
|
|
// Mimick some work being done.
|
|
<-time.After(testutil.IntervalFast)
|
|
return xerrors.Errorf("failed to publish %q", event)
|
|
}
|
|
|
|
func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
prebuildLatestTransition := database.WorkspaceTransitionStart
|
|
prebuildJobStatus := database.ProvisionerJobStatusRunning
|
|
templateDeleted := false
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
cfg := codersdk.PrebuildsConfig{}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, pubSub := dbtestutil.NewDB(t)
|
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
ownerID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: ownerID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
|
templateVersionID := setupTestDBTemplateVersion(
|
|
ctx,
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
org.ID,
|
|
ownerID,
|
|
template.ID,
|
|
)
|
|
preset := setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
4,
|
|
uuid.New().String(),
|
|
)
|
|
preset2 := setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
10,
|
|
uuid.New().String(),
|
|
)
|
|
prebuildIDs := make([]uuid.UUID, 0)
|
|
for i := 0; i < int(preset.DesiredInstances.Int32); i++ {
|
|
prebuild, _ := setupTestDBPrebuild(
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
prebuildLatestTransition,
|
|
prebuildJobStatus,
|
|
org.ID,
|
|
preset,
|
|
template.ID,
|
|
templateVersionID,
|
|
)
|
|
prebuildIDs = append(prebuildIDs, prebuild.ID)
|
|
}
|
|
|
|
// Run the reconciliation multiple times to ensure idempotency
|
|
// 8 was arbitrary, but large enough to reasonably trust the result
|
|
for i := 1; i <= 8; i++ {
|
|
require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)
|
|
|
|
newPrebuildCount := 0
|
|
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
for _, workspace := range workspaces {
|
|
if slice.Contains(prebuildIDs, workspace.ID) {
|
|
continue
|
|
}
|
|
newPrebuildCount++
|
|
}
|
|
|
|
// NOTE: preset1 doesn't block creation of instances in preset2
|
|
require.Equal(t, preset2.DesiredInstances.Int32, int32(newPrebuildCount)) // nolint:gosec
|
|
}
|
|
}
|
|
|
|
func TestInvalidPreset(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
templateDeleted := false
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
cfg := codersdk.PrebuildsConfig{}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, pubSub := dbtestutil.NewDB(t)
|
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
ownerID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: ownerID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
|
templateVersionID := setupTestDBTemplateVersion(
|
|
ctx,
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
org.ID,
|
|
ownerID,
|
|
template.ID,
|
|
)
|
|
// Add required param, which is not set in preset. It means that creating of prebuild will constantly fail.
|
|
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
|
TemplateVersionID: templateVersionID,
|
|
Name: "required-param",
|
|
Description: "required param to make sure creating prebuild will fail",
|
|
Type: "bool",
|
|
DefaultValue: "",
|
|
Required: true,
|
|
})
|
|
setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
1,
|
|
uuid.New().String(),
|
|
)
|
|
|
|
// Run the reconciliation multiple times to ensure idempotency
|
|
// 8 was arbitrary, but large enough to reasonably trust the result
|
|
for i := 1; i <= 8; i++ {
|
|
require.NoErrorf(t, controller.ReconcileAll(ctx), "failed on iteration %d", i)
|
|
|
|
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
newPrebuildCount := len(workspaces)
|
|
|
|
// NOTE: we don't have any new prebuilds, because their creation constantly fails.
|
|
require.Equal(t, int32(0), int32(newPrebuildCount)) // nolint:gosec
|
|
}
|
|
}
|
|
|
|
func TestDeletionOfPrebuiltWorkspaceWithInvalidPreset(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
templateDeleted := false
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
cfg := codersdk.PrebuildsConfig{}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, pubSub := dbtestutil.NewDB(t)
|
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, quartz.NewMock(t), prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
ownerID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: ownerID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
|
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
|
preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
|
|
prebuiltWorkspace, _ := setupTestDBPrebuild(
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
database.WorkspaceTransitionStart,
|
|
database.ProvisionerJobStatusSucceeded,
|
|
org.ID,
|
|
preset,
|
|
template.ID,
|
|
templateVersionID,
|
|
)
|
|
|
|
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
// make sure we have only one workspace
|
|
require.Equal(t, 1, len(workspaces))
|
|
|
|
// Create a new template version and mark it as active.
|
|
// This marks the previous template version as inactive.
|
|
templateVersionID = setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
|
// Add required param, which is not set in preset.
|
|
// It means that creating of new prebuilt workspace will fail, but we should be able to clean up old prebuilt workspaces.
|
|
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
|
TemplateVersionID: templateVersionID,
|
|
Name: "required-param",
|
|
Description: "required param which isn't set in preset",
|
|
Type: "bool",
|
|
DefaultValue: "",
|
|
Required: true,
|
|
})
|
|
|
|
// Old prebuilt workspace should be deleted.
|
|
require.NoError(t, controller.ReconcileAll(ctx))
|
|
|
|
builds, err := db.GetWorkspaceBuildsByWorkspaceID(ctx, database.GetWorkspaceBuildsByWorkspaceIDParams{
|
|
WorkspaceID: prebuiltWorkspace.ID,
|
|
})
|
|
require.NoError(t, err)
|
|
// Make sure old prebuild workspace was deleted, despite it contains required parameter which isn't set in preset.
|
|
require.Equal(t, 2, len(builds))
|
|
require.Equal(t, database.WorkspaceTransitionDelete, builds[0].Transition)
|
|
}
|
|
|
|
func TestSkippingHardLimitedPresets(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
// Test cases verify the behavior of prebuild creation depending on configured failure limits.
|
|
testCases := []struct {
|
|
name string
|
|
hardLimit int64
|
|
isHardLimitHit bool
|
|
}{
|
|
{
|
|
name: "hard limit is hit - skip creation of prebuilt workspace",
|
|
hardLimit: 1,
|
|
isHardLimitHit: true,
|
|
},
|
|
{
|
|
name: "hard limit is not hit - try to create prebuilt workspace again",
|
|
hardLimit: 2,
|
|
isHardLimitHit: false,
|
|
},
|
|
}
|
|
|
|
for _, tc := range testCases {
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
templateDeleted := false
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
cfg := codersdk.PrebuildsConfig{
|
|
FailureHardLimit: serpent.Int64(tc.hardLimit),
|
|
ReconciliationBackoffInterval: 0,
|
|
}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, pubSub := dbtestutil.NewDB(t)
|
|
fakeEnqueuer := newFakeEnqueuer()
|
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry(), fakeEnqueuer)
|
|
|
|
// Template admin to receive a notification.
|
|
templateAdmin := dbgen.User(t, db, database.User{
|
|
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
|
})
|
|
|
|
// Set up test environment with a template, version, and preset.
|
|
ownerID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: ownerID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
|
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, pubSub, org.ID, ownerID, template.ID)
|
|
preset := setupTestDBPreset(t, db, templateVersionID, 1, uuid.New().String())
|
|
|
|
// Create a failed prebuild workspace that counts toward the hard failure limit.
|
|
setupTestDBPrebuild(
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
database.WorkspaceTransitionStart,
|
|
database.ProvisionerJobStatusFailed,
|
|
org.ID,
|
|
preset,
|
|
template.ID,
|
|
templateVersionID,
|
|
)
|
|
|
|
// Verify initial state: one failed workspace exists.
|
|
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
workspaceCount := len(workspaces)
|
|
require.Equal(t, 1, workspaceCount)
|
|
|
|
// We simulate a failed prebuild in the test; Consequently, the backoff mechanism is triggered when ReconcileAll is called.
|
|
// Even though ReconciliationBackoffInterval is set to zero, we still need to advance the clock by at least one nanosecond.
|
|
clock.Advance(time.Nanosecond).MustWait(ctx)
|
|
|
|
// Trigger reconciliation to attempt creating a new prebuild.
|
|
// The outcome depends on whether the hard limit has been reached.
|
|
require.NoError(t, controller.ReconcileAll(ctx))
|
|
|
|
// These two additional calls to ReconcileAll should not trigger any notifications.
|
|
// A notification is only sent once.
|
|
require.NoError(t, controller.ReconcileAll(ctx))
|
|
require.NoError(t, controller.ReconcileAll(ctx))
|
|
|
|
// Verify the final state after reconciliation.
|
|
workspaces, err = db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
updatedPreset, err := db.GetPresetByID(ctx, preset.ID)
|
|
require.NoError(t, err)
|
|
|
|
if !tc.isHardLimitHit {
|
|
// When hard limit is not reached, a new workspace should be created.
|
|
require.Equal(t, 2, len(workspaces))
|
|
require.Equal(t, database.PrebuildStatusHealthy, updatedPreset.PrebuildStatus)
|
|
return
|
|
}
|
|
|
|
// When hard limit is reached, no new workspace should be created.
|
|
require.Equal(t, 1, len(workspaces))
|
|
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
|
|
|
// When hard limit is reached, a notification should be sent.
|
|
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
|
if !assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") {
|
|
return false
|
|
}
|
|
|
|
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
|
return false
|
|
}
|
|
|
|
return true
|
|
})
|
|
require.Len(t, matching, 1)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestRunLoop(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
prebuildLatestTransition := database.WorkspaceTransitionStart
|
|
prebuildJobStatus := database.ProvisionerJobStatusRunning
|
|
templateDeleted := false
|
|
|
|
clock := quartz.NewMock(t)
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
backoffInterval := time.Minute
|
|
cfg := codersdk.PrebuildsConfig{
|
|
// Given: explicitly defined backoff configuration to validate timings.
|
|
ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
|
|
ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
|
|
ReconciliationInterval: serpent.Duration(time.Second),
|
|
}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, pubSub := dbtestutil.NewDB(t)
|
|
reconciler := prebuilds.NewStoreReconciler(db, pubSub, cfg, logger, clock, prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
ownerID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: ownerID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, ownerID, templateDeleted)
|
|
templateVersionID := setupTestDBTemplateVersion(
|
|
ctx,
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
org.ID,
|
|
ownerID,
|
|
template.ID,
|
|
)
|
|
preset := setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
4,
|
|
uuid.New().String(),
|
|
)
|
|
preset2 := setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
10,
|
|
uuid.New().String(),
|
|
)
|
|
prebuildIDs := make([]uuid.UUID, 0)
|
|
for i := 0; i < int(preset.DesiredInstances.Int32); i++ {
|
|
prebuild, _ := setupTestDBPrebuild(
|
|
t,
|
|
clock,
|
|
db,
|
|
pubSub,
|
|
prebuildLatestTransition,
|
|
prebuildJobStatus,
|
|
org.ID,
|
|
preset,
|
|
template.ID,
|
|
templateVersionID,
|
|
)
|
|
prebuildIDs = append(prebuildIDs, prebuild.ID)
|
|
}
|
|
getNewPrebuildCount := func() int32 {
|
|
newPrebuildCount := 0
|
|
workspaces, err := db.GetWorkspacesByTemplateID(ctx, template.ID)
|
|
require.NoError(t, err)
|
|
for _, workspace := range workspaces {
|
|
if slice.Contains(prebuildIDs, workspace.ID) {
|
|
continue
|
|
}
|
|
newPrebuildCount++
|
|
}
|
|
|
|
return int32(newPrebuildCount) // nolint:gosec
|
|
}
|
|
|
|
// we need to wait until ticker is initialized, and only then use clock.Advance()
|
|
// otherwise clock.Advance() will be ignored
|
|
trap := clock.Trap().NewTicker()
|
|
go reconciler.Run(ctx)
|
|
// wait until ticker is initialized
|
|
trap.MustWait(ctx).Release()
|
|
// start 1st iteration of ReconciliationLoop
|
|
// NOTE: at this point MustWait only waits until the iteration has started (ReconcileAll is called); it doesn't wait until it completes
|
|
clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)
|
|
|
|
// wait until ReconcileAll is completed
|
|
// TODO: is it possible to avoid Eventually and replace it with quartz?
|
|
// Ideally to have all control on test-level, and be able to advance loop iterations from the test.
|
|
require.Eventually(t, func() bool {
|
|
newPrebuildCount := getNewPrebuildCount()
|
|
|
|
// NOTE: preset1 doesn't block creation of instances in preset2
|
|
return preset2.DesiredInstances.Int32 == newPrebuildCount
|
|
}, testutil.WaitShort, testutil.IntervalFast)
|
|
|
|
// setup one more preset with 5 prebuilds
|
|
preset3 := setupTestDBPreset(
|
|
t,
|
|
db,
|
|
templateVersionID,
|
|
5,
|
|
uuid.New().String(),
|
|
)
|
|
newPrebuildCount := getNewPrebuildCount()
|
|
// nothing changed, because we didn't trigger a new iteration of a loop
|
|
require.Equal(t, preset2.DesiredInstances.Int32, newPrebuildCount)
|
|
|
|
// start 2nd iteration of ReconciliationLoop
|
|
// NOTE: at this point MustWait only waits until the iteration has started (ReconcileAll is called); it doesn't wait until it completes
|
|
clock.Advance(cfg.ReconciliationInterval.Value()).MustWait(ctx)
|
|
|
|
// wait until ReconcileAll is completed
|
|
require.Eventually(t, func() bool {
|
|
newPrebuildCount := getNewPrebuildCount()
|
|
|
|
// both prebuilds for preset2 and preset3 were created
|
|
return preset2.DesiredInstances.Int32+preset3.DesiredInstances.Int32 == newPrebuildCount
|
|
}, testutil.WaitShort, testutil.IntervalFast)
|
|
|
|
// gracefully stop the reconciliation loop
|
|
reconciler.Stop(ctx, nil)
|
|
}
|
|
|
|
func TestFailedBuildBackoff(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
|
|
|
// Setup.
|
|
clock := quartz.NewMock(t)
|
|
backoffInterval := time.Minute
|
|
cfg := codersdk.PrebuildsConfig{
|
|
// Given: explicitly defined backoff configuration to validate timings.
|
|
ReconciliationBackoffLookback: serpent.Duration(muchEarlier * -10), // Has to be positive.
|
|
ReconciliationBackoffInterval: serpent.Duration(backoffInterval),
|
|
ReconciliationInterval: serpent.Duration(time.Second),
|
|
}
|
|
logger := slogtest.Make(
|
|
t, &slogtest.Options{IgnoreErrors: true},
|
|
).Leveled(slog.LevelDebug)
|
|
db, ps := dbtestutil.NewDB(t)
|
|
reconciler := prebuilds.NewStoreReconciler(db, ps, cfg, logger, clock, prometheus.NewRegistry(), newNoopEnqueuer())
|
|
|
|
// Given: an active template version with presets and prebuilds configured.
|
|
const desiredInstances = 2
|
|
userID := uuid.New()
|
|
dbgen.User(t, db, database.User{
|
|
ID: userID,
|
|
})
|
|
org, template := setupTestDBTemplate(t, db, userID, false)
|
|
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, userID, template.ID)
|
|
|
|
preset := setupTestDBPreset(t, db, templateVersionID, desiredInstances, "test")
|
|
for range desiredInstances {
|
|
_, _ = setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusFailed, org.ID, preset, template.ID, templateVersionID)
|
|
}
|
|
|
|
// When: determining what actions to take next, backoff is calculated because the prebuild is in a failed state.
|
|
snapshot, err := reconciler.SnapshotState(ctx, db)
|
|
require.NoError(t, err)
|
|
require.Len(t, snapshot.Presets, 1)
|
|
presetState, err := snapshot.FilterByPreset(preset.ID)
|
|
require.NoError(t, err)
|
|
state := presetState.CalculateState()
|
|
actions, err := reconciler.CalculateActions(ctx, *presetState)
|
|
require.NoError(t, err)
|
|
|
|
// Then: the backoff time is in the future, no prebuilds are running, and we won't create any new prebuilds.
|
|
require.EqualValues(t, 0, state.Actual)
|
|
require.EqualValues(t, 0, actions.Create)
|
|
require.EqualValues(t, desiredInstances, state.Desired)
|
|
require.True(t, clock.Now().Before(actions.BackoffUntil))
|
|
|
|
// Then: the backoff time is as expected based on the number of failed builds.
|
|
require.NotNil(t, presetState.Backoff)
|
|
require.EqualValues(t, desiredInstances, presetState.Backoff.NumFailed)
|
|
require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), clock.Until(actions.BackoffUntil).Truncate(backoffInterval))
|
|
|
|
// When: advancing to the next tick which is still within the backoff time.
|
|
clock.Advance(cfg.ReconciliationInterval.Value())
|
|
|
|
// Then: the backoff interval will not have changed.
|
|
snapshot, err = reconciler.SnapshotState(ctx, db)
|
|
require.NoError(t, err)
|
|
presetState, err = snapshot.FilterByPreset(preset.ID)
|
|
require.NoError(t, err)
|
|
newState := presetState.CalculateState()
|
|
newActions, err := reconciler.CalculateActions(ctx, *presetState)
|
|
require.NoError(t, err)
|
|
require.EqualValues(t, 0, newState.Actual)
|
|
require.EqualValues(t, 0, newActions.Create)
|
|
require.EqualValues(t, desiredInstances, newState.Desired)
|
|
require.EqualValues(t, actions.BackoffUntil, newActions.BackoffUntil)
|
|
|
|
// When: advancing beyond the backoff time.
|
|
clock.Advance(clock.Until(actions.BackoffUntil.Add(time.Second)))
|
|
|
|
// Then: we will attempt to create a new prebuild.
|
|
snapshot, err = reconciler.SnapshotState(ctx, db)
|
|
require.NoError(t, err)
|
|
presetState, err = snapshot.FilterByPreset(preset.ID)
|
|
require.NoError(t, err)
|
|
state = presetState.CalculateState()
|
|
actions, err = reconciler.CalculateActions(ctx, *presetState)
|
|
require.NoError(t, err)
|
|
require.EqualValues(t, 0, state.Actual)
|
|
require.EqualValues(t, desiredInstances, state.Desired)
|
|
require.EqualValues(t, desiredInstances, actions.Create)
|
|
|
|
// When: the desired number of new prebuild are provisioned, but one fails again.
|
|
for i := 0; i < desiredInstances; i++ {
|
|
status := database.ProvisionerJobStatusFailed
|
|
if i == 1 {
|
|
status = database.ProvisionerJobStatusSucceeded
|
|
}
|
|
_, _ = setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, status, org.ID, preset, template.ID, templateVersionID)
|
|
}
|
|
|
|
// Then: the backoff time is roughly equal to two backoff intervals, since another build has failed.
|
|
snapshot, err = reconciler.SnapshotState(ctx, db)
|
|
require.NoError(t, err)
|
|
presetState, err = snapshot.FilterByPreset(preset.ID)
|
|
require.NoError(t, err)
|
|
state = presetState.CalculateState()
|
|
actions, err = reconciler.CalculateActions(ctx, *presetState)
|
|
require.NoError(t, err)
|
|
require.EqualValues(t, 1, state.Actual)
|
|
require.EqualValues(t, desiredInstances, state.Desired)
|
|
require.EqualValues(t, 0, actions.Create)
|
|
require.EqualValues(t, 3, presetState.Backoff.NumFailed)
|
|
require.EqualValues(t, backoffInterval*time.Duration(presetState.Backoff.NumFailed), clock.Until(actions.BackoffUntil).Truncate(backoffInterval))
|
|
}
|
|
|
|
func TestReconciliationLock(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
|
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
|
|
db, ps := dbtestutil.NewDB(t)
|
|
|
|
wg := sync.WaitGroup{}
|
|
mutex := sync.Mutex{}
|
|
for i := 0; i < 5; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
defer wg.Done()
|
|
reconciler := prebuilds.NewStoreReconciler(
|
|
db,
|
|
ps,
|
|
codersdk.PrebuildsConfig{},
|
|
slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug),
|
|
quartz.NewMock(t),
|
|
prometheus.NewRegistry(),
|
|
newNoopEnqueuer())
|
|
reconciler.WithReconciliationLock(ctx, logger, func(_ context.Context, _ database.Store) error {
|
|
lockObtained := mutex.TryLock()
|
|
// As long as the postgres lock is held, this mutex should always be unlocked when we get here.
|
|
// If this mutex is ever locked at this point, then that means that the postgres lock is not being held while we're
|
|
// inside WithReconciliationLock, which is meant to hold the lock.
|
|
require.True(t, lockObtained)
|
|
// Sleep a bit to give reconcilers more time to contend for the lock
|
|
time.Sleep(time.Second)
|
|
defer mutex.Unlock()
|
|
return nil
|
|
})
|
|
}()
|
|
}
|
|
wg.Wait()
|
|
}
|
|
|
|
func TestTrackResourceReplacement(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
if !dbtestutil.WillUsePostgres() {
|
|
t.Skip("This test requires postgres")
|
|
}
|
|
|
|
ctx := testutil.Context(t, testutil.WaitSuperLong)
|
|
|
|
// Setup.
|
|
clock := quartz.NewMock(t)
|
|
logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: false}).Leveled(slog.LevelDebug)
|
|
db, ps := dbtestutil.NewDB(t)
|
|
|
|
fakeEnqueuer := newFakeEnqueuer()
|
|
registry := prometheus.NewRegistry()
|
|
reconciler := prebuilds.NewStoreReconciler(db, ps, codersdk.PrebuildsConfig{}, logger, clock, registry, fakeEnqueuer)
|
|
|
|
// Given: a template admin to receive a notification.
|
|
templateAdmin := dbgen.User(t, db, database.User{
|
|
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
|
})
|
|
|
|
// Given: a prebuilt workspace.
|
|
userID := uuid.New()
|
|
dbgen.User(t, db, database.User{ID: userID})
|
|
org, template := setupTestDBTemplate(t, db, userID, false)
|
|
templateVersionID := setupTestDBTemplateVersion(ctx, t, clock, db, ps, org.ID, userID, template.ID)
|
|
preset := setupTestDBPreset(t, db, templateVersionID, 1, "b0rked")
|
|
prebuiltWorkspace, prebuild := setupTestDBPrebuild(t, clock, db, ps, database.WorkspaceTransitionStart, database.ProvisionerJobStatusSucceeded, org.ID, preset, template.ID, templateVersionID)
|
|
|
|
// Given: no replacement has been tracked yet, we should not see a metric for it yet.
|
|
require.NoError(t, reconciler.ForceMetricsUpdate(ctx))
|
|
mf, err := registry.Gather()
|
|
require.NoError(t, err)
|
|
require.Nil(t, findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
|
|
"template_name": template.Name,
|
|
"preset_name": preset.Name,
|
|
"org_name": org.Name,
|
|
}))
|
|
|
|
// When: a claim occurred and resource replacements are detected (_how_ is out of scope of this test).
|
|
reconciler.TrackResourceReplacement(ctx, prebuiltWorkspace.ID, prebuild.ID, []*sdkproto.ResourceReplacement{
|
|
{
|
|
Resource: "docker_container[0]",
|
|
Paths: []string{"env", "image"},
|
|
},
|
|
{
|
|
Resource: "docker_volume[0]",
|
|
Paths: []string{"name"},
|
|
},
|
|
})
|
|
|
|
// Then: a notification will be sent detailing the replacement(s).
|
|
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
|
// This is not an exhaustive check of the expected labels/data in the notification. This would tie the implementations
|
|
// too tightly together.
|
|
// All we need to validate is that a template of the right kind was sent, to the expected user, with some replacements.
|
|
|
|
if !assert.Equal(t, notification.TemplateID, notifications.TemplateWorkspaceResourceReplaced, "unexpected template") {
|
|
return false
|
|
}
|
|
|
|
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
|
return false
|
|
}
|
|
|
|
if !assert.Len(t, notification.Data["replacements"], 2, "unexpected replacements count") {
|
|
return false
|
|
}
|
|
|
|
return true
|
|
})
|
|
require.Len(t, matching, 1)
|
|
|
|
// Then: the metric will be incremented.
|
|
mf, err = registry.Gather()
|
|
require.NoError(t, err)
|
|
metric := findMetric(mf, prebuilds.MetricResourceReplacementsCount, map[string]string{
|
|
"template_name": template.Name,
|
|
"preset_name": preset.Name,
|
|
"org_name": org.Name,
|
|
})
|
|
require.NotNil(t, metric)
|
|
require.NotNil(t, metric.GetCounter())
|
|
require.EqualValues(t, 1, metric.GetCounter().GetValue())
|
|
}
|
|
|
|
func newNoopEnqueuer() *notifications.NoopEnqueuer {
|
|
return notifications.NewNoopEnqueuer()
|
|
}
|
|
|
|
func newFakeEnqueuer() *notificationstest.FakeEnqueuer {
|
|
return notificationstest.NewFakeEnqueuer()
|
|
}
|
|
|
|
// nolint:revive // It's a control flag, but this is a test.
|
|
func setupTestDBTemplate(
|
|
t *testing.T,
|
|
db database.Store,
|
|
userID uuid.UUID,
|
|
templateDeleted bool,
|
|
) (
|
|
database.Organization,
|
|
database.Template,
|
|
) {
|
|
t.Helper()
|
|
org := dbgen.Organization(t, db, database.Organization{})
|
|
|
|
template := dbgen.Template(t, db, database.Template{
|
|
CreatedBy: userID,
|
|
OrganizationID: org.ID,
|
|
CreatedAt: time.Now().Add(muchEarlier),
|
|
})
|
|
if templateDeleted {
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
|
|
ID: template.ID,
|
|
Deleted: true,
|
|
}))
|
|
}
|
|
return org, template
|
|
}
|
|
|
|
// nolint:revive // It's a control flag, but this is a test.
|
|
func setupTestDBTemplateWithinOrg(
|
|
t *testing.T,
|
|
db database.Store,
|
|
userID uuid.UUID,
|
|
templateDeleted bool,
|
|
templateName string,
|
|
org database.Organization,
|
|
) database.Template {
|
|
t.Helper()
|
|
|
|
template := dbgen.Template(t, db, database.Template{
|
|
Name: templateName,
|
|
CreatedBy: userID,
|
|
OrganizationID: org.ID,
|
|
CreatedAt: time.Now().Add(muchEarlier),
|
|
})
|
|
if templateDeleted {
|
|
ctx := testutil.Context(t, testutil.WaitShort)
|
|
require.NoError(t, db.UpdateTemplateDeletedByID(ctx, database.UpdateTemplateDeletedByIDParams{
|
|
ID: template.ID,
|
|
Deleted: true,
|
|
}))
|
|
}
|
|
return template
|
|
}
|
|
|
|
// Negative offsets that the test helpers add to the current time to place
// timestamps in the past.
const (
	// earlier is one hour in the past.
	earlier = -1 * time.Hour
	// muchEarlier is two hours in the past.
	muchEarlier = 2 * earlier
)
|
|
|
|
func setupTestDBTemplateVersion(
|
|
ctx context.Context,
|
|
t *testing.T,
|
|
clock quartz.Clock,
|
|
db database.Store,
|
|
ps pubsub.Pubsub,
|
|
orgID uuid.UUID,
|
|
userID uuid.UUID,
|
|
templateID uuid.UUID,
|
|
) uuid.UUID {
|
|
t.Helper()
|
|
templateVersionJob := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
|
|
CreatedAt: clock.Now().Add(muchEarlier),
|
|
CompletedAt: sql.NullTime{Time: clock.Now().Add(earlier), Valid: true},
|
|
OrganizationID: orgID,
|
|
InitiatorID: userID,
|
|
})
|
|
templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
|
|
TemplateID: uuid.NullUUID{UUID: templateID, Valid: true},
|
|
OrganizationID: orgID,
|
|
CreatedBy: userID,
|
|
JobID: templateVersionJob.ID,
|
|
CreatedAt: time.Now().Add(muchEarlier),
|
|
})
|
|
require.NoError(t, db.UpdateTemplateActiveVersionByID(ctx, database.UpdateTemplateActiveVersionByIDParams{
|
|
ID: templateID,
|
|
ActiveVersionID: templateVersion.ID,
|
|
}))
|
|
// Make sure immutable params don't break prebuilt workspace deletion logic
|
|
dbgen.TemplateVersionParameter(t, db, database.TemplateVersionParameter{
|
|
TemplateVersionID: templateVersion.ID,
|
|
Name: "test",
|
|
Description: "required & immutable param",
|
|
Type: "string",
|
|
DefaultValue: "",
|
|
Required: true,
|
|
Mutable: false,
|
|
})
|
|
return templateVersion.ID
|
|
}
|
|
|
|
func setupTestDBPreset(
|
|
t *testing.T,
|
|
db database.Store,
|
|
templateVersionID uuid.UUID,
|
|
desiredInstances int32,
|
|
presetName string,
|
|
) database.TemplateVersionPreset {
|
|
t.Helper()
|
|
preset := dbgen.Preset(t, db, database.InsertPresetParams{
|
|
TemplateVersionID: templateVersionID,
|
|
Name: presetName,
|
|
DesiredInstances: sql.NullInt32{
|
|
Valid: true,
|
|
Int32: desiredInstances,
|
|
},
|
|
})
|
|
dbgen.PresetParameter(t, db, database.InsertPresetParametersParams{
|
|
TemplateVersionPresetID: preset.ID,
|
|
Names: []string{"test"},
|
|
Values: []string{"test"},
|
|
})
|
|
return preset
|
|
}
|
|
|
|
func setupTestDBPrebuild(
|
|
t *testing.T,
|
|
clock quartz.Clock,
|
|
db database.Store,
|
|
ps pubsub.Pubsub,
|
|
transition database.WorkspaceTransition,
|
|
prebuildStatus database.ProvisionerJobStatus,
|
|
orgID uuid.UUID,
|
|
preset database.TemplateVersionPreset,
|
|
templateID uuid.UUID,
|
|
templateVersionID uuid.UUID,
|
|
) (database.WorkspaceTable, database.WorkspaceBuild) {
|
|
t.Helper()
|
|
return setupTestDBWorkspace(t, clock, db, ps, transition, prebuildStatus, orgID, preset, templateID, templateVersionID, agplprebuilds.SystemUserID, agplprebuilds.SystemUserID)
|
|
}
|
|
|
|
func setupTestDBWorkspace(
|
|
t *testing.T,
|
|
clock quartz.Clock,
|
|
db database.Store,
|
|
ps pubsub.Pubsub,
|
|
transition database.WorkspaceTransition,
|
|
prebuildStatus database.ProvisionerJobStatus,
|
|
orgID uuid.UUID,
|
|
preset database.TemplateVersionPreset,
|
|
templateID uuid.UUID,
|
|
templateVersionID uuid.UUID,
|
|
initiatorID uuid.UUID,
|
|
ownerID uuid.UUID,
|
|
) (database.WorkspaceTable, database.WorkspaceBuild) {
|
|
t.Helper()
|
|
cancelledAt := sql.NullTime{}
|
|
completedAt := sql.NullTime{}
|
|
|
|
startedAt := sql.NullTime{}
|
|
if prebuildStatus != database.ProvisionerJobStatusPending {
|
|
startedAt = sql.NullTime{Time: clock.Now().Add(muchEarlier), Valid: true}
|
|
}
|
|
|
|
buildError := sql.NullString{}
|
|
if prebuildStatus == database.ProvisionerJobStatusFailed {
|
|
completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
|
buildError = sql.NullString{String: "build failed", Valid: true}
|
|
}
|
|
|
|
switch prebuildStatus {
|
|
case database.ProvisionerJobStatusCanceling:
|
|
cancelledAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
|
case database.ProvisionerJobStatusCanceled:
|
|
completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
|
cancelledAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
|
case database.ProvisionerJobStatusSucceeded:
|
|
completedAt = sql.NullTime{Time: clock.Now().Add(earlier), Valid: true}
|
|
default:
|
|
}
|
|
|
|
workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
|
|
TemplateID: templateID,
|
|
OrganizationID: orgID,
|
|
OwnerID: ownerID,
|
|
Deleted: false,
|
|
})
|
|
job := dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
|
|
InitiatorID: initiatorID,
|
|
CreatedAt: clock.Now().Add(muchEarlier),
|
|
StartedAt: startedAt,
|
|
CompletedAt: completedAt,
|
|
CanceledAt: cancelledAt,
|
|
OrganizationID: orgID,
|
|
Error: buildError,
|
|
})
|
|
workspaceBuild := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
|
|
WorkspaceID: workspace.ID,
|
|
InitiatorID: initiatorID,
|
|
TemplateVersionID: templateVersionID,
|
|
JobID: job.ID,
|
|
TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
|
|
Transition: transition,
|
|
CreatedAt: clock.Now(),
|
|
})
|
|
dbgen.WorkspaceBuildParameters(t, db, []database.WorkspaceBuildParameter{
|
|
{
|
|
WorkspaceBuildID: workspaceBuild.ID,
|
|
Name: "test",
|
|
Value: "test",
|
|
},
|
|
})
|
|
|
|
return workspace, workspaceBuild
|
|
}
|
|
|
|
// nolint:revive // It's a control flag, but this is a test.
|
|
func setupTestDBWorkspaceAgent(t *testing.T, db database.Store, workspaceID uuid.UUID, eligible bool) database.WorkspaceAgent {
|
|
build, err := db.GetLatestWorkspaceBuildByWorkspaceID(t.Context(), workspaceID)
|
|
require.NoError(t, err)
|
|
|
|
res := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{JobID: build.JobID})
|
|
agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
|
|
ResourceID: res.ID,
|
|
})
|
|
|
|
// A prebuilt workspace is considered eligible when its agent is in a "ready" lifecycle state.
|
|
// i.e. connected to the control plane and all startup scripts have run.
|
|
if eligible {
|
|
require.NoError(t, db.UpdateWorkspaceAgentLifecycleStateByID(t.Context(), database.UpdateWorkspaceAgentLifecycleStateByIDParams{
|
|
ID: agent.ID,
|
|
LifecycleState: database.WorkspaceAgentLifecycleStateReady,
|
|
StartedAt: sql.NullTime{Time: dbtime.Now().Add(-time.Minute), Valid: true},
|
|
ReadyAt: sql.NullTime{Time: dbtime.Now(), Valid: true},
|
|
}))
|
|
}
|
|
|
|
return agent
|
|
}
|
|
|
|
var allTransitions = []database.WorkspaceTransition{
|
|
database.WorkspaceTransitionStart,
|
|
database.WorkspaceTransitionStop,
|
|
database.WorkspaceTransitionDelete,
|
|
}
|
|
|
|
var allJobStatuses = []database.ProvisionerJobStatus{
|
|
database.ProvisionerJobStatusPending,
|
|
database.ProvisionerJobStatusRunning,
|
|
database.ProvisionerJobStatusSucceeded,
|
|
database.ProvisionerJobStatusFailed,
|
|
database.ProvisionerJobStatusCanceled,
|
|
database.ProvisionerJobStatusCanceling,
|
|
}
|
|
|
|
func allJobStatusesExcept(except ...database.ProvisionerJobStatus) []database.ProvisionerJobStatus {
|
|
return slice.Filter(except, func(status database.ProvisionerJobStatus) bool {
|
|
return !slice.Contains(allJobStatuses, status)
|
|
})
|
|
}
|