WIP: adding unit-tests for reconciliation loop

Signed-off-by: Danny Kopping <danny@coder.com>
This commit is contained in:
Danny Kopping
2025-02-24 16:15:21 +00:00
parent c88c04c022
commit e9b56d9346
4 changed files with 428 additions and 136 deletions

View File

@ -93,6 +93,22 @@ func (c *Controller) ReconcileTemplate(templateID uuid.UUID) {
c.nudgeCh <- &templateID
}
// reconcile will attempt to resolve the desired vs actual state of all templates which have presets with prebuilds configured.
//
// NOTE:
//
// This function will kick of n provisioner jobs, based on the calculated state modifications.
//
// These provisioning jobs are fire-and-forget. We DO NOT wait for the prebuilt workspaces to complete their
// provisioning. As a consequence, it's possible that another reconciliation run will occur, which will mean that
// multiple preset versions could be reconciling at once. This may mean some temporary over-provisioning, but the
// reconciliation loop will bring these resources back into their desired numbers in an EVENTUALLY-consistent way.
//
// For example: we could decide to provision 1 new instance in this reconciliation.
// While that workspace is being provisioned, another template version is created which means this same preset will
// be reconciled again, leading to another workspace being provisioned. Two workspace builds will be occurring
// simultaneously for the same preset, but once both jobs have completed the reconciliation loop will notice the
// extraneous instance and delete it.
func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
var logger slog.Logger
if templateID == nil {
@ -121,7 +137,7 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
err := c.store.InTx(func(db database.Store) error {
start := time.Now()
// TODO: give up after some time waiting on this?
// TODO: use TryAcquireLock here and bail out early.
err := db.AcquireLock(ctx, database.LockIDReconcileTemplatePrebuilds)
if err != nil {
logger.Warn(ctx, "failed to acquire top-level prebuilds reconciliation lock; likely running on another coderd replica", slog.Error(err))
@ -183,7 +199,7 @@ func (c *Controller) reconcile(ctx context.Context, templateID *uuid.UUID) {
}
// determineState determines the current state of prebuilds & the presets which define them.
// This function MUST be called within
// An application-level lock is used
func (c *Controller) determineState(ctx context.Context, store database.Store, id uuid.NullUUID) (*reconciliationState, error) {
if err := ctx.Err(); err != nil {
return nil, err
@ -259,14 +275,15 @@ func (c *Controller) reconcilePrebuildsForPreset(ctx context.Context, ps *preset
levelFn = vlogger.Info
}
levelFn(ctx, "template prebuild state retrieved",
slog.F("to_create", actions.create), slog.F("to_delete", len(actions.deleteIDs)),
slog.F("create_count", actions.create), slog.F("delete_count", len(actions.deleteIDs)),
slog.F("to_delete", actions.deleteIDs),
slog.F("desired", actions.desired), slog.F("actual", actions.actual),
slog.F("outdated", actions.outdated), slog.F("extraneous", actions.extraneous),
slog.F("starting", actions.starting), slog.F("stopping", actions.stopping),
slog.F("deleting", actions.deleting), slog.F("eligible", actions.eligible))
// Provision workspaces within the same tx so we don't get any timing issues here.
// i.e. we hold the advisory lock until all reconciliatory actions have been taken.
// i.e. we hold the advisory lock until all "reconciliatory" actions have been taken.
// TODO: max per reconciliation iteration?
// TODO: i've removed the surrounding tx, but if we restore it then we need to pass down the store to these funcs.

View File

@ -1,130 +0,0 @@
package prebuilds
import (
"testing"
"time"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/coderd/database"
)
var (
templateID = uuid.New()
templateVersionID = uuid.New()
presetID = uuid.New()
preset2ID = uuid.New()
prebuildID = uuid.New()
)
func TestReconciliationActions(t *testing.T) {
cases := map[string]struct {
preset database.GetTemplatePresetsWithPrebuildsRow // TODO: make own structs; reusing these types is lame
running []database.GetRunningPrebuildsRow
inProgress []database.GetPrebuildsInProgressRow
expected reconciliationActions
}{
// New template version created which adds a new preset with prebuilds configured.
"CreateNetNew": {
preset: preset(true, 1),
expected: reconciliationActions{
desired: 1,
create: 1,
},
},
// New template version created, making an existing preset and its prebuilds outdated.
"DeleteOutdated": {
preset: preset(false, 1),
running: []database.GetRunningPrebuildsRow{
{
WorkspaceID: prebuildID,
TemplateID: templateID,
TemplateVersionID: templateVersionID,
CurrentPresetID: uuid.NullUUID{UUID: presetID, Valid: true},
DesiredPresetID: uuid.NullUUID{UUID: uuid.New(), Valid: true},
Ready: true,
},
},
expected: reconciliationActions{
outdated: 1,
deleteIDs: []uuid.UUID{prebuildID},
},
},
// Somehow an additional prebuild is running, delete it.
// This can happen if an operator messes with a prebuild's state (stop, start).
"DeleteOldestExtraneous": {
preset: preset(true, 1),
running: []database.GetRunningPrebuildsRow{
{
WorkspaceID: prebuildID,
TemplateID: templateID,
TemplateVersionID: templateVersionID,
CurrentPresetID: uuid.NullUUID{UUID: presetID, Valid: true},
DesiredPresetID: uuid.NullUUID{UUID: uuid.New(), Valid: true},
CreatedAt: time.Now().Add(-time.Hour),
},
{
WorkspaceID: uuid.New(),
TemplateID: templateID,
TemplateVersionID: templateVersionID,
CurrentPresetID: uuid.NullUUID{UUID: presetID, Valid: true},
DesiredPresetID: uuid.NullUUID{UUID: uuid.New(), Valid: true},
CreatedAt: time.Now(),
},
},
expected: reconciliationActions{
desired: 1,
extraneous: 1,
actual: 2,
deleteIDs: []uuid.UUID{prebuildID},
},
},
}
for name, tc := range cases {
t.Run(name, func(t *testing.T) {
t.Parallel()
ps := presetState{
preset: tc.preset,
running: tc.running,
inProgress: tc.inProgress,
}
actions, err := ps.calculateActions()
require.NoError(t, err, "could not calculate reconciliation actions")
validateActions(t, tc.expected, *actions)
})
}
}
func preset(active bool, instances int32) database.GetTemplatePresetsWithPrebuildsRow {
return database.GetTemplatePresetsWithPrebuildsRow{
TemplateID: templateID,
TemplateVersionID: templateVersionID,
UsingActiveVersion: active,
PresetID: presetID,
Name: "bob",
DesiredInstances: instances,
Deleted: false,
Deprecated: false,
}
}
// validateActions is a convenience func to make tests more readable; it exploits the fact that the default states for
// prebuilds align with zero values.
func validateActions(t *testing.T, expected, actual reconciliationActions) bool {
return assert.EqualValuesf(t, expected.deleteIDs, actual.deleteIDs, "'deleteIDs' did not match expectation") &&
assert.EqualValuesf(t, expected.create, actual.create, "'create' did not match expectation") &&
assert.EqualValuesf(t, expected.desired, actual.desired, "'desired' did not match expectation") &&
assert.EqualValuesf(t, expected.actual, actual.actual, "'actual' did not match expectation") &&
assert.EqualValuesf(t, expected.eligible, actual.eligible, "'eligible' did not match expectation") &&
assert.EqualValuesf(t, expected.extraneous, actual.extraneous, "'extraneous' did not match expectation") &&
assert.EqualValuesf(t, expected.outdated, actual.outdated, "'outdated' did not match expectation") &&
assert.EqualValuesf(t, expected.starting, actual.starting, "'starting' did not match expectation") &&
assert.EqualValuesf(t, expected.stopping, actual.stopping, "'stopping' did not match expectation") &&
assert.EqualValuesf(t, expected.deleting, actual.deleting, "'deleting' did not match expectation")
}

View File

@ -47,7 +47,7 @@ func (s reconciliationState) filterByPreset(presetID uuid.UUID) (*presetState, e
}
running := slice.Filter(s.runningPrebuilds, func(prebuild database.GetRunningPrebuildsRow) bool {
if !prebuild.DesiredPresetID.Valid && !prebuild.CurrentPresetID.Valid {
if !prebuild.CurrentPresetID.Valid {
return false
}
return prebuild.CurrentPresetID.UUID == preset.PresetID &&
@ -57,8 +57,10 @@ func (s reconciliationState) filterByPreset(presetID uuid.UUID) (*presetState, e
// These aren't preset-specific, but they need to inhibit all presets of this template from operating since they could
// be in-progress builds which might impact another preset. For example, if a template goes from no defined prebuilds to defined prebuilds
// and back, or a template is updated from one version to another.
// We group by the template so that all prebuilds being provisioned for a prebuild are inhibited if any prebuild for
// any preset in that template are in progress, to prevent clobbering.
inProgress := slice.Filter(s.prebuildsInProgress, func(prebuild database.GetPrebuildsInProgressRow) bool {
return prebuild.TemplateVersionID == preset.TemplateVersionID
return prebuild.TemplateID == preset.TemplateID
})
return &presetState{
@ -103,6 +105,8 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
}
}
// In-progress builds are common across all presets belonging to a given template.
// In other words: these values will be identical across all presets belonging to this template.
for _, progress := range p.inProgress {
switch progress.Transition {
case database.WorkspaceTransitionStart:
@ -138,6 +142,7 @@ func (p presetState) calculateActions() (*reconciliationActions, error) {
)
// Bail early to avoid scheduling new prebuilds while operations are in progress.
// TODO: optimization: we should probably be able to create prebuilds while others are deleting for a given preset.
if (toCreate+toDelete) > 0 && (starting+stopping+deleting) > 0 {
// TODO: move up
//c.logger.Warn(ctx, "prebuild operations in progress, skipping reconciliation",

View File

@ -0,0 +1,400 @@
package prebuilds
import (
"testing"
"time"
"github.com/google/uuid"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/coderd/database"
)
type options struct {
templateID uuid.UUID
templateVersionID uuid.UUID
presetID uuid.UUID
presetName string
prebuildID uuid.UUID
workspaceName string
}
// templateID is common across all option sets.
var templateID = uuid.New()
const (
optionSet0 = iota
optionSet1
optionSet2
)
var opts = map[uint]options{
optionSet0: {
templateID: templateID,
templateVersionID: uuid.New(),
presetID: uuid.New(),
presetName: "my-preset",
prebuildID: uuid.New(),
workspaceName: "prebuilds0",
},
optionSet1: {
templateID: templateID,
templateVersionID: uuid.New(),
presetID: uuid.New(),
presetName: "my-preset",
prebuildID: uuid.New(),
workspaceName: "prebuilds1",
},
optionSet2: {
templateID: templateID,
templateVersionID: uuid.New(),
presetID: uuid.New(),
presetName: "my-preset",
prebuildID: uuid.New(),
workspaceName: "prebuilds2",
},
}
// A new template version with a preset without prebuilds configured should result in no prebuilds being created.
func TestNoPrebuilds(t *testing.T) {
current := opts[optionSet0]
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(true, 0, current),
}
state := newReconciliationState(presets, nil, nil)
ps, err := state.filterByPreset(current.presetID)
require.NoError(t, err)
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{ /*all zero values*/ }, *actions)
}
// A new template version with a preset with prebuilds configured should result in a new prebuild being created.
func TestNetNew(t *testing.T) {
current := opts[optionSet0]
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(true, 1, current),
}
state := newReconciliationState(presets, nil, nil)
ps, err := state.filterByPreset(current.presetID)
require.NoError(t, err)
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{
desired: 1,
create: 1,
}, *actions)
}
// A new template version is created with a preset with prebuilds configured; this outdates the older version and
// requires the old prebuilds to be destroyed and new prebuilds to be created.
func TestOutdatedPrebuilds(t *testing.T) {
outdated := opts[optionSet0]
current := opts[optionSet1]
// GIVEN: 2 presets, one outdated and one new.
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(false, 1, outdated),
preset(true, 1, current),
}
// GIVEN: a running prebuild for the outdated preset.
running := []database.GetRunningPrebuildsRow{
prebuild(outdated),
}
// GIVEN: no in-progress builds.
var inProgress []database.GetPrebuildsInProgressRow
// WHEN: calculating the outdated preset's state.
state := newReconciliationState(presets, running, inProgress)
ps, err := state.filterByPreset(outdated.presetID)
require.NoError(t, err)
// THEN: we should identify that this prebuild is outdated and needs to be deleted.
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{outdated: 1, deleteIDs: []uuid.UUID{outdated.prebuildID}}, *actions)
// WHEN: calculating the current preset's state.
ps, err = state.filterByPreset(current.presetID)
require.NoError(t, err)
// THEN: we should not be blocked from creating a new prebuild while the outdate one deletes.
actions, err = ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{desired: 1, create: 1}, *actions)
}
// A new template version is created with a preset with prebuilds configured; while the outdated prebuild is deleting,
// the new preset's prebuild cannot be provisioned concurrently, to prevent clobbering.
func TestBlockedOnDeleteActions(t *testing.T) {
outdated := opts[optionSet0]
current := opts[optionSet1]
// GIVEN: 2 presets, one outdated and one new.
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(false, 1, outdated),
preset(true, 1, current),
}
// GIVEN: a running prebuild for the outdated preset.
running := []database.GetRunningPrebuildsRow{
prebuild(outdated),
}
// GIVEN: one prebuild for the old preset which is currently deleting.
inProgress := []database.GetPrebuildsInProgressRow{
{
TemplateID: outdated.templateID,
TemplateVersionID: outdated.templateVersionID,
Transition: database.WorkspaceTransitionDelete,
Count: 1,
},
}
// WHEN: calculating the outdated preset's state.
state := newReconciliationState(presets, running, inProgress)
ps, err := state.filterByPreset(outdated.presetID)
require.NoError(t, err)
// THEN: we should identify that this prebuild is in progress, and not attempt to delete this prebuild again.
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{outdated: 1, deleting: 1}, *actions)
// WHEN: calculating the current preset's state.
ps, err = state.filterByPreset(current.presetID)
require.NoError(t, err)
// THEN: we are blocked from creating a new prebuild while another one is busy provisioning.
actions, err = ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{desired: 1, create: 0, deleting: 1}, *actions)
}
// A new template version is created with a preset with prebuilds configured. An operator comes along and stops one of the
// running prebuilds (this shouldn't be done, but it's possible). While this prebuild is stopping, all other prebuild
// actions are blocked.
func TestBlockedOnStopActions(t *testing.T) {
outdated := opts[optionSet0]
current := opts[optionSet1]
// GIVEN: 2 presets, one outdated and one new (which now expects 2 prebuilds!).
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(false, 1, outdated),
preset(true, 2, current),
}
// GIVEN: NO running prebuilds for either preset.
var running []database.GetRunningPrebuildsRow
// GIVEN: one prebuild for the old preset which is currently stopping.
inProgress := []database.GetPrebuildsInProgressRow{
{
TemplateID: outdated.templateID,
TemplateVersionID: outdated.templateVersionID,
Transition: database.WorkspaceTransitionStop,
Count: 1,
},
}
// WHEN: calculating the outdated preset's state.
state := newReconciliationState(presets, running, inProgress)
ps, err := state.filterByPreset(outdated.presetID)
require.NoError(t, err)
// THEN: there is nothing to do.
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{stopping: 1}, *actions)
// WHEN: calculating the current preset's state.
ps, err = state.filterByPreset(current.presetID)
require.NoError(t, err)
// THEN: we are blocked from creating a new prebuild while another one is busy provisioning.
actions, err = ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{desired: 2, stopping: 1, create: 0}, *actions)
}
// A new template version is created with a preset with prebuilds configured; the outdated prebuilds are deleted,
// and one of the new prebuilds is already being provisioned, but we bail out early if operations are already in progress
// for this prebuild - to prevent clobbering.
func TestBlockedOnStartActions(t *testing.T) {
outdated := opts[optionSet0]
current := opts[optionSet1]
// GIVEN: 2 presets, one outdated and one new (which now expects 2 prebuilds!).
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(false, 1, outdated),
preset(true, 2, current),
}
// GIVEN: NO running prebuilds for either preset.
var running []database.GetRunningPrebuildsRow
// GIVEN: one prebuild for the old preset which is currently provisioning.
inProgress := []database.GetPrebuildsInProgressRow{
{
TemplateID: current.templateID,
TemplateVersionID: current.templateVersionID,
Transition: database.WorkspaceTransitionStart,
Count: 1,
},
}
// WHEN: calculating the outdated preset's state.
state := newReconciliationState(presets, running, inProgress)
ps, err := state.filterByPreset(outdated.presetID)
require.NoError(t, err)
// THEN: there is nothing to do.
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{starting: 1}, *actions)
// WHEN: calculating the current preset's state.
ps, err = state.filterByPreset(current.presetID)
require.NoError(t, err)
// THEN: we are blocked from creating a new prebuild while another one is busy provisioning.
actions, err = ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{desired: 2, starting: 1, create: 0}, *actions)
}
// Additional prebuilds exist for a given preset configuration; these must be deleted.
func TestExtraneous(t *testing.T) {
current := opts[optionSet0]
// GIVEN: a preset with 1 desired prebuild.
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(true, 1, current),
}
var older uuid.UUID
// GIVEN: 2 running prebuilds for the preset.
running := []database.GetRunningPrebuildsRow{
prebuild(current, func(row database.GetRunningPrebuildsRow) database.GetRunningPrebuildsRow {
// The older of the running prebuilds will be deleted in order to maintain freshness.
row.CreatedAt = time.Now().Add(-time.Hour)
older = row.WorkspaceID
return row
}),
prebuild(current, func(row database.GetRunningPrebuildsRow) database.GetRunningPrebuildsRow {
row.CreatedAt = time.Now()
return row
}),
}
// GIVEN: NO prebuilds in progress.
var inProgress []database.GetPrebuildsInProgressRow
// WHEN: calculating the current preset's state.
state := newReconciliationState(presets, running, inProgress)
ps, err := state.filterByPreset(current.presetID)
require.NoError(t, err)
// THEN: an extraneous prebuild is detected and marked for deletion.
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{
actual: 2, desired: 1, extraneous: 1, deleteIDs: []uuid.UUID{older}, eligible: 2,
}, *actions)
}
// As above, but no actions will be performed because
func TestExtraneousInProgress(t *testing.T) {
current := opts[optionSet0]
// GIVEN: a preset with 1 desired prebuild.
presets := []database.GetTemplatePresetsWithPrebuildsRow{
preset(true, 1, current),
}
var older uuid.UUID
// GIVEN: 2 running prebuilds for the preset.
running := []database.GetRunningPrebuildsRow{
prebuild(current, func(row database.GetRunningPrebuildsRow) database.GetRunningPrebuildsRow {
// The older of the running prebuilds will be deleted in order to maintain freshness.
row.CreatedAt = time.Now().Add(-time.Hour)
older = row.WorkspaceID
return row
}),
prebuild(current, func(row database.GetRunningPrebuildsRow) database.GetRunningPrebuildsRow {
row.CreatedAt = time.Now()
return row
}),
}
// GIVEN: NO prebuilds in progress.
var inProgress []database.GetPrebuildsInProgressRow
// WHEN: calculating the current preset's state.
state := newReconciliationState(presets, running, inProgress)
ps, err := state.filterByPreset(current.presetID)
require.NoError(t, err)
// THEN: an extraneous prebuild is detected and marked for deletion.
actions, err := ps.calculateActions()
require.NoError(t, err)
validateActions(t, reconciliationActions{
actual: 2, desired: 1, extraneous: 1, deleteIDs: []uuid.UUID{older}, eligible: 2,
}, *actions)
}
func preset(active bool, instances int32, opts options) database.GetTemplatePresetsWithPrebuildsRow {
return database.GetTemplatePresetsWithPrebuildsRow{
TemplateID: opts.templateID,
TemplateVersionID: opts.templateVersionID,
PresetID: opts.presetID,
UsingActiveVersion: active,
Name: opts.presetName,
DesiredInstances: instances,
Deleted: false,
Deprecated: false,
}
}
func prebuild(opts options, muts ...func(row database.GetRunningPrebuildsRow) database.GetRunningPrebuildsRow) database.GetRunningPrebuildsRow {
entry := database.GetRunningPrebuildsRow{
WorkspaceID: opts.prebuildID,
WorkspaceName: opts.workspaceName,
TemplateID: opts.templateID,
TemplateVersionID: opts.templateVersionID,
CurrentPresetID: uuid.NullUUID{UUID: opts.presetID, Valid: true},
Ready: true,
CreatedAt: time.Now(),
}
for _, mut := range muts {
entry = mut(entry)
}
return entry
}
// validateActions is a convenience func to make tests more readable; it exploits the fact that the default states for
// prebuilds align with zero values.
func validateActions(t *testing.T, expected, actual reconciliationActions) bool {
return assert.EqualValuesf(t, expected.deleteIDs, actual.deleteIDs, "'deleteIDs' did not match expectation") &&
assert.EqualValuesf(t, expected.create, actual.create, "'create' did not match expectation") &&
assert.EqualValuesf(t, expected.desired, actual.desired, "'desired' did not match expectation") &&
assert.EqualValuesf(t, expected.actual, actual.actual, "'actual' did not match expectation") &&
assert.EqualValuesf(t, expected.eligible, actual.eligible, "'eligible' did not match expectation") &&
assert.EqualValuesf(t, expected.extraneous, actual.extraneous, "'extraneous' did not match expectation") &&
assert.EqualValuesf(t, expected.outdated, actual.outdated, "'outdated' did not match expectation") &&
assert.EqualValuesf(t, expected.starting, actual.starting, "'starting' did not match expectation") &&
assert.EqualValuesf(t, expected.stopping, actual.stopping, "'stopping' did not match expectation") &&
assert.EqualValuesf(t, expected.deleting, actual.deleting, "'deleting' did not match expectation")
}