docs: add documentation for prebuild scheduling feature (#18462)

Follow-up to https://github.com/coder/coder/pull/18126

Changes:
- address issue mentioned here:
https://github.com/coder/coder/pull/18126#discussion_r2144557600
- add docs for prebuilds scheduling

---------

Co-authored-by: Danny Kopping <danny@coder.com>
Co-authored-by: Atif Ali <atif@coder.com>
This commit is contained in:
Yevhenii Shcherbina
2025-06-20 10:08:47 -04:00
committed by GitHub
parent da5d5ba96a
commit 8e3022ed9e
4 changed files with 124 additions and 30 deletions

View File

@ -267,14 +267,14 @@ func (p PresetSnapshot) CalculateState() *ReconciliationState {
// - ActionTypeBackoff: Only BackoffUntil is set, indicating when to retry
// - ActionTypeCreate: Only Create is set, indicating how many prebuilds to create
// - ActionTypeDelete: Only DeleteIDs is set, containing IDs of prebuilds to delete
func (p PresetSnapshot) CalculateActions(clock quartz.Clock, backoffInterval time.Duration) ([]*ReconciliationActions, error) {
func (p PresetSnapshot) CalculateActions(backoffInterval time.Duration) ([]*ReconciliationActions, error) {
// TODO: align workspace states with how we represent them on the FE and the CLI
// right now there are some slight differences which can lead to additional prebuilds being created
// TODO: add mechanism to prevent prebuilds being reconciled from being claimable by users; i.e. if a prebuild is
// about to be deleted, it should not be deleted if it has been claimed - beware of TOCTOU races!
actions, needsBackoff := p.needsBackoffPeriod(clock, backoffInterval)
actions, needsBackoff := p.needsBackoffPeriod(p.clock, backoffInterval)
if needsBackoff {
return actions, nil
}

View File

@ -86,12 +86,12 @@ func TestNoPrebuilds(t *testing.T) {
preset(true, 0, current),
}
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, nil, nil, nil, nil, quartz.NewMock(t), testutil.Logger(t))
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, nil, nil, nil, nil, clock, testutil.Logger(t))
ps, err := snapshot.FilterByPreset(current.presetID)
require.NoError(t, err)
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{ /*all zero values*/ }, *state)
@ -108,12 +108,12 @@ func TestNetNew(t *testing.T) {
preset(true, 1, current),
}
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, nil, nil, nil, nil, quartz.NewMock(t), testutil.Logger(t))
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, nil, nil, nil, nil, clock, testutil.Logger(t))
ps, err := snapshot.FilterByPreset(current.presetID)
require.NoError(t, err)
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
@ -156,7 +156,7 @@ func TestOutdatedPrebuilds(t *testing.T) {
// THEN: we should identify that this prebuild is outdated and needs to be deleted.
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 1,
@ -174,7 +174,7 @@ func TestOutdatedPrebuilds(t *testing.T) {
// THEN: we should not be blocked from creating a new prebuild while the outdated one deletes.
state = ps.CalculateState()
actions, err = ps.CalculateActions(clock, backoffInterval)
actions, err = ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{Desired: 1}, *state)
validateActions(t, []*prebuilds.ReconciliationActions{
@ -223,7 +223,7 @@ func TestDeleteOutdatedPrebuilds(t *testing.T) {
// THEN: we should identify that this prebuild is outdated and needs to be deleted.
// Despite the fact that deletion of another outdated prebuild is already in progress.
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 1,
@ -467,7 +467,7 @@ func TestInProgressActions(t *testing.T) {
// THEN: we should identify that this prebuild is in progress.
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
tc.checkFn(*state, actions)
})
@ -510,7 +510,7 @@ func TestExtraneous(t *testing.T) {
// THEN: an extraneous prebuild is detected and marked for deletion.
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 2, Desired: 1, Extraneous: 1, Eligible: 2,
@ -685,13 +685,13 @@ func TestExpiredPrebuilds(t *testing.T) {
}
// WHEN: calculating the current preset's state.
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, running, nil, nil, nil, quartz.NewMock(t), testutil.Logger(t))
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, running, nil, nil, nil, clock, testutil.Logger(t))
ps, err := snapshot.FilterByPreset(current.presetID)
require.NoError(t, err)
// THEN: we should identify that this prebuild is expired.
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
tc.checkFn(running, *state, actions)
})
@ -727,7 +727,7 @@ func TestDeprecated(t *testing.T) {
// THEN: all running prebuilds should be deleted because the template is deprecated.
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 1,
@ -774,13 +774,13 @@ func TestLatestBuildFailed(t *testing.T) {
}
// WHEN: calculating the current preset's state.
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, running, inProgress, backoffs, nil, quartz.NewMock(t), testutil.Logger(t))
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, running, inProgress, backoffs, nil, clock, testutil.Logger(t))
psCurrent, err := snapshot.FilterByPreset(current.presetID)
require.NoError(t, err)
// THEN: reconciliation should backoff.
state := psCurrent.CalculateState()
actions, err := psCurrent.CalculateActions(clock, backoffInterval)
actions, err := psCurrent.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 0, Desired: 1,
@ -798,7 +798,7 @@ func TestLatestBuildFailed(t *testing.T) {
// THEN: it should NOT be in backoff because all is OK.
state = psOther.CalculateState()
actions, err = psOther.CalculateActions(clock, backoffInterval)
actions, err = psOther.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 1, Desired: 1, Eligible: 1,
@ -812,7 +812,7 @@ func TestLatestBuildFailed(t *testing.T) {
psCurrent, err = snapshot.FilterByPreset(current.presetID)
require.NoError(t, err)
state = psCurrent.CalculateState()
actions, err = psCurrent.CalculateActions(clock, backoffInterval)
actions, err = psCurrent.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
Actual: 0, Desired: 1,
@ -867,7 +867,7 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
},
}
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, nil, inProgress, nil, nil, quartz.NewMock(t), testutil.Logger(t))
snapshot := prebuilds.NewGlobalSnapshot(presets, nil, nil, inProgress, nil, nil, clock, testutil.Logger(t))
// Nothing has to be created for preset 1.
{
@ -875,7 +875,7 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
require.NoError(t, err)
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
@ -891,7 +891,7 @@ func TestMultiplePresetsPerTemplateVersion(t *testing.T) {
require.NoError(t, err)
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
@ -995,7 +995,7 @@ func TestPrebuildScheduling(t *testing.T) {
require.NoError(t, err)
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{
@ -1016,7 +1016,7 @@ func TestPrebuildScheduling(t *testing.T) {
require.NoError(t, err)
state := ps.CalculateState()
actions, err := ps.CalculateActions(clock, backoffInterval)
actions, err := ps.CalculateActions(backoffInterval)
require.NoError(t, err)
validateState(t, prebuilds.ReconciliationState{

View File

@ -12,6 +12,7 @@ Prebuilt workspaces are:
- Created and maintained automatically by Coder to match your specified preset configurations.
- Claimed transparently when developers create workspaces.
- Monitored and replaced automatically to maintain your desired pool size.
- Automatically scaled based on time-based schedules to optimize resource usage.
## Relationship to workspace presets
@ -111,6 +112,105 @@ prebuilt workspace can remain before it is considered expired and eligible for c
Expired prebuilt workspaces are removed during the reconciliation loop to avoid stale environments and resource waste.
New prebuilt workspaces are only created to maintain the desired count if needed.
### Scheduling
Prebuilt workspaces support time-based scheduling to scale the number of instances up or down.
This allows you to reduce resource costs during off-hours while maintaining availability during peak usage times.
Configure scheduling by adding a `scheduling` block within your `prebuilds` configuration:
```tf
data "coder_workspace_preset" "goland" {
name = "GoLand: Large"
parameters {
jetbrains_ide = "GO"
cpus = 8
memory = 16
}
prebuilds {
instances = 0 # default to 0 instances
scheduling {
timezone = "UTC" # only a single timezone may be used for simplicity
# scale to 3 instances during the work week
schedule {
cron = "* 8-18 * * 1-5" # from 8AM-6:59PM, Mon-Fri, UTC
instances = 3 # scale to 3 instances
}
# scale to 1 instance on Saturdays for urgent support queries
schedule {
cron = "* 8-14 * * 6" # from 8AM-2:59PM, Sat, UTC
instances = 1 # scale to 1 instance
}
}
}
}
```
**Scheduling configuration:**
- **`timezone`**: The timezone for all cron expressions (required). Only a single timezone is supported per scheduling configuration.
- **`schedule`**: One or more schedule blocks defining when to scale to specific instance counts.
- **`cron`**: Cron expression interpreted as continuous time ranges (required).
- **`instances`**: Number of prebuilt workspaces to maintain during this schedule (required).
**How scheduling works:**
1. The reconciliation loop evaluates all configured schedules every reconciliation interval (`CODER_WORKSPACE_PREBUILDS_RECONCILIATION_INTERVAL`).
2. The schedule that matches the current time becomes active. Overlapping schedules are disallowed by validation rules.
3. If no schedules match the current time, the base `instances` count is used.
4. The reconciliation loop automatically creates or destroys prebuilt workspaces to match the target count.
**Cron expression format:**
Cron expressions follow the format: `* HOUR DOM MONTH DAY-OF-WEEK`
- `*` (minute): Must always be `*` to ensure the schedule covers entire hours rather than specific minute intervals
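- `HOUR`: 0-23, range (e.g., 8-18 for 8AM-6:59PM), comma-separated list of values or ranges (e.g., 19-23,0-7), or `*`
- `DOM` (day-of-month): 1-31, range, or `*`
- `MONTH`: 1-12, range, or `*`
- `DAY-OF-WEEK`: 0-6 (Sunday=0, Saturday=6), range (e.g., 1-5 for Monday to Friday), comma-separated list (e.g., 6,0 for Saturday and Sunday), or `*`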
**Important notes about cron expressions:**
- **Minutes must always be `*`**: To ensure the schedule covers entire hours
- **Time ranges are continuous**: A range like `8-18` means from 8AM to 6:59PM (inclusive of both start and end hours)
- **Weekday ranges**: `1-5` means Monday through Friday (Monday=1, Friday=5)
- **No overlapping schedules**: The validation system prevents overlapping schedules.
**Example schedules:**
```tf
# Business hours only (8AM-6:59PM, Mon-Fri)
schedule {
cron = "* 8-18 * * 1-5"
instances = 5
}
# 24/7 coverage with reduced capacity overnight and on weekends
schedule {
cron = "* 8-18 * * 1-5" # Business hours (8AM-6:59PM, Mon-Fri)
instances = 10
}
schedule {
cron = "* 19-23,0-7 * * 1-5" # Evenings and nights (7PM-11:59PM, 12AM-7:59AM, Mon-Fri)
instances = 2
}
schedule {
cron = "* * * * 6,0" # Weekends
instances = 2
}
# Weekend support (10AM-4:59PM, Sat-Sun)
schedule {
cron = "* 10-16 * * 6,0"
instances = 1
}
```
### Template updates and the prebuilt workspace lifecycle
Prebuilt workspaces are not updated after they are provisioned.
@ -195,12 +295,6 @@ The prebuilt workspaces feature has these current limitations:
[View issue](https://github.com/coder/internal/issues/364)
- **Autoscaling**
Prebuilt workspaces remain running until claimed. There's no automated mechanism to reduce instances during off-hours.
[View issue](https://github.com/coder/internal/issues/312)
### Monitoring and observability
#### Available metrics

View File

@ -518,7 +518,7 @@ func (c *StoreReconciler) CalculateActions(ctx context.Context, snapshot prebuil
return nil, ctx.Err()
}
return snapshot.CalculateActions(c.clock, c.cfg.ReconciliationBackoffInterval.Value())
return snapshot.CalculateActions(c.cfg.ReconciliationBackoffInterval.Value())
}
func (c *StoreReconciler) WithReconciliationLock(