fix: make GetWorkspacesEligibleForTransition return even less false positives (#15594)

Relates to https://github.com/coder/coder/issues/15082

Further to https://github.com/coder/coder/pull/15429, this reduces the
amount of false-positives returned by the 'is eligible for autostart'
part of the query. We achieve this by calculating the 'next start at'
time of the workspace, storing it in the database, and using it in our
`GetWorkspacesEligibleForTransition` query.

The prior implementation of the 'is eligible for autostart' query would
return _all_ workspaces that at some point in the future _might_ be
eligible for autostart. This now ensures we only return workspaces that
_should_ be eligible for autostart.

We also now pass `currentTick` instead of `t` to the
`GetWorkspacesEligibleForTransition` query as otherwise we'll have one
round of workspaces that are skipped by `isEligibleForTransition` due to
`currentTick` being a truncated version of `t`.
This commit is contained in:
Danielle Maywood
2024-12-02 21:02:36 +00:00
committed by GitHub
parent 2b57dcc68c
commit e21a301682
35 changed files with 1012 additions and 75 deletions

View File

@ -21,6 +21,7 @@ import (
agpl "github.com/coder/coder/v2/coderd/schedule"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/quartz"
)
// EnterpriseTemplateScheduleStore provides an agpl.TemplateScheduleStore that
@ -30,8 +31,8 @@ type EnterpriseTemplateScheduleStore struct {
// update.
UserQuietHoursScheduleStore *atomic.Pointer[agpl.UserQuietHoursScheduleStore]
// Custom time.Now() function to use in tests. Defaults to dbtime.Now().
TimeNowFn func() time.Time
// Clock for testing
Clock quartz.Clock
enqueuer notifications.Enqueuer
logger slog.Logger
@ -39,19 +40,21 @@ type EnterpriseTemplateScheduleStore struct {
var _ agpl.TemplateScheduleStore = &EnterpriseTemplateScheduleStore{}
func NewEnterpriseTemplateScheduleStore(userQuietHoursStore *atomic.Pointer[agpl.UserQuietHoursScheduleStore], enqueuer notifications.Enqueuer, logger slog.Logger) *EnterpriseTemplateScheduleStore {
func NewEnterpriseTemplateScheduleStore(userQuietHoursStore *atomic.Pointer[agpl.UserQuietHoursScheduleStore], enqueuer notifications.Enqueuer, logger slog.Logger, clock quartz.Clock) *EnterpriseTemplateScheduleStore {
if clock == nil {
clock = quartz.NewReal()
}
return &EnterpriseTemplateScheduleStore{
UserQuietHoursScheduleStore: userQuietHoursStore,
Clock: clock,
enqueuer: enqueuer,
logger: logger,
}
}
func (s *EnterpriseTemplateScheduleStore) now() time.Time {
if s.TimeNowFn != nil {
return s.TimeNowFn()
}
return dbtime.Now()
return dbtime.Time(s.Clock.Now())
}
// Get implements agpl.TemplateScheduleStore.
@ -164,7 +167,7 @@ func (s *EnterpriseTemplateScheduleStore) Set(ctx context.Context, db database.S
var dormantAt time.Time
if opts.UpdateWorkspaceDormantAt {
dormantAt = dbtime.Now()
dormantAt = s.now()
}
// If we updated the time_til_dormant_autodelete we need to update all the workspaces deleting_at
@ -205,8 +208,45 @@ func (s *EnterpriseTemplateScheduleStore) Set(ctx context.Context, db database.S
return database.Template{}, err
}
if opts.AutostartRequirement.DaysOfWeek != tpl.AutostartAllowedDays() {
templateSchedule, err := s.Get(ctx, db, tpl.ID)
if err != nil {
return database.Template{}, xerrors.Errorf("get template schedule: %w", err)
}
//nolint:gocritic // We need to be able to read information about all workspaces.
workspaces, err := db.GetWorkspacesByTemplateID(dbauthz.AsSystemRestricted(ctx), tpl.ID)
if err != nil {
return database.Template{}, xerrors.Errorf("get workspaces by template id: %w", err)
}
workspaceIDs := []uuid.UUID{}
nextStartAts := []time.Time{}
for _, workspace := range workspaces {
nextStartAt := time.Time{}
if workspace.AutostartSchedule.Valid {
next, err := agpl.NextAllowedAutostart(s.now(), workspace.AutostartSchedule.String, templateSchedule)
if err == nil {
nextStartAt = dbtime.Time(next.UTC())
}
}
workspaceIDs = append(workspaceIDs, workspace.ID)
nextStartAts = append(nextStartAts, nextStartAt)
}
//nolint:gocritic // We need to be able to update information about all workspaces.
if err := db.BatchUpdateWorkspaceNextStartAt(dbauthz.AsSystemRestricted(ctx), database.BatchUpdateWorkspaceNextStartAtParams{
IDs: workspaceIDs,
NextStartAts: nextStartAts,
}); err != nil {
return database.Template{}, xerrors.Errorf("update workspace next start at: %w", err)
}
}
for _, ws := range markedForDeletion {
dormantTime := dbtime.Now().Add(opts.TimeTilDormantAutoDelete)
dormantTime := s.now().Add(opts.TimeTilDormantAutoDelete)
_, err = s.enqueuer.Enqueue(
// nolint:gocritic // Need actor to enqueue notification
dbauthz.AsNotifier(ctx),
@ -304,6 +344,23 @@ func (s *EnterpriseTemplateScheduleStore) updateWorkspaceBuild(ctx context.Conte
return xerrors.Errorf("calculate new autostop for workspace %q: %w", workspace.ID, err)
}
if workspace.AutostartSchedule.Valid {
templateScheduleOptions, err := s.Get(ctx, db, workspace.TemplateID)
if err != nil {
return xerrors.Errorf("get template schedule options: %w", err)
}
nextStartAt, _ := agpl.NextAutostart(s.now(), workspace.AutostartSchedule.String, templateScheduleOptions)
err = db.UpdateWorkspaceNextStartAt(ctx, database.UpdateWorkspaceNextStartAtParams{
ID: workspace.ID,
NextStartAt: sql.NullTime{Valid: true, Time: nextStartAt},
})
if err != nil {
return xerrors.Errorf("update workspace next start at: %w", err)
}
}
// If max deadline is before now()+2h, then set it to that.
// This is intended to give ample warning to this workspace about an upcoming auto-stop.
// If we were to omit this "grace" period, then this workspace could be set to be stopped "now".