refactor(coderd): collapse activityBumpWorkspace into a single query (#9652)

* Adds unit-style tests for activityBumpWorkspace
* Ports logic of activityBumpWorkspace to a SQL query
* Updates activityBumpWorkspace to call above query
This commit is contained in:
Cian Johnston
2023-09-14 09:09:51 +01:00
committed by GitHub
parent 38560dd922
commit 8b6e2862fd
9 changed files with 424 additions and 69 deletions

View File

@ -2,8 +2,6 @@ package coderd
import (
"context"
"database/sql"
"errors"
"time"
"github.com/google/uuid"
@ -11,7 +9,6 @@ import (
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbtime"
)
// activityBumpWorkspace automatically bumps the workspace's auto-off timer
@ -21,72 +18,7 @@ func activityBumpWorkspace(ctx context.Context, log slog.Logger, db database.Sto
// low priority operations fail first.
ctx, cancel := context.WithTimeout(ctx, time.Second*15)
defer cancel()
err := db.InTx(func(s database.Store) error {
build, err := s.GetLatestWorkspaceBuildByWorkspaceID(ctx, workspaceID)
if errors.Is(err, sql.ErrNoRows) {
return nil
} else if err != nil {
return xerrors.Errorf("get latest workspace build: %w", err)
}
job, err := s.GetProvisionerJobByID(ctx, build.JobID)
if err != nil {
return xerrors.Errorf("get provisioner job: %w", err)
}
if build.Transition != database.WorkspaceTransitionStart || !job.CompletedAt.Valid {
return nil
}
if build.Deadline.IsZero() {
// Workspace shutdown is manual
return nil
}
workspace, err := s.GetWorkspaceByID(ctx, workspaceID)
if err != nil {
return xerrors.Errorf("get workspace: %w", err)
}
var (
// We bump by the original TTL to prevent counter-intuitive behavior
// as the TTL wraps. For example, if I set the TTL to 12 hours, sign off
// work at midnight, come back at 10am, I would want another full day
// of uptime. In the prior implementation, the workspace would enter
// a state of always expiring 1 hour in the future
bumpAmount = time.Duration(workspace.Ttl.Int64)
// DB writes are expensive so we only bump when 5% of the deadline
// has elapsed.
bumpEvery = bumpAmount / 20
timeSinceLastBump = bumpAmount - time.Until(build.Deadline)
)
if timeSinceLastBump < bumpEvery {
return nil
}
if bumpAmount == 0 {
return nil
}
newDeadline := dbtime.Now().Add(bumpAmount)
if !build.MaxDeadline.IsZero() && newDeadline.After(build.MaxDeadline) {
newDeadline = build.MaxDeadline
}
if err := s.UpdateWorkspaceBuildByID(ctx, database.UpdateWorkspaceBuildByIDParams{
ID: build.ID,
UpdatedAt: dbtime.Now(),
ProvisionerState: build.ProvisionerState,
Deadline: newDeadline,
MaxDeadline: build.MaxDeadline,
}); err != nil {
return xerrors.Errorf("update workspace build: %w", err)
}
return nil
}, nil)
if err != nil {
if err := db.ActivityBumpWorkspace(ctx, workspaceID); err != nil {
if !xerrors.Is(err, context.Canceled) && !database.IsQueryCanceledError(err) {
// Bump will fail if the context is canceled, but this is ok.
log.Error(ctx, "bump failed", slog.Error(err),

View File

@ -0,0 +1,233 @@
package coderd
import (
"database/sql"
"runtime"
"testing"
"time"
"github.com/google/uuid"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/testutil"
"github.com/stretchr/testify/require"
)
func Test_ActivityBumpWorkspace(t *testing.T) {
t.Parallel()
for _, tt := range []struct {
name string
transition database.WorkspaceTransition
jobCompletedAt sql.NullTime
buildDeadlineOffset *time.Duration
maxDeadlineOffset *time.Duration
workspaceTTL time.Duration
expectedBump time.Duration
}{
{
name: "NotFinishedYet",
transition: database.WorkspaceTransitionStart,
jobCompletedAt: sql.NullTime{},
buildDeadlineOffset: ptr.Ref(8 * time.Hour),
workspaceTTL: 8 * time.Hour,
expectedBump: 0,
},
{
name: "ManualShutdown",
transition: database.WorkspaceTransitionStart,
jobCompletedAt: sql.NullTime{Valid: true, Time: dbtime.Now()},
buildDeadlineOffset: nil,
expectedBump: 0,
},
{
name: "NotTimeToBumpYet",
transition: database.WorkspaceTransitionStart,
jobCompletedAt: sql.NullTime{Valid: true, Time: dbtime.Now()},
buildDeadlineOffset: ptr.Ref(8 * time.Hour),
workspaceTTL: 8 * time.Hour,
expectedBump: 0,
},
{
name: "TimeToBump",
transition: database.WorkspaceTransitionStart,
jobCompletedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-24 * time.Minute)},
buildDeadlineOffset: ptr.Ref(8*time.Hour - 24*time.Minute),
workspaceTTL: 8 * time.Hour,
expectedBump: 8 * time.Hour,
},
{
name: "MaxDeadline",
transition: database.WorkspaceTransitionStart,
jobCompletedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-24 * time.Minute)},
buildDeadlineOffset: ptr.Ref(time.Minute), // last chance to bump!
maxDeadlineOffset: ptr.Ref(time.Hour),
workspaceTTL: 8 * time.Hour,
expectedBump: 1 * time.Hour,
},
{
// A workspace that is still running, has passed its deadline, but has not
// yet been auto-stopped should still bump the deadline.
name: "PastDeadlineStillBumps",
transition: database.WorkspaceTransitionStart,
jobCompletedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-24 * time.Minute)},
buildDeadlineOffset: ptr.Ref(-time.Minute),
workspaceTTL: 8 * time.Hour,
expectedBump: 8 * time.Hour,
},
{
// A stopped workspace should never bump.
name: "StoppedWorkspace",
transition: database.WorkspaceTransitionStop,
jobCompletedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-time.Minute)},
buildDeadlineOffset: ptr.Ref(-time.Minute),
workspaceTTL: 8 * time.Hour,
expectedBump: 0,
},
} {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
var (
now = dbtime.Now()
ctx = testutil.Context(t, testutil.WaitShort)
log = slogtest.Make(t, nil)
db, _ = dbtestutil.NewDB(t)
org = dbgen.Organization(t, db, database.Organization{})
user = dbgen.User(t, db, database.User{
Status: database.UserStatusActive,
})
_ = dbgen.OrganizationMember(t, db, database.OrganizationMember{
UserID: user.ID,
OrganizationID: org.ID,
})
templateVersion = dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: org.ID,
CreatedBy: user.ID,
})
template = dbgen.Template(t, db, database.Template{
OrganizationID: org.ID,
ActiveVersionID: templateVersion.ID,
CreatedBy: user.ID,
})
ws = dbgen.Workspace(t, db, database.Workspace{
OwnerID: user.ID,
OrganizationID: org.ID,
TemplateID: template.ID,
Ttl: sql.NullInt64{Valid: true, Int64: int64(tt.workspaceTTL)},
})
job = dbgen.ProvisionerJob(t, db, database.ProvisionerJob{
OrganizationID: org.ID,
CompletedAt: tt.jobCompletedAt,
})
_ = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
JobID: job.ID,
})
buildID = uuid.New()
)
var buildNumber int32 = 1
// Insert a number of previous workspace builds.
for i := 0; i < 5; i++ {
insertPrevWorkspaceBuild(t, db, org.ID, templateVersion.ID, ws.ID, database.WorkspaceTransitionStart, buildNumber)
buildNumber++
insertPrevWorkspaceBuild(t, db, org.ID, templateVersion.ID, ws.ID, database.WorkspaceTransitionStop, buildNumber)
buildNumber++
}
// dbgen.WorkspaceBuild automatically sets deadline to now+1 hour if not set
var buildDeadline time.Time
if tt.buildDeadlineOffset != nil {
buildDeadline = now.Add(*tt.buildDeadlineOffset)
}
var maxDeadline time.Time
if tt.maxDeadlineOffset != nil {
maxDeadline = now.Add(*tt.maxDeadlineOffset)
}
err := db.InsertWorkspaceBuild(ctx, database.InsertWorkspaceBuildParams{
ID: buildID,
CreatedAt: dbtime.Now(),
UpdatedAt: dbtime.Now(),
BuildNumber: buildNumber,
InitiatorID: user.ID,
Reason: database.BuildReasonInitiator,
WorkspaceID: ws.ID,
JobID: job.ID,
TemplateVersionID: templateVersion.ID,
Transition: tt.transition,
Deadline: buildDeadline,
MaxDeadline: maxDeadline,
})
require.NoError(t, err, "unexpected error inserting workspace build")
bld, err := db.GetWorkspaceBuildByID(ctx, buildID)
require.NoError(t, err, "unexpected error fetching inserted workspace build")
// Validate our initial state before bump
require.Equal(t, tt.transition, bld.Transition, "unexpected transition before bump")
require.Equal(t, tt.jobCompletedAt.Time.UTC(), job.CompletedAt.Time.UTC(), "unexpected job completed at before bump")
require.Equal(t, buildDeadline.UTC(), bld.Deadline.UTC(), "unexpected build deadline before bump")
require.Equal(t, maxDeadline.UTC(), bld.MaxDeadline.UTC(), "unexpected max deadline before bump")
require.Equal(t, tt.workspaceTTL, time.Duration(ws.Ttl.Int64), "unexpected workspace TTL before bump")
workaroundWindowsTimeResolution(t)
// Bump duration is measured from the time of the bump, so we measure from here.
start := dbtime.Now()
activityBumpWorkspace(ctx, log, db, bld.WorkspaceID)
elapsed := time.Since(start)
if elapsed > 15*time.Second {
t.Logf("warning: activityBumpWorkspace took longer than 15 seconds: %s", elapsed)
}
// The actual bump could have happened anywhere in the elapsed time, so we
// guess at the approximate time of the bump.
approxBumpTime := start.Add(elapsed / 2)
// Validate our state after bump
updatedBuild, err := db.GetLatestWorkspaceBuildByWorkspaceID(ctx, bld.WorkspaceID)
require.NoError(t, err, "unexpected error getting latest workspace build")
if tt.expectedBump == 0 {
require.Equal(t, bld.UpdatedAt.UTC(), updatedBuild.UpdatedAt.UTC(), "should not have bumped updated_at")
require.Equal(t, bld.Deadline.UTC(), updatedBuild.Deadline.UTC(), "should not have bumped deadline")
} else {
require.NotEqual(t, bld.UpdatedAt.UTC(), updatedBuild.UpdatedAt.UTC(), "should have bumped updated_at")
expectedDeadline := approxBumpTime.Add(tt.expectedBump).UTC()
// Note: if CI is especially slow, this test may fail. There is an internal 15-second
// deadline in activityBumpWorkspace, so we allow the same window here.
require.WithinDuration(t, expectedDeadline, updatedBuild.Deadline.UTC(), 15*time.Second, "unexpected deadline after bump")
}
})
}
}
func insertPrevWorkspaceBuild(t *testing.T, db database.Store, orgID, tvID, workspaceID uuid.UUID, transition database.WorkspaceTransition, buildNumber int32) {
t.Helper()
job := dbgen.ProvisionerJob(t, db, database.ProvisionerJob{
OrganizationID: orgID,
})
_ = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
JobID: job.ID,
})
_ = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
BuildNumber: buildNumber,
WorkspaceID: workspaceID,
JobID: job.ID,
TemplateVersionID: tvID,
Transition: transition,
})
}
func workaroundWindowsTimeResolution(t *testing.T) {
t.Helper()
if runtime.GOOS == "windows" {
t.Logf("workaround: sleeping for a short time to avoid time resolution issues on Windows")
<-time.After(testutil.IntervalSlow)
}
}

View File

@ -657,6 +657,13 @@ func (q *querier) AcquireProvisionerJob(ctx context.Context, arg database.Acquir
return q.db.AcquireProvisionerJob(ctx, arg)
}
func (q *querier) ActivityBumpWorkspace(ctx context.Context, arg uuid.UUID) error {
fetch := func(ctx context.Context, arg uuid.UUID) (database.Workspace, error) {
return q.db.GetWorkspaceByID(ctx, arg)
}
return update(q.log, q.auth, fetch, q.db.ActivityBumpWorkspace)(ctx, arg)
}
func (q *querier) CleanTailnetCoordinators(ctx context.Context) error {
if err := q.authorizeContext(ctx, rbac.ActionDelete, rbac.ResourceTailnetCoordinator); err != nil {
return err

View File

@ -685,6 +685,13 @@ func (q *FakeQuerier) GetActiveDBCryptKeys(_ context.Context) ([]database.DBCryp
return ks, nil
}
func minTime(t, u time.Time) time.Time {
if t.Before(u) {
return t
}
return u
}
func (*FakeQuerier) AcquireLock(_ context.Context, _ int64) error {
return xerrors.New("AcquireLock must only be called within a transaction")
}
@ -744,6 +751,67 @@ func (q *FakeQuerier) AcquireProvisionerJob(_ context.Context, arg database.Acqu
return database.ProvisionerJob{}, sql.ErrNoRows
}
func (q *FakeQuerier) ActivityBumpWorkspace(ctx context.Context, workspaceID uuid.UUID) error {
err := validateDatabaseType(workspaceID)
if err != nil {
return err
}
q.mutex.Lock()
defer q.mutex.Unlock()
workspace, err := q.getWorkspaceByIDNoLock(ctx, workspaceID)
if err != nil {
return err
}
latestBuild, err := q.getLatestWorkspaceBuildByWorkspaceIDNoLock(ctx, workspaceID)
if err != nil {
return err
}
now := dbtime.Now()
for i := range q.workspaceBuilds {
if q.workspaceBuilds[i].BuildNumber != latestBuild.BuildNumber {
continue
}
// If the build is not active, do not bump.
if q.workspaceBuilds[i].Transition != database.WorkspaceTransitionStart {
return nil
}
// If the provisioner job is not completed, do not bump.
pj, err := q.getProvisionerJobByIDNoLock(ctx, q.workspaceBuilds[i].JobID)
if err != nil {
return err
}
if !pj.CompletedAt.Valid {
return nil
}
// Do not bump if the deadline is not set.
if q.workspaceBuilds[i].Deadline.IsZero() {
return nil
}
// Only bump if 5% of the deadline has passed.
ttlDur := time.Duration(workspace.Ttl.Int64)
ttlDur95 := ttlDur - (ttlDur / 20)
minBumpDeadline := q.workspaceBuilds[i].Deadline.Add(-ttlDur95)
if now.Before(minBumpDeadline) {
return nil
}
// Bump.
newDeadline := now.Add(ttlDur)
q.workspaceBuilds[i].UpdatedAt = now
if !q.workspaceBuilds[i].MaxDeadline.IsZero() {
q.workspaceBuilds[i].Deadline = minTime(newDeadline, q.workspaceBuilds[i].MaxDeadline)
} else {
q.workspaceBuilds[i].Deadline = newDeadline
}
return nil
}
return sql.ErrNoRows
}
func (*FakeQuerier) CleanTailnetCoordinators(_ context.Context) error {
return ErrUnimplemented
}
@ -4741,6 +4809,7 @@ func (q *FakeQuerier) InsertWorkspaceBuild(_ context.Context, arg database.Inser
JobID: arg.JobID,
ProvisionerState: arg.ProvisionerState,
Deadline: arg.Deadline,
MaxDeadline: arg.MaxDeadline,
Reason: arg.Reason,
}
q.workspaceBuilds = append(q.workspaceBuilds, workspaceBuild)

View File

@ -93,6 +93,13 @@ func (m metricsStore) AcquireProvisionerJob(ctx context.Context, arg database.Ac
return provisionerJob, err
}
func (m metricsStore) ActivityBumpWorkspace(ctx context.Context, arg uuid.UUID) error {
start := time.Now()
r0 := m.s.ActivityBumpWorkspace(ctx, arg)
m.queryLatencies.WithLabelValues("ActivityBumpWorkspace").Observe(time.Since(start).Seconds())
return r0
}
func (m metricsStore) CleanTailnetCoordinators(ctx context.Context) error {
start := time.Now()
err := m.s.CleanTailnetCoordinators(ctx)

View File

@ -68,6 +68,20 @@ func (mr *MockStoreMockRecorder) AcquireProvisionerJob(arg0, arg1 interface{}) *
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "AcquireProvisionerJob", reflect.TypeOf((*MockStore)(nil).AcquireProvisionerJob), arg0, arg1)
}
// ActivityBumpWorkspace mocks base method.
func (m *MockStore) ActivityBumpWorkspace(arg0 context.Context, arg1 uuid.UUID) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "ActivityBumpWorkspace", arg0, arg1)
ret0, _ := ret[0].(error)
return ret0
}
// ActivityBumpWorkspace indicates an expected call of ActivityBumpWorkspace.
func (mr *MockStoreMockRecorder) ActivityBumpWorkspace(arg0, arg1 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ActivityBumpWorkspace", reflect.TypeOf((*MockStore)(nil).ActivityBumpWorkspace), arg0, arg1)
}
// CleanTailnetCoordinators mocks base method.
func (m *MockStore) CleanTailnetCoordinators(arg0 context.Context) error {
m.ctrl.T.Helper()

View File

@ -24,6 +24,13 @@ type sqlcQuerier interface {
// multiple provisioners from acquiring the same jobs. See:
// https://www.postgresql.org/docs/9.5/sql-select.html#SQL-FOR-UPDATE-SHARE
AcquireProvisionerJob(ctx context.Context, arg AcquireProvisionerJobParams) (ProvisionerJob, error)
// We bump by the original TTL to prevent counter-intuitive behavior
// as the TTL wraps. For example, if I set the TTL to 12 hours, sign off
// work at midnight, come back at 10am, I would want another full day
// of uptime.
// We only bump if workspace shutdown is manual.
// We only bump when 5% of the deadline has elapsed.
ActivityBumpWorkspace(ctx context.Context, workspaceID uuid.UUID) error
CleanTailnetCoordinators(ctx context.Context) error
DeleteAPIKeyByID(ctx context.Context, id string) error
DeleteAPIKeysByUserID(ctx context.Context, userID uuid.UUID) error

View File

@ -15,6 +15,52 @@ import (
"github.com/sqlc-dev/pqtype"
)
const activityBumpWorkspace = `-- name: ActivityBumpWorkspace :exec
WITH latest AS (
SELECT
workspace_builds.id::uuid AS build_id,
workspace_builds.deadline::timestamp AS build_deadline,
workspace_builds.max_deadline::timestamp AS build_max_deadline,
workspace_builds.transition AS build_transition,
provisioner_jobs.completed_at::timestamp AS job_completed_at,
(workspaces.ttl / 1000 / 1000 / 1000 || ' seconds')::interval AS ttl_interval
FROM workspace_builds
JOIN provisioner_jobs
ON provisioner_jobs.id = workspace_builds.job_id
JOIN workspaces
ON workspaces.id = workspace_builds.workspace_id
WHERE workspace_builds.workspace_id = $1::uuid
ORDER BY workspace_builds.build_number DESC
LIMIT 1
)
UPDATE
workspace_builds wb
SET
updated_at = NOW(),
deadline = CASE
WHEN l.build_max_deadline = '0001-01-01 00:00:00+00'
THEN NOW() + l.ttl_interval
ELSE LEAST(NOW() + l.ttl_interval, l.build_max_deadline)
END
FROM latest l
WHERE wb.id = l.build_id
AND l.job_completed_at IS NOT NULL
AND l.build_transition = 'start'
AND l.build_deadline != '0001-01-01 00:00:00+00'
AND l.build_deadline - (l.ttl_interval * 0.95) < NOW()
`
// We bump by the original TTL to prevent counter-intuitive behavior
// as the TTL wraps. For example, if I set the TTL to 12 hours, sign off
// work at midnight, come back at 10am, I would want another full day
// of uptime.
// We only bump if workspace shutdown is manual.
// We only bump when 5% of the deadline has elapsed.
func (q *sqlQuerier) ActivityBumpWorkspace(ctx context.Context, workspaceID uuid.UUID) error {
_, err := q.db.ExecContext(ctx, activityBumpWorkspace, workspaceID)
return err
}
const deleteAPIKeyByID = `-- name: DeleteAPIKeyByID :exec
DELETE FROM
api_keys

View File

@ -0,0 +1,40 @@
-- We bump by the original TTL to prevent counter-intuitive behavior
-- as the TTL wraps. For example, if I set the TTL to 12 hours, sign off
-- work at midnight, come back at 10am, I would want another full day
-- of uptime.
-- name: ActivityBumpWorkspace :exec
WITH latest AS (
SELECT
workspace_builds.id::uuid AS build_id,
workspace_builds.deadline::timestamp AS build_deadline,
workspace_builds.max_deadline::timestamp AS build_max_deadline,
workspace_builds.transition AS build_transition,
provisioner_jobs.completed_at::timestamp AS job_completed_at,
(workspaces.ttl / 1000 / 1000 / 1000 || ' seconds')::interval AS ttl_interval
FROM workspace_builds
JOIN provisioner_jobs
ON provisioner_jobs.id = workspace_builds.job_id
JOIN workspaces
ON workspaces.id = workspace_builds.workspace_id
WHERE workspace_builds.workspace_id = @workspace_id::uuid
ORDER BY workspace_builds.build_number DESC
LIMIT 1
)
UPDATE
workspace_builds wb
SET
updated_at = NOW(),
deadline = CASE
WHEN l.build_max_deadline = '0001-01-01 00:00:00+00'
THEN NOW() + l.ttl_interval
ELSE LEAST(NOW() + l.ttl_interval, l.build_max_deadline)
END
FROM latest l
WHERE wb.id = l.build_id
AND l.job_completed_at IS NOT NULL
AND l.build_transition = 'start'
-- We only bump if workspace shutdown is manual.
AND l.build_deadline != '0001-01-01 00:00:00+00'
-- We only bump when 5% of the deadline has elapsed.
AND l.build_deadline - (l.ttl_interval * 0.95) < NOW()
;