feat: cancel stuck pending jobs (#17803)

Closes: #16488
This commit is contained in:
Michael Suchacz
2025-05-20 15:22:44 +02:00
committed by GitHub
parent 613117bde2
commit 769c9ee337
23 changed files with 779 additions and 297 deletions

View File

@ -170,10 +170,10 @@ var (
Identifier: rbac.RoleIdentifier{Name: "provisionerd"},
DisplayName: "Provisioner Daemon",
Site: rbac.Permissions(map[string][]policy.Action{
// TODO: Add ProvisionerJob resource type.
rbac.ResourceFile.Type: {policy.ActionRead},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate, policy.ActionCreate},
rbac.ResourceFile.Type: {policy.ActionRead},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead, policy.ActionUpdate},
// Unsure why provisionerd needs update and read personal
rbac.ResourceUser.Type: {policy.ActionRead, policy.ActionReadPersonal, policy.ActionUpdatePersonal},
rbac.ResourceWorkspaceDormant.Type: {policy.ActionDelete, policy.ActionRead, policy.ActionUpdate, policy.ActionWorkspaceStop},
@ -219,19 +219,20 @@ var (
Scope: rbac.ScopeAll,
}.WithCachedASTValue()
// See unhanger package.
subjectHangDetector = rbac.Subject{
Type: rbac.SubjectTypeHangDetector,
FriendlyName: "Hang Detector",
// See reaper package.
subjectJobReaper = rbac.Subject{
Type: rbac.SubjectTypeJobReaper,
FriendlyName: "Job Reaper",
ID: uuid.Nil.String(),
Roles: rbac.Roles([]rbac.Role{
{
Identifier: rbac.RoleIdentifier{Name: "hangdetector"},
DisplayName: "Hang Detector Daemon",
Identifier: rbac.RoleIdentifier{Name: "jobreaper"},
DisplayName: "Job Reaper Daemon",
Site: rbac.Permissions(map[string][]policy.Action{
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead},
rbac.ResourceWorkspace.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceSystem.Type: {policy.WildcardSymbol},
rbac.ResourceTemplate.Type: {policy.ActionRead},
rbac.ResourceWorkspace.Type: {policy.ActionRead, policy.ActionUpdate},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate},
}),
Org: map[string][]rbac.Permission{},
User: []rbac.Permission{},
@ -346,6 +347,7 @@ var (
rbac.ResourceNotificationTemplate.Type: {policy.ActionCreate, policy.ActionUpdate, policy.ActionDelete},
rbac.ResourceCryptoKey.Type: {policy.ActionCreate, policy.ActionUpdate, policy.ActionDelete},
rbac.ResourceFile.Type: {policy.ActionCreate, policy.ActionRead},
rbac.ResourceProvisionerJobs.Type: {policy.ActionRead, policy.ActionUpdate, policy.ActionCreate},
}),
Org: map[string][]rbac.Permission{},
User: []rbac.Permission{},
@ -407,10 +409,10 @@ func AsAutostart(ctx context.Context) context.Context {
return As(ctx, subjectAutostart)
}
// AsHangDetector returns a context with an actor that has permissions required
// for unhanger.Detector to function.
func AsHangDetector(ctx context.Context) context.Context {
return As(ctx, subjectHangDetector)
// AsJobReaper returns the context produced by As for subjectJobReaper, i.e. a
// context with an actor that has the permissions required for reaper.Detector
// to function.
func AsJobReaper(ctx context.Context) context.Context {
	reaperCtx := As(ctx, subjectJobReaper)
	return reaperCtx
}
// AsKeyRotator returns a context with an actor that has permissions required for rotating crypto keys.
@ -1085,11 +1087,10 @@ func (q *querier) AcquireNotificationMessages(ctx context.Context, arg database.
return q.db.AcquireNotificationMessages(ctx, arg)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) AcquireProvisionerJob(ctx context.Context, arg database.AcquireProvisionerJobParams) (database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
// return database.ProvisionerJob{}, err
// }
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return database.ProvisionerJob{}, err
}
return q.db.AcquireProvisionerJob(ctx, arg)
}
@ -1912,14 +1913,6 @@ func (q *querier) GetHealthSettings(ctx context.Context) (string, error) {
return q.db.GetHealthSettings(ctx)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) GetHungProvisionerJobs(ctx context.Context, hungSince time.Time) ([]database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return nil, err
// }
return q.db.GetHungProvisionerJobs(ctx, hungSince)
}
// GetInboxNotificationByID looks up a single inbox notification by id. The
// lookup is routed through fetchWithAction with policy.ActionRead rather than
// calling the underlying store directly.
func (q *querier) GetInboxNotificationByID(ctx context.Context, id uuid.UUID) (database.InboxNotification, error) {
	fetchNotification := fetchWithAction(q.log, q.auth, policy.ActionRead, q.db.GetInboxNotificationByID)
	return fetchNotification(ctx, id)
}
@ -2307,6 +2300,13 @@ func (q *querier) GetProvisionerJobByID(ctx context.Context, id uuid.UUID) (data
return job, nil
}
// GetProvisionerJobByIDForUpdate requires policy.ActionRead on
// rbac.ResourceProvisionerJobs and then delegates to the underlying store.
//
// The check is made against the resource type itself, before the job is
// loaded, so it is not scoped to the organization of the requested job. If
// authorization fails, a zero database.ProvisionerJob is returned together
// with the authorization error.
func (q *querier) GetProvisionerJobByIDForUpdate(ctx context.Context, id uuid.UUID) (database.ProvisionerJob, error) {
	authErr := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs)
	if authErr != nil {
		return database.ProvisionerJob{}, authErr
	}

	return q.db.GetProvisionerJobByIDForUpdate(ctx, id)
}
func (q *querier) GetProvisionerJobTimingsByJobID(ctx context.Context, jobID uuid.UUID) ([]database.ProvisionerJobTiming, error) {
_, err := q.GetProvisionerJobByID(ctx, jobID)
if err != nil {
@ -2315,31 +2315,49 @@ func (q *querier) GetProvisionerJobTimingsByJobID(ctx context.Context, jobID uui
return q.db.GetProvisionerJobTimingsByJobID(ctx, jobID)
}
// TODO: We have a ProvisionerJobs resource, but it hasn't been checked for this use-case.
func (q *querier) GetProvisionerJobsByIDs(ctx context.Context, ids []uuid.UUID) ([]database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil {
// return nil, err
// }
return q.db.GetProvisionerJobsByIDs(ctx, ids)
provisionerJobs, err := q.db.GetProvisionerJobsByIDs(ctx, ids)
if err != nil {
return nil, err
}
orgIDs := make(map[uuid.UUID]struct{})
for _, job := range provisionerJobs {
orgIDs[job.OrganizationID] = struct{}{}
}
for orgID := range orgIDs {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs.InOrg(orgID)); err != nil {
return nil, err
}
}
return provisionerJobs, nil
}
// TODO: We have a ProvisionerJobs resource, but it hasn't been checked for this use-case.
// GetProvisionerJobsByIDsWithQueuePosition forwards ids to the underlying
// store and returns its result unchanged. No authorization check is performed
// in this method.
func (q *querier) GetProvisionerJobsByIDsWithQueuePosition(ctx context.Context, ids []uuid.UUID) ([]database.GetProvisionerJobsByIDsWithQueuePositionRow, error) {
	// TODO: Remove this once we have a proper rbac check for provisioner jobs.
	// Details in https://github.com/coder/coder/issues/16160
	rows, err := q.db.GetProvisionerJobsByIDsWithQueuePosition(ctx, ids)
	return rows, err
}
// GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisioner
// runs the store query of the same name through fetchWithPostFilter, passing
// policy.ActionRead as the action.
func (q *querier) GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisioner(ctx context.Context, arg database.GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisionerParams) ([]database.GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisionerRow, error) {
	// TODO: Remove this once we have a proper rbac check for provisioner jobs.
	// Details in https://github.com/coder/coder/issues/16160
	fetchJobs := fetchWithPostFilter(q.auth, policy.ActionRead, q.db.GetProvisionerJobsByOrganizationAndStatusWithQueuePositionAndProvisioner)
	return fetchJobs(ctx, arg)
}
// TODO: We have a ProvisionerJobs resource, but it hasn't been checked for this use-case.
func (q *querier) GetProvisionerJobsCreatedAfter(ctx context.Context, createdAt time.Time) ([]database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceSystem); err != nil {
// return nil, err
// }
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs); err != nil {
return nil, err
}
return q.db.GetProvisionerJobsCreatedAfter(ctx, createdAt)
}
// GetProvisionerJobsToBeReaped requires policy.ActionRead on
// rbac.ResourceProvisionerJobs and then delegates to the underlying store
// with arg unchanged. If authorization fails, it returns a nil slice and the
// authorization error without querying the store.
func (q *querier) GetProvisionerJobsToBeReaped(ctx context.Context, arg database.GetProvisionerJobsToBeReapedParams) ([]database.ProvisionerJob, error) {
	authErr := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceProvisionerJobs)
	if authErr != nil {
		return nil, authErr
	}

	return q.db.GetProvisionerJobsToBeReaped(ctx, arg)
}
// GetProvisionerKeyByHashedSecret looks up a provisioner key by its hashed
// secret. The lookup is routed through the fetch helper rather than calling
// the underlying store directly.
func (q *querier) GetProvisionerKeyByHashedSecret(ctx context.Context, hashedSecret []byte) (database.ProvisionerKey, error) {
	fetchKey := fetch(q.log, q.auth, q.db.GetProvisionerKeyByHashedSecret)
	return fetchKey(ctx, hashedSecret)
}
@ -3533,27 +3551,22 @@ func (q *querier) InsertPresetParameters(ctx context.Context, arg database.Inser
return q.db.InsertPresetParameters(ctx, arg)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) InsertProvisionerJob(ctx context.Context, arg database.InsertProvisionerJobParams) (database.ProvisionerJob, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return database.ProvisionerJob{}, err
// }
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
return q.db.InsertProvisionerJob(ctx, arg)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) InsertProvisionerJobLogs(ctx context.Context, arg database.InsertProvisionerJobLogsParams) ([]database.ProvisionerJobLog, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return nil, err
// }
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
return q.db.InsertProvisionerJobLogs(ctx, arg)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) InsertProvisionerJobTimings(ctx context.Context, arg database.InsertProvisionerJobTimingsParams) ([]database.ProvisionerJobTiming, error) {
// if err := q.authorizeContext(ctx, policy.ActionCreate, rbac.ResourceSystem); err != nil {
// return nil, err
// }
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return nil, err
}
return q.db.InsertProvisionerJobTimings(ctx, arg)
}
@ -4176,15 +4189,17 @@ func (q *querier) UpdateProvisionerDaemonLastSeenAt(ctx context.Context, arg dat
return q.db.UpdateProvisionerDaemonLastSeenAt(ctx, arg)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) UpdateProvisionerJobByID(ctx context.Context, arg database.UpdateProvisionerJobByIDParams) error {
// if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
// return err
// }
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return err
}
return q.db.UpdateProvisionerJobByID(ctx, arg)
}
func (q *querier) UpdateProvisionerJobWithCancelByID(ctx context.Context, arg database.UpdateProvisionerJobWithCancelByIDParams) error {
// TODO: Remove this once we have a proper rbac check for provisioner jobs.
// Details in https://github.com/coder/coder/issues/16160
job, err := q.db.GetProvisionerJobByID(ctx, arg.ID)
if err != nil {
return err
@ -4251,14 +4266,20 @@ func (q *querier) UpdateProvisionerJobWithCancelByID(ctx context.Context, arg da
return q.db.UpdateProvisionerJobWithCancelByID(ctx, arg)
}
// TODO: We need to create a ProvisionerJob resource type
func (q *querier) UpdateProvisionerJobWithCompleteByID(ctx context.Context, arg database.UpdateProvisionerJobWithCompleteByIDParams) error {
// if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
// return err
// }
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs); err != nil {
return err
}
return q.db.UpdateProvisionerJobWithCompleteByID(ctx, arg)
}
// UpdateProvisionerJobWithCompleteWithStartedAtByID requires
// policy.ActionUpdate on rbac.ResourceProvisionerJobs and then delegates to
// the underlying store with arg unchanged. If authorization fails, the
// authorization error is returned and the store is not called.
func (q *querier) UpdateProvisionerJobWithCompleteWithStartedAtByID(ctx context.Context, arg database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams) error {
	authErr := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceProvisionerJobs)
	if authErr != nil {
		return authErr
	}

	return q.db.UpdateProvisionerJobWithCompleteWithStartedAtByID(ctx, arg)
}
func (q *querier) UpdateReplica(ctx context.Context, arg database.UpdateReplicaParams) (database.Replica, error) {
if err := q.authorizeContext(ctx, policy.ActionUpdate, rbac.ResourceSystem); err != nil {
return database.Replica{}, err

View File

@ -694,9 +694,12 @@ func (s *MethodTestSuite) TestProvisionerJob() {
Asserts(v.RBACObject(tpl), []policy.Action{policy.ActionRead, policy.ActionUpdate}).Returns()
}))
s.Run("GetProvisionerJobsByIDs", s.Subtest(func(db database.Store, check *expects) {
a := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
b := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args([]uuid.UUID{a.ID, b.ID}).Asserts().Returns(slice.New(a, b))
o := dbgen.Organization(s.T(), db, database.Organization{})
a := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{OrganizationID: o.ID})
b := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{OrganizationID: o.ID})
check.Args([]uuid.UUID{a.ID, b.ID}).
Asserts(rbac.ResourceProvisionerJobs.InOrg(o.ID), policy.ActionRead).
Returns(slice.New(a, b))
}))
s.Run("GetProvisionerLogsAfterID", s.Subtest(func(db database.Store, check *expects) {
u := dbgen.User(s.T(), db, database.User{})
@ -3923,9 +3926,8 @@ func (s *MethodTestSuite) TestSystemFunctions() {
check.Args().Asserts(rbac.ResourceSystem, policy.ActionDelete)
}))
s.Run("GetProvisionerJobsCreatedAfter", s.Subtest(func(db database.Store, check *expects) {
// TODO: add provisioner job resource type
_ = dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{CreatedAt: time.Now().Add(-time.Hour)})
check.Args(time.Now()).Asserts( /*rbac.ResourceSystem, policy.ActionRead*/ )
check.Args(time.Now()).Asserts(rbac.ResourceProvisionerJobs, policy.ActionRead)
}))
s.Run("GetTemplateVersionsByIDs", s.Subtest(func(db database.Store, check *expects) {
dbtestutil.DisableForeignKeysAndTriggers(s.T(), db)
@ -4008,11 +4010,11 @@ func (s *MethodTestSuite) TestSystemFunctions() {
Returns([]database.WorkspaceAgent{agt})
}))
s.Run("GetProvisionerJobsByIDs", s.Subtest(func(db database.Store, check *expects) {
// TODO: add a ProvisionerJob resource type
a := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
b := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
o := dbgen.Organization(s.T(), db, database.Organization{})
a := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{OrganizationID: o.ID})
b := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{OrganizationID: o.ID})
check.Args([]uuid.UUID{a.ID, b.ID}).
Asserts( /*rbac.ResourceSystem, policy.ActionRead*/ ).
Asserts(rbac.ResourceProvisionerJobs.InOrg(o.ID), policy.ActionRead).
Returns(slice.New(a, b))
}))
s.Run("InsertWorkspaceAgent", s.Subtest(func(db database.Store, check *expects) {
@ -4048,7 +4050,6 @@ func (s *MethodTestSuite) TestSystemFunctions() {
}).Asserts(rbac.ResourceSystem, policy.ActionUpdate).Returns()
}))
s.Run("AcquireProvisionerJob", s.Subtest(func(db database.Store, check *expects) {
// TODO: we need to create a ProvisionerJob resource
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{
StartedAt: sql.NullTime{Valid: false},
UpdatedAt: time.Now(),
@ -4058,47 +4059,48 @@ func (s *MethodTestSuite) TestSystemFunctions() {
OrganizationID: j.OrganizationID,
Types: []database.ProvisionerType{j.Provisioner},
ProvisionerTags: must(json.Marshal(j.Tags)),
}).Asserts( /*rbac.ResourceSystem, policy.ActionUpdate*/ )
}).Asserts(rbac.ResourceProvisionerJobs, policy.ActionUpdate)
}))
s.Run("UpdateProvisionerJobWithCompleteByID", s.Subtest(func(db database.Store, check *expects) {
// TODO: we need to create a ProvisionerJob resource
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args(database.UpdateProvisionerJobWithCompleteByIDParams{
ID: j.ID,
}).Asserts( /*rbac.ResourceSystem, policy.ActionUpdate*/ )
}).Asserts(rbac.ResourceProvisionerJobs, policy.ActionUpdate)
}))
s.Run("UpdateProvisionerJobWithCompleteWithStartedAtByID", s.Subtest(func(db database.Store, check *expects) {
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args(database.UpdateProvisionerJobWithCompleteWithStartedAtByIDParams{
ID: j.ID,
}).Asserts(rbac.ResourceProvisionerJobs, policy.ActionUpdate)
}))
s.Run("UpdateProvisionerJobByID", s.Subtest(func(db database.Store, check *expects) {
// TODO: we need to create a ProvisionerJob resource
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args(database.UpdateProvisionerJobByIDParams{
ID: j.ID,
UpdatedAt: time.Now(),
}).Asserts( /*rbac.ResourceSystem, policy.ActionUpdate*/ )
}).Asserts(rbac.ResourceProvisionerJobs, policy.ActionUpdate)
}))
s.Run("InsertProvisionerJob", s.Subtest(func(db database.Store, check *expects) {
dbtestutil.DisableForeignKeysAndTriggers(s.T(), db)
// TODO: we need to create a ProvisionerJob resource
check.Args(database.InsertProvisionerJobParams{
ID: uuid.New(),
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: json.RawMessage("{}"),
}).Asserts( /*rbac.ResourceSystem, policy.ActionCreate*/ )
}).Asserts( /* rbac.ResourceProvisionerJobs, policy.ActionCreate */ )
}))
s.Run("InsertProvisionerJobLogs", s.Subtest(func(db database.Store, check *expects) {
// TODO: we need to create a ProvisionerJob resource
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args(database.InsertProvisionerJobLogsParams{
JobID: j.ID,
}).Asserts( /*rbac.ResourceSystem, policy.ActionCreate*/ )
}).Asserts( /* rbac.ResourceProvisionerJobs, policy.ActionUpdate */ )
}))
s.Run("InsertProvisionerJobTimings", s.Subtest(func(db database.Store, check *expects) {
// TODO: we need to create a ProvisionerJob resource
j := dbgen.ProvisionerJob(s.T(), db, nil, database.ProvisionerJob{})
check.Args(database.InsertProvisionerJobTimingsParams{
JobID: j.ID,
}).Asserts( /*rbac.ResourceSystem, policy.ActionCreate*/ )
}).Asserts(rbac.ResourceProvisionerJobs, policy.ActionUpdate)
}))
s.Run("UpsertProvisionerDaemon", s.Subtest(func(db database.Store, check *expects) {
dbtestutil.DisableForeignKeysAndTriggers(s.T(), db)
@ -4234,8 +4236,8 @@ func (s *MethodTestSuite) TestSystemFunctions() {
s.Run("GetFileTemplates", s.Subtest(func(db database.Store, check *expects) {
check.Args(uuid.New()).Asserts(rbac.ResourceSystem, policy.ActionRead)
}))
s.Run("GetHungProvisionerJobs", s.Subtest(func(db database.Store, check *expects) {
check.Args(time.Time{}).Asserts()
s.Run("GetProvisionerJobsToBeReaped", s.Subtest(func(db database.Store, check *expects) {
check.Args(database.GetProvisionerJobsToBeReapedParams{}).Asserts(rbac.ResourceProvisionerJobs, policy.ActionRead)
}))
s.Run("UpsertOAuthSigningKey", s.Subtest(func(db database.Store, check *expects) {
check.Args("foo").Asserts(rbac.ResourceSystem, policy.ActionUpdate)
@ -4479,6 +4481,9 @@ func (s *MethodTestSuite) TestSystemFunctions() {
VapidPrivateKey: "test",
}).Asserts(rbac.ResourceDeploymentConfig, policy.ActionUpdate)
}))
s.Run("GetProvisionerJobByIDForUpdate", s.Subtest(func(db database.Store, check *expects) {
check.Args(uuid.New()).Asserts(rbac.ResourceProvisionerJobs, policy.ActionRead).Errors(sql.ErrNoRows)
}))
}
func (s *MethodTestSuite) TestNotifications() {