fix: complete job and mark workspace as deleted when no provisioners are available (#18465)

Alternate fix for https://github.com/coder/coder/issues/18080

Modifies wsbuilder to complete the provisioner job and mark the
workspace as deleted if it is clear that no provisioner will be able to
pick up the delete build.

This has the significant advantage of not deviating too much from the
current semantics of `POST /api/v2/workspacebuilds`.
https://github.com/coder/coder/pull/18460 ends up returning a 204 on
orphan delete due to no build being created.

The downside is that we have to duplicate some responsibilities of
provisionerdserver in wsbuilder.

There is a slight gotcha to this approach, though: if you stop a
provisioner and then immediately try to orphan-delete, the job will
still be created because the provisioner is considered alive until its
heartbeat interval elapses. However, you can cancel the job and try again.
This commit is contained in:
Cian Johnston
2025-06-23 14:07:42 +01:00
committed by GitHub
parent c3bc1e75ec
commit 2f55e29466
7 changed files with 502 additions and 86 deletions

View File

@ -3,6 +3,7 @@ package coderd
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"math"
@ -433,20 +434,56 @@ func (api *API) postWorkspaceBuilds(rw http.ResponseWriter, r *http.Request) {
return
}
var queuePos database.GetProvisionerJobsByIDsWithQueuePositionRow
if provisionerJob != nil {
queuePos.ProvisionerJob = *provisionerJob
queuePos.QueuePosition = 0
if err := provisionerjobs.PostJob(api.Pubsub, *provisionerJob); err != nil {
// Client probably doesn't care about this error, so just log it.
api.Logger.Error(ctx, "failed to post provisioner job to pubsub", slog.Error(err))
}
// We may need to complete the audit if wsbuilder determined that
// no provisioner could handle an orphan-delete job and completed it.
if createBuild.Orphan && createBuild.Transition == codersdk.WorkspaceTransitionDelete && provisionerJob.CompletedAt.Valid {
api.Logger.Warn(ctx, "orphan delete handled by wsbuilder due to no eligible provisioners",
slog.F("workspace_id", workspace.ID),
slog.F("workspace_build_id", workspaceBuild.ID),
slog.F("provisioner_job_id", provisionerJob.ID),
)
buildResourceInfo := audit.AdditionalFields{
WorkspaceName: workspace.Name,
BuildNumber: strconv.Itoa(int(workspaceBuild.BuildNumber)),
BuildReason: workspaceBuild.Reason,
WorkspaceID: workspace.ID,
WorkspaceOwner: workspace.OwnerName,
}
briBytes, err := json.Marshal(buildResourceInfo)
if err != nil {
api.Logger.Error(ctx, "failed to marshal build resource info for audit", slog.Error(err))
}
auditor := api.Auditor.Load()
bag := audit.BaggageFromContext(ctx)
audit.BackgroundAudit(ctx, &audit.BackgroundAuditParams[database.WorkspaceBuild]{
Audit: *auditor,
Log: api.Logger,
UserID: provisionerJob.InitiatorID,
OrganizationID: workspace.OrganizationID,
RequestID: provisionerJob.ID,
IP: bag.IP,
Action: database.AuditActionDelete,
Old: previousWorkspaceBuild,
New: *workspaceBuild,
Status: http.StatusOK,
AdditionalFields: briBytes,
})
}
}
apiBuild, err := api.convertWorkspaceBuild(
*workspaceBuild,
workspace,
database.GetProvisionerJobsByIDsWithQueuePositionRow{
ProvisionerJob: *provisionerJob,
QueuePosition: 0,
},
queuePos,
[]database.WorkspaceResource{},
[]database.WorkspaceResourceMetadatum{},
[]database.WorkspaceAgent{},