feat: reinitialize agents when a prebuilt workspace is claimed (#17475)

This pull request allows coder workspace agents to be reinitialized when
a prebuilt workspace is claimed by a user. This facilitates the transfer
of ownership between the anonymous prebuilds system user and the new
owner of the workspace.

Only a single agent per prebuilt workspace is supported for now, but
plumbing has already been done to facilitate the seamless transition to
multi-agent support.

---------

Signed-off-by: Danny Kopping <dannykopping@gmail.com>
Co-authored-by: Danny Kopping <dannykopping@gmail.com>
This commit is contained in:
Sas Swart
2025-05-14 14:15:36 +02:00
committed by GitHub
parent fcbdd1a28e
commit 425ee6fa55
38 changed files with 2184 additions and 449 deletions

View File

@ -40,12 +40,14 @@ import (
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/coderd/promoauth"
"github.com/coder/coder/v2/coderd/schedule"
"github.com/coder/coder/v2/coderd/telemetry"
"github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/coderd/wspubsub"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/provisioner"
"github.com/coder/coder/v2/provisionerd/proto"
"github.com/coder/coder/v2/provisionersdk"
@ -647,6 +649,30 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
}
}
runningAgentAuthTokens := []*sdkproto.RunningAgentAuthToken{}
if input.PrebuiltWorkspaceBuildStage == sdkproto.PrebuiltWorkspaceBuildStage_CLAIM {
// runningAgentAuthTokens are *only* used for prebuilds. We fetch them when we want to rebuild a prebuilt workspace
// but not generate new agent tokens. The provisionerdserver will push them down to
// the provisioner (and ultimately to the `coder_agent` resource in the Terraform provider) where they will be
// reused. Context: the agent token is often used in immutable attributes of workspace resources (e.g. a VM or container)
// to initialize the agent, so if that value changes it will necessitate a replacement of that resource, thus
// obviating the whole point of the prebuild.
agents, err := s.Database.GetWorkspaceAgentsByWorkspaceAndBuildNumber(ctx, database.GetWorkspaceAgentsByWorkspaceAndBuildNumberParams{
WorkspaceID: workspace.ID,
BuildNumber: 1,
})
if err != nil {
s.Logger.Error(ctx, "failed to retrieve running agents of claimed prebuilt workspace",
slog.F("workspace_id", workspace.ID), slog.Error(err))
}
for _, agent := range agents {
runningAgentAuthTokens = append(runningAgentAuthTokens, &sdkproto.RunningAgentAuthToken{
AgentId: agent.ID.String(),
Token: agent.AuthToken.String(),
})
}
}
protoJob.Type = &proto.AcquiredJob_WorkspaceBuild_{
WorkspaceBuild: &proto.AcquiredJob_WorkspaceBuild{
WorkspaceBuildId: workspaceBuild.ID.String(),
@ -676,6 +702,7 @@ func (s *server) acquireProtoJob(ctx context.Context, job database.ProvisionerJo
WorkspaceBuildId: workspaceBuild.ID.String(),
WorkspaceOwnerLoginType: string(owner.LoginType),
WorkspaceOwnerRbacRoles: ownerRbacRoles,
RunningAgentAuthTokens: runningAgentAuthTokens,
PrebuiltWorkspaceBuildStage: input.PrebuiltWorkspaceBuildStage,
},
LogLevel: input.LogLevel,
@ -1812,6 +1839,19 @@ func (s *server) CompleteJob(ctx context.Context, completed *proto.CompletedJob)
if err != nil {
return nil, xerrors.Errorf("update workspace: %w", err)
}
if input.PrebuiltWorkspaceBuildStage == sdkproto.PrebuiltWorkspaceBuildStage_CLAIM {
s.Logger.Info(ctx, "workspace prebuild successfully claimed by user",
slog.F("workspace_id", workspace.ID))
err = prebuilds.NewPubsubWorkspaceClaimPublisher(s.Pubsub).PublishWorkspaceClaim(agentsdk.ReinitializationEvent{
WorkspaceID: workspace.ID,
Reason: agentsdk.ReinitializeReasonPrebuildClaimed,
})
if err != nil {
s.Logger.Error(ctx, "failed to publish workspace claim event", slog.Error(err))
}
}
case *proto.CompletedJob_TemplateDryRun_:
for _, resource := range jobType.TemplateDryRun.Resources {
s.Logger.Info(ctx, "inserting template dry-run job resource",
@ -1955,6 +1995,7 @@ func InsertWorkspacePresetAndParameters(ctx context.Context, db database.Store,
}
}
dbPreset, err := tx.InsertPreset(ctx, database.InsertPresetParams{
ID: uuid.New(),
TemplateVersionID: templateVersionID,
Name: protoPreset.Name,
CreatedAt: t,

View File

@ -26,7 +26,10 @@ import (
"github.com/coder/quartz"
"github.com/coder/serpent"
"github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/coderd/provisionerdserver"
"github.com/coder/coder/v2/coderd/rbac"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/coderd/audit"
@ -39,7 +42,6 @@ import (
"github.com/coder/coder/v2/coderd/externalauth"
"github.com/coder/coder/v2/coderd/notifications"
"github.com/coder/coder/v2/coderd/notifications/notificationstest"
"github.com/coder/coder/v2/coderd/provisionerdserver"
"github.com/coder/coder/v2/coderd/schedule"
"github.com/coder/coder/v2/coderd/schedule/cron"
"github.com/coder/coder/v2/coderd/telemetry"
@ -167,8 +169,12 @@ func TestAcquireJob(t *testing.T) {
_, err = tc.acquire(ctx, srv)
require.ErrorContains(t, err, "sql: no rows in result set")
})
for _, prebuiltWorkspace := range []bool{false, true} {
prebuiltWorkspace := prebuiltWorkspace
for _, prebuiltWorkspaceBuildStage := range []sdkproto.PrebuiltWorkspaceBuildStage{
sdkproto.PrebuiltWorkspaceBuildStage_NONE,
sdkproto.PrebuiltWorkspaceBuildStage_CREATE,
sdkproto.PrebuiltWorkspaceBuildStage_CLAIM,
} {
prebuiltWorkspaceBuildStage := prebuiltWorkspaceBuildStage
t.Run(tc.name+"_WorkspaceBuildJob", func(t *testing.T) {
t.Parallel()
// Set the max session token lifetime so we can assert we
@ -212,7 +218,7 @@ func TestAcquireJob(t *testing.T) {
Roles: []string{rbac.RoleOrgAuditor()},
})
// Add extra erronous roles
// Add extra erroneous roles
secondOrg := dbgen.Organization(t, db, database.Organization{})
dbgen.OrganizationMember(t, db, database.OrganizationMember{
UserID: user.ID,
@ -287,36 +293,74 @@ func TestAcquireJob(t *testing.T) {
Required: true,
Sensitive: false,
})
workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
workspace := database.WorkspaceTable{
TemplateID: template.ID,
OwnerID: user.ID,
OrganizationID: pd.OrganizationID,
})
build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
}
workspace = dbgen.Workspace(t, db, workspace)
build := database.WorkspaceBuild{
WorkspaceID: workspace.ID,
BuildNumber: 1,
JobID: uuid.New(),
TemplateVersionID: version.ID,
Transition: database.WorkspaceTransitionStart,
Reason: database.BuildReasonInitiator,
})
var buildState sdkproto.PrebuiltWorkspaceBuildStage
if prebuiltWorkspace {
buildState = sdkproto.PrebuiltWorkspaceBuildStage_CREATE
}
_ = dbgen.ProvisionerJob(t, db, ps, database.ProvisionerJob{
ID: build.ID,
build = dbgen.WorkspaceBuild(t, db, build)
input := provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: build.ID,
}
dbJob := database.ProvisionerJob{
ID: build.JobID,
OrganizationID: pd.OrganizationID,
InitiatorID: user.ID,
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
FileID: file.ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: must(json.Marshal(provisionerdserver.WorkspaceProvisionJob{
Input: must(json.Marshal(input)),
}
dbJob = dbgen.ProvisionerJob(t, db, ps, dbJob)
var agent database.WorkspaceAgent
if prebuiltWorkspaceBuildStage == sdkproto.PrebuiltWorkspaceBuildStage_CLAIM {
resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
JobID: dbJob.ID,
})
agent = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
ResourceID: resource.ID,
AuthToken: uuid.New(),
})
// At this point we have an unclaimed workspace and build; now we need to set up the claim
// build
build = database.WorkspaceBuild{
WorkspaceID: workspace.ID,
BuildNumber: 2,
JobID: uuid.New(),
TemplateVersionID: version.ID,
Transition: database.WorkspaceTransitionStart,
Reason: database.BuildReasonInitiator,
InitiatorID: user.ID,
}
build = dbgen.WorkspaceBuild(t, db, build)
input = provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: build.ID,
PrebuiltWorkspaceBuildStage: buildState,
})),
})
PrebuiltWorkspaceBuildStage: prebuiltWorkspaceBuildStage,
}
dbJob = database.ProvisionerJob{
ID: build.JobID,
OrganizationID: pd.OrganizationID,
InitiatorID: user.ID,
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
FileID: file.ID,
Type: database.ProvisionerJobTypeWorkspaceBuild,
Input: must(json.Marshal(input)),
}
dbJob = dbgen.ProvisionerJob(t, db, ps, dbJob)
}
startPublished := make(chan struct{})
var closed bool
@ -350,6 +394,19 @@ func TestAcquireJob(t *testing.T) {
<-startPublished
if prebuiltWorkspaceBuildStage == sdkproto.PrebuiltWorkspaceBuildStage_CLAIM {
for {
// In the case of a prebuild claim, there is a second build, which is the
// one that we're interested in.
job, err = tc.acquire(ctx, srv)
require.NoError(t, err)
if _, ok := job.Type.(*proto.AcquiredJob_WorkspaceBuild_); ok {
break
}
}
<-startPublished
}
got, err := json.Marshal(job.Type)
require.NoError(t, err)
@ -384,8 +441,14 @@ func TestAcquireJob(t *testing.T) {
WorkspaceOwnerLoginType: string(user.LoginType),
WorkspaceOwnerRbacRoles: []*sdkproto.Role{{Name: rbac.RoleOrgMember(), OrgId: pd.OrganizationID.String()}, {Name: "member", OrgId: ""}, {Name: rbac.RoleOrgAuditor(), OrgId: pd.OrganizationID.String()}},
}
if prebuiltWorkspace {
wantedMetadata.PrebuiltWorkspaceBuildStage = sdkproto.PrebuiltWorkspaceBuildStage_CREATE
if prebuiltWorkspaceBuildStage == sdkproto.PrebuiltWorkspaceBuildStage_CLAIM {
// For claimed prebuilds, we expect the prebuild state to be set to CLAIM
// and we expect tokens from the first build to be set for reuse
wantedMetadata.PrebuiltWorkspaceBuildStage = prebuiltWorkspaceBuildStage
wantedMetadata.RunningAgentAuthTokens = append(wantedMetadata.RunningAgentAuthTokens, &sdkproto.RunningAgentAuthToken{
AgentId: agent.ID.String(),
Token: agent.AuthToken.String(),
})
}
slices.SortFunc(wantedMetadata.WorkspaceOwnerRbacRoles, func(a, b *sdkproto.Role) int {
@ -1750,6 +1813,110 @@ func TestCompleteJob(t *testing.T) {
})
}
})
t.Run("ReinitializePrebuiltAgents", func(t *testing.T) {
t.Parallel()
type testcase struct {
name string
shouldReinitializeAgent bool
}
for _, tc := range []testcase{
// Whether or not there are presets and those presets define prebuilds, etc
// are all irrelevant at this level. Those factors are useful earlier in the process.
// Everything relevant to this test is determined by whether `PrebuiltWorkspaceBuildStage`
// on the provisioner job input is CLAIM. As such, there are only two significant test cases:
{
name: "claimed prebuild",
shouldReinitializeAgent: true,
},
{
name: "not a claimed prebuild",
shouldReinitializeAgent: false,
},
} {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
// GIVEN an enqueued provisioner job and its dependencies:
srv, db, ps, pd := setup(t, false, &overrides{})
buildID := uuid.New()
jobInput := provisionerdserver.WorkspaceProvisionJob{
WorkspaceBuildID: buildID,
}
if tc.shouldReinitializeAgent { // This is the key lever in the test
// GIVEN the enqueued provisioner job is for a workspace being claimed by a user:
jobInput.PrebuiltWorkspaceBuildStage = sdkproto.PrebuiltWorkspaceBuildStage_CLAIM
}
input, err := json.Marshal(jobInput)
require.NoError(t, err)
ctx := testutil.Context(t, testutil.WaitShort)
job, err := db.InsertProvisionerJob(ctx, database.InsertProvisionerJobParams{
Input: input,
Provisioner: database.ProvisionerTypeEcho,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeWorkspaceBuild,
})
require.NoError(t, err)
tpl := dbgen.Template(t, db, database.Template{
OrganizationID: pd.OrganizationID,
})
tv := dbgen.TemplateVersion(t, db, database.TemplateVersion{
TemplateID: uuid.NullUUID{UUID: tpl.ID, Valid: true},
JobID: job.ID,
})
workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
TemplateID: tpl.ID,
})
_ = dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
ID: buildID,
JobID: job.ID,
WorkspaceID: workspace.ID,
TemplateVersionID: tv.ID,
})
_, err = db.AcquireProvisionerJob(ctx, database.AcquireProvisionerJobParams{
WorkerID: uuid.NullUUID{
UUID: pd.ID,
Valid: true,
},
Types: []database.ProvisionerType{database.ProvisionerTypeEcho},
})
require.NoError(t, err)
// GIVEN something is listening to process workspace reinitialization:
reinitChan := make(chan agentsdk.ReinitializationEvent, 1) // Buffered to simplify test structure
cancel, err := prebuilds.NewPubsubWorkspaceClaimListener(ps, testutil.Logger(t)).ListenForWorkspaceClaims(ctx, workspace.ID, reinitChan)
require.NoError(t, err)
defer cancel()
// WHEN the job is completed
completedJob := proto.CompletedJob{
JobId: job.ID.String(),
Type: &proto.CompletedJob_WorkspaceBuild_{
WorkspaceBuild: &proto.CompletedJob_WorkspaceBuild{},
},
}
_, err = srv.CompleteJob(ctx, &completedJob)
require.NoError(t, err)
if tc.shouldReinitializeAgent {
event := testutil.RequireReceive(ctx, t, reinitChan)
require.Equal(t, workspace.ID, event.WorkspaceID)
} else {
select {
case <-reinitChan:
t.Fatal("unexpected reinitialization event published")
default:
// OK
}
}
})
}
})
}
func TestInsertWorkspacePresetsAndParameters(t *testing.T) {