feat: reinitialize agents when a prebuilt workspace is claimed (#17475)

This pull request allows coder workspace agents to be reinitialized when
a prebuilt workspace is claimed by a user. This facilitates the transfer
of ownership between the anonymous prebuilds system user and the new
owner of the workspace.

Only a single agent per prebuilt workspace is supported for now, but
plumbing has already been done to facilitate the seamless transition to
multi-agent support.

---------

Signed-off-by: Danny Kopping <dannykopping@gmail.com>
Co-authored-by: Danny Kopping <dannykopping@gmail.com>
This commit is contained in:
Sas Swart
2025-05-14 14:15:36 +02:00
committed by GitHub
parent fcbdd1a28e
commit 425ee6fa55
38 changed files with 2184 additions and 449 deletions

82
coderd/prebuilds/claim.go Normal file
View File

@ -0,0 +1,82 @@
package prebuilds
import (
"context"
"sync"
"github.com/google/uuid"
"golang.org/x/xerrors"
"cdr.dev/slog"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/codersdk/agentsdk"
)
// PubsubWorkspaceClaimPublisher publishes workspace claim events through the
// pubsub.Pubsub it wraps.
type PubsubWorkspaceClaimPublisher struct {
	ps pubsub.Pubsub
}

// NewPubsubWorkspaceClaimPublisher returns a PubsubWorkspaceClaimPublisher
// that publishes on ps.
func NewPubsubWorkspaceClaimPublisher(ps pubsub.Pubsub) *PubsubWorkspaceClaimPublisher {
	publisher := new(PubsubWorkspaceClaimPublisher)
	publisher.ps = ps
	return publisher
}
// PublishWorkspaceClaim publishes claim.Reason as the message payload on the
// prebuild-claimed channel derived from claim.WorkspaceID. Any error from the
// underlying pubsub is wrapped and returned.
func (p PubsubWorkspaceClaimPublisher) PublishWorkspaceClaim(claim agentsdk.ReinitializationEvent) error {
	payload := []byte(claim.Reason)
	err := p.ps.Publish(agentsdk.PrebuildClaimedChannel(claim.WorkspaceID), payload)
	if err == nil {
		return nil
	}
	return xerrors.Errorf("failed to trigger prebuilt workspace agent reinitialization: %w", err)
}
func NewPubsubWorkspaceClaimListener(ps pubsub.Pubsub, logger slog.Logger) *PubsubWorkspaceClaimListener {
return &PubsubWorkspaceClaimListener{ps: ps, logger: logger}
}
type PubsubWorkspaceClaimListener struct {
logger slog.Logger
ps pubsub.Pubsub
}
// ListenForWorkspaceClaims subscribes to the prebuild-claimed pubsub channel for workspaceID and forwards every
// message it receives to reinitEvents as an agentsdk.ReinitializationEvent.
//
// pubsub.Pubsub does not communicate when its last callback has been called after it has been closed. As such,
// reinitEvents is never closed by this method. Call the returned cancel() function to close the subscription when
// it is no longer needed; calling it more than once is safe. cancel() is also called if ctx expires or is canceled.
//
// If ctx is already done, or the subscription cannot be established, a no-op cancel function and a non-nil error
// are returned.
func (p PubsubWorkspaceClaimListener) ListenForWorkspaceClaims(ctx context.Context, workspaceID uuid.UUID, reinitEvents chan<- agentsdk.ReinitializationEvent) (func(), error) {
	// Fail fast rather than subscribing on behalf of a context that is already finished.
	select {
	case <-ctx.Done():
		return func() {}, ctx.Err()
	default:
	}

	// done is closed exactly once by cancel(). It lets the watcher goroutine below exit when the caller cancels
	// explicitly (otherwise it would block forever on a ctx that never ends), and it releases any callback that
	// is still blocked trying to deliver an event after the subscription has been canceled.
	done := make(chan struct{})

	cancelSub, err := p.ps.Subscribe(agentsdk.PrebuildClaimedChannel(workspaceID), func(inner context.Context, reason []byte) {
		claim := agentsdk.ReinitializationEvent{
			WorkspaceID: workspaceID,
			Reason:      agentsdk.ReinitializationReason(reason),
		}

		// Never block the pubsub callback indefinitely: give up on delivery as soon as the caller's context,
		// the callback's own context, or the subscription itself is finished.
		select {
		case <-ctx.Done():
			return
		case <-inner.Done():
			return
		case <-done:
			return
		case reinitEvents <- claim:
		}
	})
	if err != nil {
		return func() {}, xerrors.Errorf("failed to subscribe to prebuild claimed channel: %w", err)
	}

	// cancel may be invoked both by the caller and by the watcher goroutine, so guard it with a sync.Once.
	var once sync.Once
	cancel := func() {
		once.Do(func() {
			close(done)
			cancelSub()
		})
	}

	// Tie the subscription's lifetime to ctx, but stop watching as soon as cancel() has run so this goroutine
	// does not leak when ctx is long-lived.
	go func() {
		select {
		case <-ctx.Done():
			cancel()
		case <-done:
		}
	}()

	return cancel, nil
}

View File

@ -0,0 +1,141 @@
package prebuilds_test
import (
"context"
"testing"
"time"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/database/pubsub"
"github.com/coder/coder/v2/coderd/prebuilds"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/testutil"
)
// TestPubsubWorkspaceClaimPublisher covers the publisher: a round trip through
// an in-memory pubsub to a listener, and error wrapping when publishing fails.
func TestPubsubWorkspaceClaimPublisher(t *testing.T) {
	t.Parallel()

	t.Run("published claim is received by a listener for the same workspace", func(t *testing.T) {
		t.Parallel()

		ctx := testutil.Context(t, testutil.WaitShort)
		log := testutil.Logger(t)
		bus := pubsub.NewInMemory()
		wsID := uuid.New()

		// Buffered so the listener's callback can deliver before the test starts receiving.
		events := make(chan agentsdk.ReinitializationEvent, 1)

		// Subscribe first so the published claim is not missed.
		stop, err := prebuilds.NewPubsubWorkspaceClaimListener(bus, log).ListenForWorkspaceClaims(ctx, wsID, events)
		require.NoError(t, err)
		defer stop()

		want := agentsdk.ReinitializationEvent{
			WorkspaceID: wsID,
			Reason:      agentsdk.ReinitializeReasonPrebuildClaimed,
		}
		publisher := prebuilds.NewPubsubWorkspaceClaimPublisher(bus)
		require.NoError(t, publisher.PublishWorkspaceClaim(want))

		got := testutil.RequireReceive(ctx, t, events)
		require.Equal(t, wsID, got.WorkspaceID)
		require.Equal(t, want.Reason, got.Reason)
	})

	t.Run("fail to publish claim", func(t *testing.T) {
		t.Parallel()

		// brokenPubsub fails every Publish call, so the publisher must surface a wrapped error.
		publisher := prebuilds.NewPubsubWorkspaceClaimPublisher(&brokenPubsub{})

		err := publisher.PublishWorkspaceClaim(agentsdk.ReinitializationEvent{
			WorkspaceID: uuid.New(),
			Reason:      agentsdk.ReinitializeReasonPrebuildClaimed,
		})
		require.ErrorContains(t, err, "failed to trigger prebuilt workspace agent reinitialization")
	})
}
func TestPubsubWorkspaceClaimListener(t *testing.T) {
t.Parallel()
t.Run("finds claim events for its workspace", func(t *testing.T) {
t.Parallel()
ps := pubsub.NewInMemory()
listener := prebuilds.NewPubsubWorkspaceClaimListener(ps, slogtest.Make(t, nil))
claims := make(chan agentsdk.ReinitializationEvent, 1) // Buffer to avoid messing with goroutines in the rest of the test
workspaceID := uuid.New()
cancelFunc, err := listener.ListenForWorkspaceClaims(context.Background(), workspaceID, claims)
require.NoError(t, err)
defer cancelFunc()
// Publish a claim
channel := agentsdk.PrebuildClaimedChannel(workspaceID)
reason := agentsdk.ReinitializeReasonPrebuildClaimed
err = ps.Publish(channel, []byte(reason))
require.NoError(t, err)
// Verify we receive the claim
ctx := testutil.Context(t, testutil.WaitShort)
claim := testutil.RequireReceive(ctx, t, claims)
require.Equal(t, workspaceID, claim.WorkspaceID)
require.Equal(t, reason, claim.Reason)
})
t.Run("ignores claim events for other workspaces", func(t *testing.T) {
t.Parallel()
ps := pubsub.NewInMemory()
listener := prebuilds.NewPubsubWorkspaceClaimListener(ps, slogtest.Make(t, nil))
claims := make(chan agentsdk.ReinitializationEvent)
workspaceID := uuid.New()
otherWorkspaceID := uuid.New()
cancelFunc, err := listener.ListenForWorkspaceClaims(context.Background(), workspaceID, claims)
require.NoError(t, err)
defer cancelFunc()
// Publish a claim for a different workspace
channel := agentsdk.PrebuildClaimedChannel(otherWorkspaceID)
err = ps.Publish(channel, []byte(agentsdk.ReinitializeReasonPrebuildClaimed))
require.NoError(t, err)
// Verify we don't receive the claim
select {
case <-claims:
t.Fatal("received claim for wrong workspace")
case <-time.After(100 * time.Millisecond):
// Expected - no claim received
}
})
t.Run("communicates the error if it can't subscribe", func(t *testing.T) {
t.Parallel()
claims := make(chan agentsdk.ReinitializationEvent)
ps := &brokenPubsub{}
listener := prebuilds.NewPubsubWorkspaceClaimListener(ps, slogtest.Make(t, nil))
_, err := listener.ListenForWorkspaceClaims(context.Background(), uuid.New(), claims)
require.ErrorContains(t, err, "failed to subscribe to prebuild claimed channel")
})
}
// brokenPubsub is a test double whose Subscribe and Publish always fail.
// It embeds pubsub.Pubsub so that it satisfies the full interface; the
// embedded value is left nil, so only the overridden methods may be called.
type brokenPubsub struct {
	pubsub.Pubsub
}

// Subscribe always fails, returning a nil cancel function and a "broken" error.
func (brokenPubsub) Subscribe(string, pubsub.Listener) (func(), error) {
	err := xerrors.New("broken")
	return nil, err
}

// Publish always fails with a "broken" error.
func (brokenPubsub) Publish(string, []byte) error {
	err := xerrors.New("broken")
	return err
}