feat(agent/agentcontainers): retry with longer name on failure (#18513)

Closes https://github.com/coder/internal/issues/732

We now try (up to 5 times) when attempting to create an agent using the
workspace folder as the name.

It is important to note this flow is only ever ran when attempting to
create an agent using the workspace folder as the name. If a deployment
uses terraform or the devcontainer customization, we do not fall back to
this approach.
This commit is contained in:
Danielle Maywood
2025-06-24 19:04:16 +01:00
committed by GitHub
parent 4fd0312776
commit fcf93719c9
3 changed files with 403 additions and 22 deletions

View File

@ -42,7 +42,8 @@ const (
// read-write, which seems sensible for devcontainers. // read-write, which seems sensible for devcontainers.
coderPathInsideContainer = "/.coder-agent/coder" coderPathInsideContainer = "/.coder-agent/coder"
maxAgentNameLength = 64 maxAgentNameLength = 64
maxAttemptsToNameAgent = 5
) )
// API is responsible for container-related operations in the agent. // API is responsible for container-related operations in the agent.
@ -71,17 +72,18 @@ type API struct {
ownerName string ownerName string
workspaceName string workspaceName string
mu sync.RWMutex mu sync.RWMutex
closed bool closed bool
containers codersdk.WorkspaceAgentListContainersResponse // Output from the last list operation. containers codersdk.WorkspaceAgentListContainersResponse // Output from the last list operation.
containersErr error // Error from the last list operation. containersErr error // Error from the last list operation.
devcontainerNames map[string]bool // By devcontainer name. devcontainerNames map[string]bool // By devcontainer name.
knownDevcontainers map[string]codersdk.WorkspaceAgentDevcontainer // By workspace folder. knownDevcontainers map[string]codersdk.WorkspaceAgentDevcontainer // By workspace folder.
configFileModifiedTimes map[string]time.Time // By config file path. configFileModifiedTimes map[string]time.Time // By config file path.
recreateSuccessTimes map[string]time.Time // By workspace folder. recreateSuccessTimes map[string]time.Time // By workspace folder.
recreateErrorTimes map[string]time.Time // By workspace folder. recreateErrorTimes map[string]time.Time // By workspace folder.
injectedSubAgentProcs map[string]subAgentProcess // By workspace folder. injectedSubAgentProcs map[string]subAgentProcess // By workspace folder.
asyncWg sync.WaitGroup usingWorkspaceFolderName map[string]bool // By workspace folder.
asyncWg sync.WaitGroup
devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder. devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder.
} }
@ -278,6 +280,7 @@ func NewAPI(logger slog.Logger, options ...Option) *API {
recreateErrorTimes: make(map[string]time.Time), recreateErrorTimes: make(map[string]time.Time),
scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} }, scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} },
injectedSubAgentProcs: make(map[string]subAgentProcess), injectedSubAgentProcs: make(map[string]subAgentProcess),
usingWorkspaceFolderName: make(map[string]bool),
} }
// The ctx and logger must be set before applying options to avoid // The ctx and logger must be set before applying options to avoid
// nil pointer dereference. // nil pointer dereference.
@ -630,7 +633,7 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code
// folder's name. If it is not possible to generate a valid // folder's name. If it is not possible to generate a valid
// agent name based off of the folder name (i.e. no valid characters), // agent name based off of the folder name (i.e. no valid characters),
// we will instead fall back to using the container's friendly name. // we will instead fall back to using the container's friendly name.
dc.Name = safeAgentName(path.Base(filepath.ToSlash(dc.WorkspaceFolder)), dc.Container.FriendlyName) dc.Name, api.usingWorkspaceFolderName[dc.WorkspaceFolder] = api.makeAgentName(dc.WorkspaceFolder, dc.Container.FriendlyName)
} }
} }
@ -678,8 +681,10 @@ func (api *API) processUpdatedContainersLocked(ctx context.Context, updated code
var consecutiveHyphenRegex = regexp.MustCompile("-+") var consecutiveHyphenRegex = regexp.MustCompile("-+")
// `safeAgentName` returns a safe agent name derived from a folder name, // `safeAgentName` returns a safe agent name derived from a folder name,
// falling back to the containers friendly name if needed. // falling back to the containers friendly name if needed. The second
func safeAgentName(name string, friendlyName string) string { // return value will be `true` if it succeeded and `false` if it had
// to fallback to the friendly name.
func safeAgentName(name string, friendlyName string) (string, bool) {
// Keep only ASCII letters and digits, replacing everything // Keep only ASCII letters and digits, replacing everything
// else with a hyphen. // else with a hyphen.
var sb strings.Builder var sb strings.Builder
@ -701,10 +706,10 @@ func safeAgentName(name string, friendlyName string) string {
name = name[:min(len(name), maxAgentNameLength)] name = name[:min(len(name), maxAgentNameLength)]
if provisioner.AgentNameRegex.Match([]byte(name)) { if provisioner.AgentNameRegex.Match([]byte(name)) {
return name return name, true
} }
return safeFriendlyName(friendlyName) return safeFriendlyName(friendlyName), false
} }
// safeFriendlyName returns a API safe version of the container's // safeFriendlyName returns a API safe version of the container's
@ -719,6 +724,47 @@ func safeFriendlyName(name string) string {
return name return name
} }
// expandedAgentName creates an agent name by including parent directories
// from the workspace folder path to avoid name collisions. Like `safeAgentName`,
// the second returned value will be true if using the workspace folder name,
// and false if it fell back to the friendly name.
func expandedAgentName(workspaceFolder string, friendlyName string, depth int) (string, bool) {
var parts []string
for part := range strings.SplitSeq(filepath.ToSlash(workspaceFolder), "/") {
if part = strings.TrimSpace(part); part != "" {
parts = append(parts, part)
}
}
if len(parts) == 0 {
return safeFriendlyName(friendlyName), false
}
components := parts[max(0, len(parts)-depth-1):]
expanded := strings.Join(components, "-")
return safeAgentName(expanded, friendlyName)
}
// makeAgentName attempts to create an agent name. It will first attempt to create an
// agent name based off of the workspace folder, and will eventually fallback to a
// friendly name. Like `safeAgentName`, the second returned value will be true if the
// agent name utilizes the workspace folder, and false if it falls back to the
// friendly name.
func (api *API) makeAgentName(workspaceFolder string, friendlyName string) (string, bool) {
for attempt := 0; attempt <= maxAttemptsToNameAgent; attempt++ {
agentName, usingWorkspaceFolder := expandedAgentName(workspaceFolder, friendlyName, attempt)
if !usingWorkspaceFolder {
return agentName, false
}
if !api.devcontainerNames[agentName] {
return agentName, true
}
}
return safeFriendlyName(friendlyName), false
}
// RefreshContainers triggers an immediate update of the container list // RefreshContainers triggers an immediate update of the container list
// and waits for it to complete. // and waits for it to complete.
func (api *API) RefreshContainers(ctx context.Context) (err error) { func (api *API) RefreshContainers(ctx context.Context) (err error) {
@ -1234,6 +1280,7 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
if provisioner.AgentNameRegex.Match([]byte(name)) { if provisioner.AgentNameRegex.Match([]byte(name)) {
subAgentConfig.Name = name subAgentConfig.Name = name
configOutdated = true configOutdated = true
delete(api.usingWorkspaceFolderName, dc.WorkspaceFolder)
} else { } else {
logger.Warn(ctx, "invalid name in devcontainer customization, ignoring", logger.Warn(ctx, "invalid name in devcontainer customization, ignoring",
slog.F("name", name), slog.F("name", name),
@ -1320,12 +1367,55 @@ func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc c
) )
// Create new subagent record in the database to receive the auth token. // Create new subagent record in the database to receive the auth token.
// If we get a unique constraint violation, try with expanded names that
// include parent directories to avoid collisions.
client := *api.subAgentClient.Load() client := *api.subAgentClient.Load()
newSubAgent, err := client.Create(ctx, subAgentConfig)
if err != nil { originalName := subAgentConfig.Name
return xerrors.Errorf("create subagent failed: %w", err)
for attempt := 1; attempt <= maxAttemptsToNameAgent; attempt++ {
if proc.agent, err = client.Create(ctx, subAgentConfig); err == nil {
if api.usingWorkspaceFolderName[dc.WorkspaceFolder] {
api.devcontainerNames[dc.Name] = true
delete(api.usingWorkspaceFolderName, dc.WorkspaceFolder)
}
break
}
// NOTE(DanielleMaywood):
// Ordinarily we'd use `errors.As` here, but it didn't appear to work. Not
// sure if this is because of the communication protocol? Instead I've opted
// for a slightly more janky string contains approach.
//
// We only care if sub agent creation has failed due to a unique constraint
// violation on the agent name, as we can _possibly_ rectify this.
if !strings.Contains(err.Error(), "workspace agent name") {
return xerrors.Errorf("create subagent failed: %w", err)
}
// If there has been a unique constraint violation but the user is *not*
// using an auto-generated name, then we should error. This is because
// we do not want to surprise the user with a name they did not ask for.
if usingFolderName := api.usingWorkspaceFolderName[dc.WorkspaceFolder]; !usingFolderName {
return xerrors.Errorf("create subagent failed: %w", err)
}
if attempt == maxAttemptsToNameAgent {
return xerrors.Errorf("create subagent failed after %d attempts: %w", attempt, err)
}
// We increase how much of the workspace folder is used for generating
// the agent name. With each iteration there is greater chance of this
// being successful.
subAgentConfig.Name, api.usingWorkspaceFolderName[dc.WorkspaceFolder] = expandedAgentName(dc.WorkspaceFolder, dc.Container.FriendlyName, attempt)
logger.Debug(ctx, "retrying subagent creation with expanded name",
slog.F("original_name", originalName),
slog.F("expanded_name", subAgentConfig.Name),
slog.F("attempt", attempt+1),
)
} }
proc.agent = newSubAgent
logger.Info(ctx, "created new subagent", slog.F("agent_id", proc.agent.ID)) logger.Info(ctx, "created new subagent", slog.F("agent_id", proc.agent.ID))
} else { } else {

View File

@ -15,6 +15,7 @@ func TestSafeAgentName(t *testing.T) {
name string name string
folderName string folderName string
expected string expected string
fallback bool
}{ }{
// Basic valid names // Basic valid names
{ {
@ -110,18 +111,22 @@ func TestSafeAgentName(t *testing.T) {
{ {
folderName: "", folderName: "",
expected: "friendly-fallback", expected: "friendly-fallback",
fallback: true,
}, },
{ {
folderName: "---", folderName: "---",
expected: "friendly-fallback", expected: "friendly-fallback",
fallback: true,
}, },
{ {
folderName: "___", folderName: "___",
expected: "friendly-fallback", expected: "friendly-fallback",
fallback: true,
}, },
{ {
folderName: "@#$", folderName: "@#$",
expected: "friendly-fallback", expected: "friendly-fallback",
fallback: true,
}, },
// Additional edge cases // Additional edge cases
@ -192,10 +197,162 @@ func TestSafeAgentName(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.folderName, func(t *testing.T) { t.Run(tt.folderName, func(t *testing.T) {
t.Parallel() t.Parallel()
name := safeAgentName(tt.folderName, "friendly-fallback") name, usingWorkspaceFolder := safeAgentName(tt.folderName, "friendly-fallback")
assert.Equal(t, tt.expected, name) assert.Equal(t, tt.expected, name)
assert.True(t, provisioner.AgentNameRegex.Match([]byte(name))) assert.True(t, provisioner.AgentNameRegex.Match([]byte(name)))
assert.Equal(t, tt.fallback, !usingWorkspaceFolder)
})
}
}
func TestExpandedAgentName(t *testing.T) {
t.Parallel()
tests := []struct {
name string
workspaceFolder string
friendlyName string
depth int
expected string
fallback bool
}{
{
name: "simple path depth 1",
workspaceFolder: "/home/coder/project",
friendlyName: "friendly-fallback",
depth: 0,
expected: "project",
},
{
name: "simple path depth 2",
workspaceFolder: "/home/coder/project",
friendlyName: "friendly-fallback",
depth: 1,
expected: "coder-project",
},
{
name: "simple path depth 3",
workspaceFolder: "/home/coder/project",
friendlyName: "friendly-fallback",
depth: 2,
expected: "home-coder-project",
},
{
name: "simple path depth exceeds available",
workspaceFolder: "/home/coder/project",
friendlyName: "friendly-fallback",
depth: 9,
expected: "home-coder-project",
},
// Cases with special characters that need sanitization
{
name: "path with spaces and special chars",
workspaceFolder: "/home/coder/My Project_v2",
friendlyName: "friendly-fallback",
depth: 1,
expected: "coder-my-project-v2",
},
{
name: "path with dots and underscores",
workspaceFolder: "/home/user.name/project_folder.git",
friendlyName: "friendly-fallback",
depth: 1,
expected: "user-name-project-folder-git",
},
// Edge cases
{
name: "empty path",
workspaceFolder: "",
friendlyName: "friendly-fallback",
depth: 0,
expected: "friendly-fallback",
fallback: true,
},
{
name: "root path",
workspaceFolder: "/",
friendlyName: "friendly-fallback",
depth: 0,
expected: "friendly-fallback",
fallback: true,
},
{
name: "single component",
workspaceFolder: "project",
friendlyName: "friendly-fallback",
depth: 0,
expected: "project",
},
{
name: "single component with depth 2",
workspaceFolder: "project",
friendlyName: "friendly-fallback",
depth: 1,
expected: "project",
},
// Collision simulation cases
{
name: "foo/project depth 1",
workspaceFolder: "/home/coder/foo/project",
friendlyName: "friendly-fallback",
depth: 0,
expected: "project",
},
{
name: "foo/project depth 2",
workspaceFolder: "/home/coder/foo/project",
friendlyName: "friendly-fallback",
depth: 1,
expected: "foo-project",
},
{
name: "bar/project depth 1",
workspaceFolder: "/home/coder/bar/project",
friendlyName: "friendly-fallback",
depth: 0,
expected: "project",
},
{
name: "bar/project depth 2",
workspaceFolder: "/home/coder/bar/project",
friendlyName: "friendly-fallback",
depth: 1,
expected: "bar-project",
},
// Path with trailing slashes
{
name: "path with trailing slash",
workspaceFolder: "/home/coder/project/",
friendlyName: "friendly-fallback",
depth: 1,
expected: "coder-project",
},
{
name: "path with multiple trailing slashes",
workspaceFolder: "/home/coder/project///",
friendlyName: "friendly-fallback",
depth: 1,
expected: "coder-project",
},
// Path with leading slashes
{
name: "path with multiple leading slashes",
workspaceFolder: "///home/coder/project",
friendlyName: "friendly-fallback",
depth: 1,
expected: "coder-project",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
name, usingWorkspaceFolder := expandedAgentName(tt.workspaceFolder, tt.friendlyName, tt.depth)
assert.Equal(t, tt.expected, name)
assert.True(t, provisioner.AgentNameRegex.Match([]byte(name)))
assert.Equal(t, tt.fallback, !usingWorkspaceFolder)
}) })
} }
} }

View File

@ -3,12 +3,14 @@ package agentcontainers_test
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"fmt"
"math/rand" "math/rand"
"net/http" "net/http"
"net/http/httptest" "net/http/httptest"
"os" "os"
"os/exec" "os/exec"
"runtime" "runtime"
"slices"
"strings" "strings"
"sync" "sync"
"testing" "testing"
@ -17,6 +19,7 @@ import (
"github.com/fsnotify/fsnotify" "github.com/fsnotify/fsnotify"
"github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/lib/pq"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"go.uber.org/mock/gomock" "go.uber.org/mock/gomock"
@ -264,6 +267,16 @@ func (m *fakeSubAgentClient) Create(ctx context.Context, agent agentcontainers.S
if agent.OperatingSystem == "" { if agent.OperatingSystem == "" {
return agentcontainers.SubAgent{}, xerrors.New("operating system must be set") return agentcontainers.SubAgent{}, xerrors.New("operating system must be set")
} }
for _, a := range m.agents {
if a.Name == agent.Name {
return agentcontainers.SubAgent{}, &pq.Error{
Code: "23505",
Message: fmt.Sprintf("workspace agent name %q already exists in this workspace build", agent.Name),
}
}
}
agent.ID = uuid.New() agent.ID = uuid.New()
agent.AuthToken = uuid.New() agent.AuthToken = uuid.New()
if m.agents == nil { if m.agents == nil {
@ -2074,6 +2087,127 @@ func mustFindDevcontainerByPath(t *testing.T, devcontainers []codersdk.Workspace
return codersdk.WorkspaceAgentDevcontainer{} // Unreachable, but required for compilation return codersdk.WorkspaceAgentDevcontainer{} // Unreachable, but required for compilation
} }
// TestSubAgentCreationWithNameRetry tests the retry logic when unique constraint violations occur
func TestSubAgentCreationWithNameRetry(t *testing.T) {
t.Parallel()
if runtime.GOOS == "windows" {
t.Skip("Dev Container tests are not supported on Windows")
}
tests := []struct {
name string
workspaceFolders []string
expectedNames []string
takenNames []string
}{
{
name: "SingleCollision",
workspaceFolders: []string{
"/home/coder/foo/project",
"/home/coder/bar/project",
},
expectedNames: []string{
"project",
"bar-project",
},
},
{
name: "MultipleCollisions",
workspaceFolders: []string{
"/home/coder/foo/x/project",
"/home/coder/bar/x/project",
"/home/coder/baz/x/project",
},
expectedNames: []string{
"project",
"x-project",
"baz-x-project",
},
},
{
name: "NameAlreadyTaken",
takenNames: []string{"project", "x-project"},
workspaceFolders: []string{
"/home/coder/foo/x/project",
},
expectedNames: []string{
"foo-x-project",
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
var (
ctx = testutil.Context(t, testutil.WaitMedium)
logger = testutil.Logger(t)
mClock = quartz.NewMock(t)
fSAC = &fakeSubAgentClient{logger: logger, agents: make(map[uuid.UUID]agentcontainers.SubAgent)}
ccli = &fakeContainerCLI{arch: runtime.GOARCH}
)
for _, name := range tt.takenNames {
fSAC.agents[uuid.New()] = agentcontainers.SubAgent{Name: name}
}
mClock.Set(time.Now()).MustWait(ctx)
tickerTrap := mClock.Trap().TickerFunc("updaterLoop")
api := agentcontainers.NewAPI(logger,
agentcontainers.WithClock(mClock),
agentcontainers.WithContainerCLI(ccli),
agentcontainers.WithDevcontainerCLI(&fakeDevcontainerCLI{}),
agentcontainers.WithSubAgentClient(fSAC),
agentcontainers.WithWatcher(watcher.NewNoop()),
)
defer api.Close()
tickerTrap.MustWait(ctx).MustRelease(ctx)
tickerTrap.Close()
for i, workspaceFolder := range tt.workspaceFolders {
ccli.containers.Containers = append(ccli.containers.Containers, newFakeContainer(
fmt.Sprintf("container%d", i+1),
fmt.Sprintf("/.devcontainer/devcontainer%d.json", i+1),
workspaceFolder,
))
err := api.RefreshContainers(ctx)
require.NoError(t, err)
}
// Verify that both agents were created with expected names
require.Len(t, fSAC.created, len(tt.workspaceFolders))
actualNames := make([]string, len(fSAC.created))
for i, agent := range fSAC.created {
actualNames[i] = agent.Name
}
slices.Sort(tt.expectedNames)
slices.Sort(actualNames)
assert.Equal(t, tt.expectedNames, actualNames)
})
}
}
func newFakeContainer(id, configPath, workspaceFolder string) codersdk.WorkspaceAgentContainer {
return codersdk.WorkspaceAgentContainer{
ID: id,
FriendlyName: "test-friendly",
Image: "test-image:latest",
Labels: map[string]string{
agentcontainers.DevcontainerLocalFolderLabel: workspaceFolder,
agentcontainers.DevcontainerConfigFileLabel: configPath,
},
Running: true,
}
}
func fakeContainer(t *testing.T, mut ...func(*codersdk.WorkspaceAgentContainer)) codersdk.WorkspaceAgentContainer { func fakeContainer(t *testing.T, mut ...func(*codersdk.WorkspaceAgentContainer)) codersdk.WorkspaceAgentContainer {
t.Helper() t.Helper()
ct := codersdk.WorkspaceAgentContainer{ ct := codersdk.WorkspaceAgentContainer{