coder/agent/agentcontainers/api.go
Commit 0f3a1e9849 by Mathias Fredriksson
fix(agent/agentcontainers): split Init into Init and Start for early API responses (#18640)
Previously in #18635 we delayed the containers API `Init` to avoid producing
errors from Docker and `@devcontainers/cli` not yet being installed by startup
scripts. This had an adverse effect on UX: UI responsiveness suffered because
devcontainer detection was greatly delayed.

This change splits `Init` into `Init` and `Start` so that, immediately after
`Init`, we can start serving known devcontainers (defined in Terraform),
improving the UX.

Related #18635
Related #18640
2025-06-27 19:01:50 +03:00
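
The split means the API can be constructed and `Init` called as soon as the
agent knows its Terraform-defined devcontainers, while `Start` (which kicks off
the watcher and updater loops) is deferred until dependencies are available. A
minimal lifecycle sketch, assuming a `slog.Logger` named `logger` and omitting
option plumbing:

	api := agentcontainers.NewAPI(logger)
	api.Init()  // Known devcontainers become servable via the HTTP routes.
	// ... startup scripts install Docker and @devcontainers/cli ...
	api.Start() // Begin the watcher and updater loops.
	defer api.Close()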


package agentcontainers
import (
"context"
"errors"
"fmt"
"net/http"
"os"
"path"
"path/filepath"
"regexp"
"runtime"
"slices"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/fsnotify/fsnotify"
"github.com/go-chi/chi/v5"
"github.com/google/uuid"
"golang.org/x/xerrors"
"cdr.dev/slog"
"github.com/coder/coder/v2/agent/agentcontainers/watcher"
"github.com/coder/coder/v2/agent/agentexec"
"github.com/coder/coder/v2/agent/usershell"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/provisioner"
"github.com/coder/quartz"
)
const (
defaultUpdateInterval = 10 * time.Second
defaultOperationTimeout = 15 * time.Second
// Destination path inside the container. We store the binary at a fixed
// location under /.coder-agent/coder to avoid conflicts and to avoid being
// shadowed by tmpfs or other mounts. This assumes the container root
// filesystem is read-write, which seems sensible for devcontainers.
coderPathInsideContainer = "/.coder-agent/coder"
maxAgentNameLength = 64
maxAttemptsToNameAgent = 5
)
// API is responsible for container-related operations in the agent.
// It provides methods to list and manage containers.
type API struct {
ctx context.Context
cancel context.CancelFunc
watcherDone chan struct{}
updaterDone chan struct{}
updateTrigger chan chan error // Channel to trigger manual refresh.
updateInterval time.Duration // Interval for periodic container updates.
logger slog.Logger
watcher watcher.Watcher
execer agentexec.Execer
commandEnv CommandEnv
ccli ContainerCLI
containerLabelIncludeFilter map[string]string // Labels to filter containers by.
dccli DevcontainerCLI
clock quartz.Clock
scriptLogger func(logSourceID uuid.UUID) ScriptLogger
subAgentClient atomic.Pointer[SubAgentClient]
subAgentURL string
subAgentEnv []string
ownerName string
workspaceName string
parentAgent string
mu sync.RWMutex // Protects the following fields.
initDone chan struct{} // Closed by Init.
closed bool
containers codersdk.WorkspaceAgentListContainersResponse // Output from the last list operation.
containersErr error // Error from the last list operation.
devcontainerNames map[string]bool // By devcontainer name.
knownDevcontainers map[string]codersdk.WorkspaceAgentDevcontainer // By workspace folder.
devcontainerLogSourceIDs map[string]uuid.UUID // By workspace folder.
configFileModifiedTimes map[string]time.Time // By config file path.
recreateSuccessTimes map[string]time.Time // By workspace folder.
recreateErrorTimes map[string]time.Time // By workspace folder.
injectedSubAgentProcs map[string]subAgentProcess // By workspace folder.
usingWorkspaceFolderName map[string]bool // By workspace folder.
ignoredDevcontainers map[string]bool // By workspace folder. Tracks three states (true, false and not checked).
asyncWg sync.WaitGroup
}
type subAgentProcess struct {
agent SubAgent
containerID string
ctx context.Context
stop context.CancelFunc
}
// Option is a functional option for API.
type Option func(*API)
// WithClock sets the quartz.Clock implementation to use.
// This is primarily used for testing to control time.
func WithClock(clock quartz.Clock) Option {
return func(api *API) {
api.clock = clock
}
}
// WithExecer sets the agentexec.Execer implementation to use.
func WithExecer(execer agentexec.Execer) Option {
return func(api *API) {
api.execer = execer
}
}
// WithCommandEnv sets the CommandEnv implementation to use.
func WithCommandEnv(ce CommandEnv) Option {
return func(api *API) {
api.commandEnv = func(ei usershell.EnvInfoer, preEnv []string) (string, string, []string, error) {
shell, dir, env, err := ce(ei, preEnv)
if err != nil {
return shell, dir, env, err
}
env = slices.DeleteFunc(env, func(s string) bool {
// Ensure we filter out environment variables that come
// from the parent agent and are incorrect or not
// relevant for the devcontainer.
return strings.HasPrefix(s, "CODER_WORKSPACE_AGENT_NAME=") ||
strings.HasPrefix(s, "CODER_WORKSPACE_AGENT_URL=") ||
strings.HasPrefix(s, "CODER_AGENT_TOKEN=") ||
strings.HasPrefix(s, "CODER_AGENT_AUTH=") ||
strings.HasPrefix(s, "CODER_AGENT_DEVCONTAINERS_ENABLE=")
})
return shell, dir, env, nil
}
}
}
// WithContainerCLI sets the agentcontainers.ContainerCLI implementation
// to use. The default implementation uses the Docker CLI.
func WithContainerCLI(ccli ContainerCLI) Option {
return func(api *API) {
api.ccli = ccli
}
}
// WithContainerLabelIncludeFilter sets a label filter for containers.
// This option can be given multiple times to filter by multiple labels.
// The behavior is such that only containers matching one or more of the
// provided labels will be included.
func WithContainerLabelIncludeFilter(label, value string) Option {
return func(api *API) {
api.containerLabelIncludeFilter[label] = value
}
}
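// Illustrative sketch: restrict the API to containers carrying a specific
// local-folder label (the value here is hypothetical):
//
//	api := NewAPI(logger,
//		WithContainerLabelIncludeFilter(DevcontainerLocalFolderLabel, "/home/coder/project"),
//	)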
// WithDevcontainerCLI sets the DevcontainerCLI implementation to use.
// This can be used in tests to modify @devcontainers/cli behavior.
func WithDevcontainerCLI(dccli DevcontainerCLI) Option {
return func(api *API) {
api.dccli = dccli
}
}
// WithSubAgentClient sets the SubAgentClient implementation to use.
// This is used to list, create, and delete devcontainer agents.
func WithSubAgentClient(client SubAgentClient) Option {
return func(api *API) {
api.subAgentClient.Store(&client)
}
}
// WithSubAgentURL sets the agent URL for the sub-agent for
// communicating with the control plane.
func WithSubAgentURL(url string) Option {
return func(api *API) {
api.subAgentURL = url
}
}
// WithSubAgentEnv sets the environment variables for the sub-agent.
func WithSubAgentEnv(env ...string) Option {
return func(api *API) {
api.subAgentEnv = env
}
}
// WithManifestInfo sets the owner name, workspace name and parent agent
// name for the sub-agent.
func WithManifestInfo(owner, workspace, parentAgent string) Option {
return func(api *API) {
api.ownerName = owner
api.workspaceName = workspace
api.parentAgent = parentAgent
}
}
// WithDevcontainers sets the known devcontainers for the API. This
// allows the API to be aware of devcontainers defined in the workspace
// agent manifest.
func WithDevcontainers(devcontainers []codersdk.WorkspaceAgentDevcontainer, scripts []codersdk.WorkspaceAgentScript) Option {
return func(api *API) {
if len(devcontainers) == 0 {
return
}
api.knownDevcontainers = make(map[string]codersdk.WorkspaceAgentDevcontainer, len(devcontainers))
api.devcontainerNames = make(map[string]bool, len(devcontainers))
api.devcontainerLogSourceIDs = make(map[string]uuid.UUID)
for _, dc := range devcontainers {
if dc.Status == "" {
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStarting
}
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.devcontainerNames[dc.Name] = true
for _, script := range scripts {
// The devcontainer scripts match the devcontainer ID for
// identification.
if script.ID == dc.ID {
api.devcontainerLogSourceIDs[dc.WorkspaceFolder] = script.LogSourceID
break
}
}
if api.devcontainerLogSourceIDs[dc.WorkspaceFolder] == uuid.Nil {
api.logger.Error(api.ctx, "devcontainer log source ID not found for devcontainer",
slog.F("devcontainer_id", dc.ID),
slog.F("devcontainer_name", dc.Name),
slog.F("workspace_folder", dc.WorkspaceFolder),
slog.F("config_path", dc.ConfigPath),
)
}
}
}
}
// WithWatcher sets the file watcher implementation to use. By default a
// noop watcher is used. This can be used in tests to modify the watcher
// behavior or to use an actual file watcher (e.g. fsnotify).
func WithWatcher(w watcher.Watcher) Option {
return func(api *API) {
api.watcher = w
}
}
// ScriptLogger is an interface for sending devcontainer logs to the
// control plane.
type ScriptLogger interface {
Send(ctx context.Context, log ...agentsdk.Log) error
Flush(ctx context.Context) error
}
// noopScriptLogger is a no-op implementation of the ScriptLogger
// interface.
type noopScriptLogger struct{}
func (noopScriptLogger) Send(context.Context, ...agentsdk.Log) error { return nil }
func (noopScriptLogger) Flush(context.Context) error { return nil }
// WithScriptLogger sets the script logger provider for devcontainer operations.
func WithScriptLogger(scriptLogger func(logSourceID uuid.UUID) ScriptLogger) Option {
return func(api *API) {
api.scriptLogger = scriptLogger
}
}
// NewAPI returns a new API with the given options applied.
func NewAPI(logger slog.Logger, options ...Option) *API {
ctx, cancel := context.WithCancel(context.Background())
api := &API{
ctx: ctx,
cancel: cancel,
initDone: make(chan struct{}),
updateTrigger: make(chan chan error),
updateInterval: defaultUpdateInterval,
logger: logger,
clock: quartz.NewReal(),
execer: agentexec.DefaultExecer,
containerLabelIncludeFilter: make(map[string]string),
devcontainerNames: make(map[string]bool),
knownDevcontainers: make(map[string]codersdk.WorkspaceAgentDevcontainer),
configFileModifiedTimes: make(map[string]time.Time),
ignoredDevcontainers: make(map[string]bool),
recreateSuccessTimes: make(map[string]time.Time),
recreateErrorTimes: make(map[string]time.Time),
scriptLogger: func(uuid.UUID) ScriptLogger { return noopScriptLogger{} },
injectedSubAgentProcs: make(map[string]subAgentProcess),
usingWorkspaceFolderName: make(map[string]bool),
}
// The ctx and logger must be set before applying options to avoid
// nil pointer dereference.
for _, opt := range options {
opt(api)
}
if api.commandEnv != nil {
api.execer = newCommandEnvExecer(
api.logger,
api.commandEnv,
api.execer,
)
}
if api.ccli == nil {
api.ccli = NewDockerCLI(api.execer)
}
if api.dccli == nil {
api.dccli = NewDevcontainerCLI(logger.Named("devcontainer-cli"), api.execer)
}
if api.watcher == nil {
var err error
api.watcher, err = watcher.NewFSNotify()
if err != nil {
logger.Error(ctx, "create file watcher service failed", slog.Error(err))
api.watcher = watcher.NewNoop()
}
}
if api.subAgentClient.Load() == nil {
var c SubAgentClient = noopSubAgentClient{}
api.subAgentClient.Store(&c)
}
return api
}
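// Construction sketch: tests in this package typically swap in fakes via
// options, e.g. a quartz mock clock and a no-op watcher (assumed usage):
//
//	mClock := quartz.NewMock(t)
//	api := NewAPI(logger,
//		WithClock(mClock),
//		WithWatcher(watcher.NewNoop()),
//	)
//	defer api.Close()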
// Init applies a final set of options to the API and then
// closes initDone. This method can only be called once.
func (api *API) Init(opts ...Option) {
api.mu.Lock()
defer api.mu.Unlock()
if api.closed {
return
}
select {
case <-api.initDone:
return
default:
}
defer close(api.initDone)
for _, opt := range opts {
opt(api)
}
}
// Start starts the API by initializing the watcher and updater loops.
// Start calls Init; if options need to be applied after the API has
// been created, call Init before Start. This method must only be
// called once.
func (api *API) Start() {
api.Init()
api.mu.Lock()
defer api.mu.Unlock()
if api.closed {
return
}
api.watcherDone = make(chan struct{})
api.updaterDone = make(chan struct{})
go api.watcherLoop()
go api.updaterLoop()
}
func (api *API) watcherLoop() {
defer close(api.watcherDone)
defer api.logger.Debug(api.ctx, "watcher loop stopped")
api.logger.Debug(api.ctx, "watcher loop started")
for {
event, err := api.watcher.Next(api.ctx)
if err != nil {
if errors.Is(err, watcher.ErrClosed) {
api.logger.Debug(api.ctx, "watcher closed")
return
}
if api.ctx.Err() != nil {
api.logger.Debug(api.ctx, "api context canceled")
return
}
api.logger.Error(api.ctx, "watcher error waiting for next event", slog.Error(err))
continue
}
if event == nil {
continue
}
now := api.clock.Now("agentcontainers", "watcherLoop")
switch {
case event.Has(fsnotify.Create | fsnotify.Write):
api.logger.Debug(api.ctx, "devcontainer config file changed", slog.F("file", event.Name))
api.markDevcontainerDirty(event.Name, now)
case event.Has(fsnotify.Remove):
api.logger.Debug(api.ctx, "devcontainer config file removed", slog.F("file", event.Name))
api.markDevcontainerDirty(event.Name, now)
case event.Has(fsnotify.Rename):
api.logger.Debug(api.ctx, "devcontainer config file renamed", slog.F("file", event.Name))
api.markDevcontainerDirty(event.Name, now)
default:
api.logger.Debug(api.ctx, "devcontainer config file event ignored", slog.F("file", event.Name), slog.F("event", event))
}
}
}
// updaterLoop is responsible for periodically updating the container
// list and handling manual refresh requests.
func (api *API) updaterLoop() {
defer close(api.updaterDone)
defer api.logger.Debug(api.ctx, "updater loop stopped")
api.logger.Debug(api.ctx, "updater loop started")
// Make sure we clean up any subagents not tracked by this process
// before starting the update loop and creating new ones.
api.logger.Debug(api.ctx, "cleaning up subagents")
if err := api.cleanupSubAgents(api.ctx); err != nil {
api.logger.Error(api.ctx, "cleanup subagents failed", slog.Error(err))
} else {
api.logger.Debug(api.ctx, "cleanup subagents complete")
}
// Perform an initial update to populate the container list; this
// gives us a guarantee that the API has loaded the initial state
// before returning any responses. This is useful both for tests
// and for anyone looking to interact with the API.
api.logger.Debug(api.ctx, "performing initial containers update")
if err := api.updateContainers(api.ctx); err != nil {
if errors.Is(err, context.Canceled) {
api.logger.Warn(api.ctx, "initial containers update canceled", slog.Error(err))
} else {
api.logger.Error(api.ctx, "initial containers update failed", slog.Error(err))
}
} else {
api.logger.Debug(api.ctx, "initial containers update complete")
}
// We utilize a TickerFunc here instead of a regular Ticker so that
// we can guarantee execution of the updateContainers method after
// advancing the clock.
ticker := api.clock.TickerFunc(api.ctx, api.updateInterval, func() error {
done := make(chan error, 1)
var sent bool
defer func() {
if !sent {
close(done)
}
}()
select {
case <-api.ctx.Done():
return api.ctx.Err()
case api.updateTrigger <- done:
sent = true
err := <-done
if err != nil {
if errors.Is(err, context.Canceled) {
api.logger.Warn(api.ctx, "updater loop ticker canceled", slog.Error(err))
} else {
api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err))
}
}
default:
api.logger.Debug(api.ctx, "updater loop ticker skipped, update in progress")
}
return nil // Always nil to keep the ticker going.
}, "agentcontainers", "updaterLoop")
defer func() {
if err := ticker.Wait("agentcontainers", "updaterLoop"); err != nil && !errors.Is(err, context.Canceled) {
api.logger.Error(api.ctx, "updater loop ticker failed", slog.Error(err))
}
}()
for {
select {
case <-api.ctx.Done():
return
case done := <-api.updateTrigger:
// Note that although we pass api.ctx here, updateContainers
// has an internal timeout to prevent long blocking calls.
done <- api.updateContainers(api.ctx)
close(done)
}
}
}
// UpdateSubAgentClient updates the `SubAgentClient` for the API.
func (api *API) UpdateSubAgentClient(client SubAgentClient) {
api.subAgentClient.Store(&client)
}
// Routes returns the HTTP handler for container-related routes.
func (api *API) Routes() http.Handler {
r := chi.NewRouter()
ensureInitDoneMW := func(next http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
select {
case <-api.ctx.Done():
httpapi.Write(r.Context(), rw, http.StatusServiceUnavailable, codersdk.Response{
Message: "API closed",
Detail: "The API is closed and cannot process requests.",
})
return
case <-r.Context().Done():
return
case <-api.initDone:
// API init is done, we can start processing requests.
}
next.ServeHTTP(rw, r)
})
}
// For now, all endpoints require the initial update to be done.
// If we want to allow some endpoints to be available before
// the initial update, we can enable this per-route.
r.Use(ensureInitDoneMW)
r.Get("/", api.handleList)
// TODO(mafredri): Simplify this route as the previous /devcontainers
// route was dropped. We can drop the /devcontainers prefix here too.
r.Route("/devcontainers/{devcontainer}", func(r chi.Router) {
r.Post("/recreate", api.handleDevcontainerRecreate)
})
return r
}
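// Mounting sketch (assumed integration; the real mount point lives in the
// agent's HTTP API, not in this file):
//
//	r := chi.NewRouter()
//	r.Mount("/api/v0/containers", api.Routes())
//
// With that prefix, GET /api/v0/containers serves the cached container and
// devcontainer state via handleList below.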
// handleList handles the HTTP request to list containers.
func (api *API) handleList(rw http.ResponseWriter, r *http.Request) {
ct, err := api.getContainers()
if err != nil {
httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{
Message: "Could not get containers",
Detail: err.Error(),
})
return
}
httpapi.Write(r.Context(), rw, http.StatusOK, ct)
}
// updateContainers fetches the latest container list, processes it, and
// updates the cache. It performs locking for updating shared API state.
func (api *API) updateContainers(ctx context.Context) error {
listCtx, listCancel := context.WithTimeout(ctx, defaultOperationTimeout)
defer listCancel()
updated, err := api.ccli.List(listCtx)
if err != nil {
// If the context was canceled, we hold off on clearing the
// containers cache. This is to avoid clearing the cache if
// the update was canceled due to a timeout. Hopefully this
// will clear up on the next update.
if !errors.Is(err, context.Canceled) {
api.mu.Lock()
api.containersErr = err
api.mu.Unlock()
}
return xerrors.Errorf("list containers failed: %w", err)
}
// Clone to avoid test flakes due to data manipulation.
updated.Containers = slices.Clone(updated.Containers)
api.mu.Lock()
defer api.mu.Unlock()
api.processUpdatedContainersLocked(ctx, updated)
api.logger.Debug(ctx, "containers updated successfully", slog.F("container_count", len(api.containers.Containers)), slog.F("warning_count", len(api.containers.Warnings)), slog.F("devcontainer_count", len(api.knownDevcontainers)))
return nil
}
// processUpdatedContainersLocked updates the devcontainer state based
// on the latest list of containers. This method assumes that api.mu is
// held.
func (api *API) processUpdatedContainersLocked(ctx context.Context, updated codersdk.WorkspaceAgentListContainersResponse) {
dcFields := func(dc codersdk.WorkspaceAgentDevcontainer) []slog.Field {
f := []slog.Field{
slog.F("devcontainer_id", dc.ID),
slog.F("devcontainer_name", dc.Name),
slog.F("workspace_folder", dc.WorkspaceFolder),
slog.F("config_path", dc.ConfigPath),
}
if dc.Container != nil {
f = append(f, slog.F("container_id", dc.Container.ID))
f = append(f, slog.F("container_name", dc.Container.FriendlyName))
}
return f
}
// Reset the container links in known devcontainers to detect if
// they still exist.
for _, dc := range api.knownDevcontainers {
dc.Container = nil
api.knownDevcontainers[dc.WorkspaceFolder] = dc
}
// Check if the container is running and update the known devcontainers.
for i := range updated.Containers {
container := &updated.Containers[i] // Grab a reference to the container to allow mutating it.
workspaceFolder := container.Labels[DevcontainerLocalFolderLabel]
configFile := container.Labels[DevcontainerConfigFileLabel]
if workspaceFolder == "" {
continue
}
logger := api.logger.With(
slog.F("container_id", updated.Containers[i].ID),
slog.F("container_name", updated.Containers[i].FriendlyName),
slog.F("workspace_folder", workspaceFolder),
slog.F("config_file", configFile),
)
// Filter out devcontainer tests, unless explicitly set in include filters.
if len(api.containerLabelIncludeFilter) > 0 || container.Labels[DevcontainerIsTestRunLabel] == "true" {
var ok bool
for label, value := range api.containerLabelIncludeFilter {
if v, found := container.Labels[label]; found && v == value {
ok = true
}
}
// Verbose debug logging is fine here since typically filters
// are only used in development or testing environments.
if !ok {
logger.Debug(ctx, "container does not match include filter, ignoring devcontainer", slog.F("container_labels", container.Labels), slog.F("include_filter", api.containerLabelIncludeFilter))
continue
}
logger.Debug(ctx, "container matches include filter, processing devcontainer", slog.F("container_labels", container.Labels), slog.F("include_filter", api.containerLabelIncludeFilter))
}
if dc, ok := api.knownDevcontainers[workspaceFolder]; ok {
// If no config path is set, this devcontainer was defined
// in Terraform without the optional config file. Assume the
// first container with the workspace folder label is the
// one we want to use.
if dc.ConfigPath == "" && configFile != "" {
dc.ConfigPath = configFile
if err := api.watcher.Add(configFile); err != nil {
logger.With(dcFields(dc)...).Error(ctx, "watch devcontainer config file failed", slog.Error(err))
}
}
dc.Container = container
api.knownDevcontainers[dc.WorkspaceFolder] = dc
continue
}
dc := codersdk.WorkspaceAgentDevcontainer{
ID: uuid.New(),
Name: "", // Updated later based on container state.
WorkspaceFolder: workspaceFolder,
ConfigPath: configFile,
Status: "", // Updated later based on container state.
Dirty: false, // Updated later based on config file changes.
Container: container,
}
if configFile != "" {
if err := api.watcher.Add(configFile); err != nil {
logger.With(dcFields(dc)...).Error(ctx, "watch devcontainer config file failed", slog.Error(err))
}
}
api.knownDevcontainers[workspaceFolder] = dc
}
// Iterate through all known devcontainers and update their status
// based on the current state of the containers.
for _, dc := range api.knownDevcontainers {
logger := api.logger.With(dcFields(dc)...)
if dc.Container != nil {
if !api.devcontainerNames[dc.Name] {
// If the devcontainer name wasn't set via Terraform, we will
// attempt to create an agent name based on the workspace folder's
// name. If it is not possible to generate a valid agent name from
// the folder name (i.e. no valid characters), we will instead fall
// back to using the container's friendly name.
dc.Name, api.usingWorkspaceFolderName[dc.WorkspaceFolder] = api.makeAgentName(dc.WorkspaceFolder, dc.Container.FriendlyName)
}
}
switch {
case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting:
continue // This state is handled by the recreation routine.
case dc.Status == codersdk.WorkspaceAgentDevcontainerStatusError && (dc.Container == nil || dc.Container.CreatedAt.Before(api.recreateErrorTimes[dc.WorkspaceFolder])):
continue // The devcontainer needs to be recreated.
case dc.Container != nil:
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopped
if dc.Container.Running {
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusRunning
}
dc.Dirty = false
if lastModified, hasModTime := api.configFileModifiedTimes[dc.ConfigPath]; hasModTime && dc.Container.CreatedAt.Before(lastModified) {
dc.Dirty = true
}
if dc.Status == codersdk.WorkspaceAgentDevcontainerStatusRunning {
err := api.maybeInjectSubAgentIntoContainerLocked(ctx, dc)
if err != nil {
logger.Error(ctx, "inject subagent into container failed", slog.Error(err))
}
}
case dc.Container == nil:
if !api.devcontainerNames[dc.Name] {
dc.Name = ""
}
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopped
dc.Dirty = false
}
delete(api.recreateErrorTimes, dc.WorkspaceFolder)
api.knownDevcontainers[dc.WorkspaceFolder] = dc
}
api.containers = updated
api.containersErr = nil
}
var consecutiveHyphenRegex = regexp.MustCompile("-+")
// `safeAgentName` returns a safe agent name derived from a folder name,
// falling back to the container's friendly name if needed. The second
// return value will be `true` if it succeeded and `false` if it had
// to fall back to the friendly name.
func safeAgentName(name string, friendlyName string) (string, bool) {
// Keep only ASCII letters and digits, replacing everything
// else with a hyphen.
var sb strings.Builder
for _, r := range strings.ToLower(name) {
if (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') {
_, _ = sb.WriteRune(r)
} else {
_, _ = sb.WriteRune('-')
}
}
// Remove any consecutive hyphens, and then trim any leading
// and trailing hyphens.
name = consecutiveHyphenRegex.ReplaceAllString(sb.String(), "-")
name = strings.Trim(name, "-")
// Ensure the name of the agent doesn't exceed the maximum agent
// name length.
name = name[:min(len(name), maxAgentNameLength)]
if provisioner.AgentNameRegex.Match([]byte(name)) {
return name, true
}
return safeFriendlyName(friendlyName), false
}
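// For illustration, given the rules above (hand-computed, not from tests):
//
//	safeAgentName("My Project (v2)", "vibrant_turing") // => "my-project-v2", true
//	safeAgentName("!!!", "vibrant_turing")             // => "vibrant-turing", false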
// safeFriendlyName returns an API-safe version of the container's
// friendly name.
//
// See provisioner/regexes.go for the regex used to validate
// the friendly name on the API side.
func safeFriendlyName(name string) string {
name = strings.ToLower(name)
name = strings.ReplaceAll(name, "_", "-")
return name
}
// expandedAgentName creates an agent name by including parent directories
// from the workspace folder path to avoid name collisions. Like `safeAgentName`,
// the second returned value will be true if using the workspace folder name,
// and false if it fell back to the friendly name.
func expandedAgentName(workspaceFolder string, friendlyName string, depth int) (string, bool) {
var parts []string
for part := range strings.SplitSeq(filepath.ToSlash(workspaceFolder), "/") {
if part = strings.TrimSpace(part); part != "" {
parts = append(parts, part)
}
}
if len(parts) == 0 {
return safeFriendlyName(friendlyName), false
}
components := parts[max(0, len(parts)-depth-1):]
expanded := strings.Join(components, "-")
return safeAgentName(expanded, friendlyName)
}
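// For illustration (hand-computed from the logic above):
//
//	expandedAgentName("/home/coder/projects/api", "fn", 0) // => "api", true
//	expandedAgentName("/home/coder/projects/api", "fn", 1) // => "projects-api", true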
// makeAgentName attempts to create an agent name. It will first attempt to
// create an agent name based on the workspace folder, and will eventually
// fall back to the friendly name. Like `safeAgentName`, the second returned
// value will be true if the agent name utilizes the workspace folder, and
// false if it falls back to the friendly name.
func (api *API) makeAgentName(workspaceFolder string, friendlyName string) (string, bool) {
for attempt := 0; attempt <= maxAttemptsToNameAgent; attempt++ {
agentName, usingWorkspaceFolder := expandedAgentName(workspaceFolder, friendlyName, attempt)
if !usingWorkspaceFolder {
return agentName, false
}
if !api.devcontainerNames[agentName] {
return agentName, true
}
}
return safeFriendlyName(friendlyName), false
}
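// For illustration: if "api" collides with an existing devcontainer name,
// each attempt widens the folder path until a free name is found:
//
//	attempt 0: "api"          // taken
//	attempt 1: "projects-api" // free => ("projects-api", true)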
// RefreshContainers triggers an immediate update of the container list
// and waits for it to complete.
func (api *API) RefreshContainers(ctx context.Context) (err error) {
defer func() {
if err != nil {
err = xerrors.Errorf("refresh containers failed: %w", err)
}
}()
done := make(chan error, 1)
var sent bool
defer func() {
if !sent {
close(done)
}
}()
select {
case <-api.ctx.Done():
return xerrors.Errorf("API closed: %w", api.ctx.Err())
case <-ctx.Done():
return ctx.Err()
case api.updateTrigger <- done:
sent = true
select {
case <-api.ctx.Done():
return xerrors.Errorf("API closed: %w", api.ctx.Err())
case <-ctx.Done():
return ctx.Err()
case err := <-done:
return err
}
}
}
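// Usage sketch: the caller-supplied context bounds the wait, while the list
// operation itself is bounded internally by defaultOperationTimeout:
//
//	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
//	defer cancel()
//	if err := api.RefreshContainers(ctx); err != nil {
//		logger.Error(ctx, "refresh containers", slog.Error(err))
//	}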
func (api *API) getContainers() (codersdk.WorkspaceAgentListContainersResponse, error) {
api.mu.RLock()
defer api.mu.RUnlock()
if api.containersErr != nil {
return codersdk.WorkspaceAgentListContainersResponse{}, api.containersErr
}
var devcontainers []codersdk.WorkspaceAgentDevcontainer
if len(api.knownDevcontainers) > 0 {
devcontainers = make([]codersdk.WorkspaceAgentDevcontainer, 0, len(api.knownDevcontainers))
for _, dc := range api.knownDevcontainers {
if api.ignoredDevcontainers[dc.WorkspaceFolder] {
continue
}
// Include the agent if it's running (we're iterating over
// copies, so mutating is fine).
if proc := api.injectedSubAgentProcs[dc.WorkspaceFolder]; proc.agent.ID != uuid.Nil {
dc.Agent = &codersdk.WorkspaceAgentDevcontainerAgent{
ID: proc.agent.ID,
Name: proc.agent.Name,
Directory: proc.agent.Directory,
}
}
devcontainers = append(devcontainers, dc)
}
slices.SortFunc(devcontainers, func(a, b codersdk.WorkspaceAgentDevcontainer) int {
return strings.Compare(a.Name, b.Name)
})
}
return codersdk.WorkspaceAgentListContainersResponse{
Devcontainers: devcontainers,
Containers: slices.Clone(api.containers.Containers),
Warnings: slices.Clone(api.containers.Warnings),
}, nil
}
// handleDevcontainerRecreate handles the HTTP request to recreate a
// devcontainer identified by its ID.
func (api *API) handleDevcontainerRecreate(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
devcontainerID := chi.URLParam(r, "devcontainer")
if devcontainerID == "" {
httpapi.Write(ctx, w, http.StatusBadRequest, codersdk.Response{
Message: "Missing devcontainer ID",
Detail: "Devcontainer ID is required to recreate a devcontainer.",
})
return
}
api.mu.Lock()
var dc codersdk.WorkspaceAgentDevcontainer
for _, knownDC := range api.knownDevcontainers {
if knownDC.ID.String() == devcontainerID {
dc = knownDC
break
}
}
if dc.ID == uuid.Nil {
api.mu.Unlock()
httpapi.Write(ctx, w, http.StatusNotFound, codersdk.Response{
Message: "Devcontainer not found.",
Detail: fmt.Sprintf("Could not find devcontainer with ID: %q", devcontainerID),
})
return
}
if dc.Status == codersdk.WorkspaceAgentDevcontainerStatusStarting {
api.mu.Unlock()
httpapi.Write(ctx, w, http.StatusConflict, codersdk.Response{
Message: "Devcontainer recreation already in progress",
Detail: fmt.Sprintf("Recreation for devcontainer %q is already underway.", dc.Name),
})
return
}
// Update the status so that we don't try to recreate the
// devcontainer multiple times in parallel.
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStarting
dc.Container = nil
api.knownDevcontainers[dc.WorkspaceFolder] = dc
go func() {
_ = api.CreateDevcontainer(dc.WorkspaceFolder, dc.ConfigPath, WithRemoveExistingContainer())
}()
api.mu.Unlock()
httpapi.Write(ctx, w, http.StatusAccepted, codersdk.Response{
Message: "Devcontainer recreation initiated",
Detail: fmt.Sprintf("Recreation process for devcontainer %q has started.", dc.Name),
})
}
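// Request sketch (the mount prefix is assumed, see Routes above):
//
//	POST /api/v0/containers/devcontainers/{devcontainer}/recreate
//	=> 202 Accepted, recreation proceeds asynchronously via CreateDevcontainer.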
// CreateDevcontainer should run in its own goroutine and is responsible for
// recreating a devcontainer based on the provided devcontainer configuration.
// It updates the devcontainer status and logs the process. The configPath is
// passed as a parameter for the odd chance that the container being recreated
// has a different config file than the one stored in the devcontainer state.
// The devcontainer status must be set to starting before calling this
// function; the asyncWg bookkeeping is handled internally.
func (api *API) CreateDevcontainer(workspaceFolder, configPath string, opts ...DevcontainerCLIUpOptions) error {
api.mu.Lock()
if api.closed {
api.mu.Unlock()
return nil
}
dc, found := api.knownDevcontainers[workspaceFolder]
if !found {
api.mu.Unlock()
return xerrors.Errorf("devcontainer not found")
}
var (
ctx = api.ctx
logger = api.logger.With(
slog.F("devcontainer_id", dc.ID),
slog.F("devcontainer_name", dc.Name),
slog.F("workspace_folder", dc.WorkspaceFolder),
slog.F("config_path", dc.ConfigPath),
)
)
// Send logs via agent logging facilities.
logSourceID := api.devcontainerLogSourceIDs[dc.WorkspaceFolder]
if logSourceID == uuid.Nil {
api.logger.Debug(api.ctx, "devcontainer log source ID not found, falling back to external log source ID")
logSourceID = agentsdk.ExternalLogSourceID
}
api.asyncWg.Add(1)
defer api.asyncWg.Done()
api.mu.Unlock()
if dc.ConfigPath != configPath {
logger.Warn(ctx, "devcontainer config path mismatch",
slog.F("config_path_param", configPath),
)
}
scriptLogger := api.scriptLogger(logSourceID)
defer func() {
flushCtx, cancel := context.WithTimeout(api.ctx, 5*time.Second)
defer cancel()
if err := scriptLogger.Flush(flushCtx); err != nil {
logger.Error(flushCtx, "flush devcontainer logs failed during recreation", slog.Error(err))
}
}()
infoW := agentsdk.LogsWriter(ctx, scriptLogger.Send, logSourceID, codersdk.LogLevelInfo)
defer infoW.Close()
errW := agentsdk.LogsWriter(ctx, scriptLogger.Send, logSourceID, codersdk.LogLevelError)
defer errW.Close()
logger.Debug(ctx, "starting devcontainer recreation")
upOptions := []DevcontainerCLIUpOptions{WithUpOutput(infoW, errW)}
upOptions = append(upOptions, opts...)
_, err := api.dccli.Up(ctx, dc.WorkspaceFolder, configPath, upOptions...)
if err != nil {
// No need to log if the API is closing (context canceled), as this
// is expected behavior when the API is shutting down.
if !errors.Is(err, context.Canceled) {
logger.Error(ctx, "devcontainer creation failed", slog.Error(err))
}
api.mu.Lock()
dc = api.knownDevcontainers[dc.WorkspaceFolder]
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusError
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.recreateErrorTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "errorTimes")
api.mu.Unlock()
return xerrors.Errorf("start devcontainer: %w", err)
}
logger.Info(ctx, "devcontainer created successfully")
api.mu.Lock()
dc = api.knownDevcontainers[dc.WorkspaceFolder]
// Update the devcontainer status to Running or Stopped based on the
// current state of the container. Moving the status out of starting
// allows the update routine to take over, but to minimize the window
// of API inconsistency we guess the status based on the container
// state.
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusStopped
if dc.Container != nil {
if dc.Container.Running {
dc.Status = codersdk.WorkspaceAgentDevcontainerStatusRunning
}
}
dc.Dirty = false
api.recreateSuccessTimes[dc.WorkspaceFolder] = api.clock.Now("agentcontainers", "recreate", "successTimes")
api.knownDevcontainers[dc.WorkspaceFolder] = dc
api.mu.Unlock()
// Ensure an immediate refresh to accurately reflect the
// devcontainer state after recreation.
if err := api.RefreshContainers(ctx); err != nil {
logger.Error(ctx, "failed to trigger immediate refresh after devcontainer creation", slog.Error(err))
return xerrors.Errorf("refresh containers: %w", err)
}
return nil
}
// markDevcontainerDirty finds the devcontainer with the given config file path
// and marks it as dirty. It acquires the lock before modifying the state.
func (api *API) markDevcontainerDirty(configPath string, modifiedAt time.Time) {
api.mu.Lock()
defer api.mu.Unlock()
// Record the timestamp of when this configuration file was modified.
api.configFileModifiedTimes[configPath] = modifiedAt
for _, dc := range api.knownDevcontainers {
if dc.ConfigPath != configPath {
continue
}
logger := api.logger.With(
slog.F("devcontainer_id", dc.ID),
slog.F("devcontainer_name", dc.Name),
slog.F("workspace_folder", dc.WorkspaceFolder),
slog.F("file", configPath),
slog.F("modified_at", modifiedAt),
)
// TODO(mafredri): Simplistic mark for now, we should check if the
// container is running and if the config file was modified after
// the container was created.
if !dc.Dirty {
logger.Info(api.ctx, "marking devcontainer as dirty")
dc.Dirty = true
}
if _, ok := api.ignoredDevcontainers[dc.WorkspaceFolder]; ok {
logger.Debug(api.ctx, "clearing devcontainer ignored state")
delete(api.ignoredDevcontainers, dc.WorkspaceFolder) // Allow re-reading config.
}
api.knownDevcontainers[dc.WorkspaceFolder] = dc
}
}
// cleanupSubAgents removes subagents that are no longer managed by
// this agent. This is usually only run at startup to ensure a clean
// slate. This method has an internal timeout to prevent blocking
// indefinitely if something goes wrong with the subagent deletion.
func (api *API) cleanupSubAgents(ctx context.Context) error {
client := *api.subAgentClient.Load()
agents, err := client.List(ctx)
if err != nil {
return xerrors.Errorf("list agents: %w", err)
}
if len(agents) == 0 {
return nil
}
api.mu.Lock()
defer api.mu.Unlock()
injected := make(map[uuid.UUID]bool, len(api.injectedSubAgentProcs))
for _, proc := range api.injectedSubAgentProcs {
injected[proc.agent.ID] = true
}
ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout)
defer cancel()
for _, agent := range agents {
if injected[agent.ID] {
continue
}
client := *api.subAgentClient.Load()
err := client.Delete(ctx, agent.ID)
if err != nil {
api.logger.Error(ctx, "failed to delete agent",
slog.Error(err),
slog.F("agent_id", agent.ID),
slog.F("agent_name", agent.Name),
)
}
}
return nil
}
// maybeInjectSubAgentIntoContainerLocked injects a subagent into a dev
// container and starts the subagent process. This method assumes that
// api.mu is held. This method is idempotent and will not re-inject the
// subagent if it is already/still running in the container.
//
// This method uses an internal timeout to prevent blocking indefinitely
// if something goes wrong with the injection.
func (api *API) maybeInjectSubAgentIntoContainerLocked(ctx context.Context, dc codersdk.WorkspaceAgentDevcontainer) (err error) {
if api.ignoredDevcontainers[dc.WorkspaceFolder] {
return nil
}
ctx, cancel := context.WithTimeout(ctx, defaultOperationTimeout)
defer cancel()
container := dc.Container
if container == nil {
return xerrors.New("container is nil, cannot inject subagent")
}
logger := api.logger.With(
slog.F("devcontainer_id", dc.ID),
slog.F("devcontainer_name", dc.Name),
slog.F("workspace_folder", dc.WorkspaceFolder),
slog.F("config_path", dc.ConfigPath),
slog.F("container_id", container.ID),
slog.F("container_name", container.FriendlyName),
)
// Check if subagent already exists for this devcontainer.
maybeRecreateSubAgent := false
proc, injected := api.injectedSubAgentProcs[dc.WorkspaceFolder]
if injected {
if _, ignoreChecked := api.ignoredDevcontainers[dc.WorkspaceFolder]; !ignoreChecked {
// If ignore status has not yet been checked, or cleared by
// modifications to the devcontainer.json, we must read it
// to determine the current status. This can happen while
// the devcontainer subagent is already running or before
// we've had a chance to inject it.
//
// Note, for simplicity, we do not try to optimize to reduce
// ReadConfig calls here.
config, err := api.dccli.ReadConfig(ctx, dc.WorkspaceFolder, dc.ConfigPath, nil)
if err != nil {
return xerrors.Errorf("read devcontainer config: %w", err)
}
dcIgnored := config.Configuration.Customizations.Coder.Ignore
if dcIgnored {
proc.stop()
if proc.agent.ID != uuid.Nil {
// Unlock while doing the delete operation.
api.mu.Unlock()
client := *api.subAgentClient.Load()
if err := client.Delete(ctx, proc.agent.ID); err != nil {
api.mu.Lock()
return xerrors.Errorf("delete subagent: %w", err)
}
api.mu.Lock()
}
// Reset agent and containerID to force config re-reading if ignore is toggled.
proc.agent = SubAgent{}
proc.containerID = ""
api.injectedSubAgentProcs[dc.WorkspaceFolder] = proc
api.ignoredDevcontainers[dc.WorkspaceFolder] = dcIgnored
return nil
}
}
if proc.containerID == container.ID && proc.ctx.Err() == nil {
// Same container and running, no need to reinject.
return nil
}
if proc.containerID != container.ID {
// For now, always recreate the subagent if the container ID
// changed; in the future we can inspect e.g. whether coder_apps
// remain the same and avoid unnecessary recreation.
logger.Debug(ctx, "container ID changed, injecting subagent into new container",
slog.F("old_container_id", proc.containerID),
)
maybeRecreateSubAgent = proc.agent.ID != uuid.Nil
}
// Container ID changed or the subagent process is not running,
// stop the existing subagent context to replace it.
proc.stop()
}
if proc.agent.OperatingSystem == "" {
// Set SubAgent defaults.
proc.agent.OperatingSystem = "linux" // Assuming Linux for devcontainers.
}
// Prepare the subAgentProcess to be used when running the subagent.
// We use api.ctx here to ensure that the process keeps running
// after this method returns.
proc.ctx, proc.stop = context.WithCancel(api.ctx)
api.injectedSubAgentProcs[dc.WorkspaceFolder] = proc
// This is used to track the goroutine that will run the subagent
// process inside the container. It will be decremented when the
// subagent process completes or if an error occurs before we can
// start the subagent.
api.asyncWg.Add(1)
ranSubAgent := false
// Clean up if injection fails.
var dcIgnored, setDCIgnored bool
defer func() {
if setDCIgnored {
api.ignoredDevcontainers[dc.WorkspaceFolder] = dcIgnored
}
if !ranSubAgent {
proc.stop()
if !api.closed {
// Ensure state modifications are reflected.
api.injectedSubAgentProcs[dc.WorkspaceFolder] = proc
}
api.asyncWg.Done()
}
}()
// Unlock the mutex to allow other operations while we
// inject the subagent into the container.
api.mu.Unlock()
defer api.mu.Lock() // Re-lock.
arch, err := api.ccli.DetectArchitecture(ctx, container.ID)
if err != nil {
return xerrors.Errorf("detect architecture: %w", err)
}
logger.Info(ctx, "detected container architecture", slog.F("architecture", arch))
// For now, only support injecting if the architecture matches the host.
hostArch := runtime.GOARCH
// TODO(mafredri): Add support for downloading agents for supported architectures.
if arch != hostArch {
logger.Warn(ctx, "skipping subagent injection for unsupported architecture",
slog.F("container_arch", arch),
slog.F("host_arch", hostArch),
)
return nil
}
if proc.agent.ID == uuid.Nil {
proc.agent.Architecture = arch
}
subAgentConfig := proc.agent.CloneConfig(dc)
if proc.agent.ID == uuid.Nil || maybeRecreateSubAgent {
subAgentConfig.Architecture = arch
displayAppsMap := map[codersdk.DisplayApp]bool{
// NOTE(DanielleMaywood):
// We use the same defaults here as set in terraform-provider-coder.
// https://github.com/coder/terraform-provider-coder/blob/c1c33f6d556532e75662c0ca373ed8fdea220eb5/provider/agent.go#L38-L51
codersdk.DisplayAppVSCodeDesktop: true,
codersdk.DisplayAppVSCodeInsiders: false,
codersdk.DisplayAppWebTerminal: true,
codersdk.DisplayAppSSH: true,
codersdk.DisplayAppPortForward: true,
}
var (
featureOptionsAsEnvs []string
appsWithPossibleDuplicates []SubAgentApp
workspaceFolder = DevcontainerDefaultContainerWorkspaceFolder
)
if err := func() error {
var (
config DevcontainerConfig
configOutdated bool
)
readConfig := func() (DevcontainerConfig, error) {
return api.dccli.ReadConfig(ctx, dc.WorkspaceFolder, dc.ConfigPath,
append(featureOptionsAsEnvs, []string{
fmt.Sprintf("CODER_WORKSPACE_AGENT_NAME=%s", subAgentConfig.Name),
fmt.Sprintf("CODER_WORKSPACE_OWNER_NAME=%s", api.ownerName),
fmt.Sprintf("CODER_WORKSPACE_NAME=%s", api.workspaceName),
fmt.Sprintf("CODER_WORKSPACE_PARENT_AGENT_NAME=%s", api.parentAgent),
fmt.Sprintf("CODER_URL=%s", api.subAgentURL),
fmt.Sprintf("CONTAINER_ID=%s", container.ID),
}...),
)
}
if config, err = readConfig(); err != nil {
return err
}
// We only allow ignore to be set in the root customization layer to
// prevent weird interactions with devcontainer features.
dcIgnored, setDCIgnored = config.Configuration.Customizations.Coder.Ignore, true
if dcIgnored {
return nil
}
workspaceFolder = config.Workspace.WorkspaceFolder
featureOptionsAsEnvs = config.MergedConfiguration.Features.OptionsAsEnvs()
if len(featureOptionsAsEnvs) > 0 {
configOutdated = true
}
// NOTE(DanielleMaywood):
// We only want to take an agent name specified in the root customization layer.
// This restricts the ability for a feature to specify the agent name. We may revisit
// this in the future, but for now we want to restrict this behavior.
if name := config.Configuration.Customizations.Coder.Name; name != "" {
// We only want to pick this name if it is a valid name.
if provisioner.AgentNameRegex.Match([]byte(name)) {
subAgentConfig.Name = name
configOutdated = true
delete(api.usingWorkspaceFolderName, dc.WorkspaceFolder)
} else {
logger.Warn(ctx, "invalid name in devcontainer customization, ignoring",
slog.F("name", name),
slog.F("regex", provisioner.AgentNameRegex.String()),
)
}
}
if configOutdated {
if config, err = readConfig(); err != nil {
return err
}
}
coderCustomization := config.MergedConfiguration.Customizations.Coder
for _, customization := range coderCustomization {
for app, enabled := range customization.DisplayApps {
if _, ok := displayAppsMap[app]; !ok {
logger.Warn(ctx, "unknown display app in devcontainer customization, ignoring",
slog.F("app", app),
slog.F("enabled", enabled),
)
continue
}
displayAppsMap[app] = enabled
}
appsWithPossibleDuplicates = append(appsWithPossibleDuplicates, customization.Apps...)
}
return nil
}(); err != nil {
api.logger.Error(ctx, "unable to read devcontainer config", slog.Error(err))
}
if dcIgnored {
proc.stop()
if proc.agent.ID != uuid.Nil {
// If we stop the subagent, we also need to delete it.
client := *api.subAgentClient.Load()
if err := client.Delete(ctx, proc.agent.ID); err != nil {
return xerrors.Errorf("delete subagent: %w", err)
}
}
// Reset agent and containerID to force config re-reading if
// ignore is toggled.
proc.agent = SubAgent{}
proc.containerID = ""
return nil
}
displayApps := make([]codersdk.DisplayApp, 0, len(displayAppsMap))
for app, enabled := range displayAppsMap {
if enabled {
displayApps = append(displayApps, app)
}
}
slices.Sort(displayApps)
appSlugs := make(map[string]struct{})
apps := make([]SubAgentApp, 0, len(appsWithPossibleDuplicates))
// We want to deduplicate the apps based on their slugs here.
// As we want to prioritize later apps, we will walk through this
// backwards.
for _, app := range slices.Backward(appsWithPossibleDuplicates) {
if _, slugAlreadyExists := appSlugs[app.Slug]; slugAlreadyExists {
continue
}
appSlugs[app.Slug] = struct{}{}
apps = append(apps, app)
}
// Apps is currently in reverse order here, so by reversing it we restore
// it to the original order.
slices.Reverse(apps)
subAgentConfig.DisplayApps = displayApps
subAgentConfig.Apps = apps
subAgentConfig.Directory = workspaceFolder
}
agentBinaryPath, err := os.Executable()
if err != nil {
return xerrors.Errorf("get agent binary path: %w", err)
}
agentBinaryPath, err = filepath.EvalSymlinks(agentBinaryPath)
if err != nil {
return xerrors.Errorf("resolve agent binary path: %w", err)
}
// If we scripted this as a `/bin/sh` script, we could reduce these
// steps to one instruction, speeding up the injection process.
//
// Note: We use `path` instead of `filepath` here because we are
// working with Unix-style paths inside the container.
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "mkdir", "-p", path.Dir(coderPathInsideContainer)); err != nil {
return xerrors.Errorf("create agent directory in container: %w", err)
}
if err := api.ccli.Copy(ctx, container.ID, agentBinaryPath, coderPathInsideContainer); err != nil {
return xerrors.Errorf("copy agent binary: %w", err)
}
logger.Info(ctx, "copied agent binary to container")
// Make sure the agent binary is executable so we can run it (the
// user doesn't matter since we're making it executable for all).
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "chmod", "0755", path.Dir(coderPathInsideContainer), coderPathInsideContainer); err != nil {
return xerrors.Errorf("set agent binary executable: %w", err)
}
// Make sure the agent binary is owned by a valid user so we can run it.
if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "/bin/sh", "-c", fmt.Sprintf("chown $(id -u):$(id -g) %s", coderPathInsideContainer)); err != nil {
return xerrors.Errorf("set agent binary ownership: %w", err)
}
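// A hypothetical consolidation of the two post-copy exec steps above into a
// single /bin/sh invocation (the mkdir must still precede the copy):
//
//	script := fmt.Sprintf("chmod 0755 %[1]s %[2]s && chown $(id -u):$(id -g) %[2]s",
//		path.Dir(coderPathInsideContainer), coderPathInsideContainer)
//	_, err := api.ccli.ExecAs(ctx, container.ID, "root", "/bin/sh", "-c", script)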
// Attempt to add CAP_NET_ADMIN to the binary to improve network
// performance (optional, allow to fail). See `bootstrap_linux.sh`.
// TODO(mafredri): Disable for now until we can figure out why this
// causes the following error on some images:
//
// Image: mcr.microsoft.com/devcontainers/base:ubuntu
// Error: /.coder-agent/coder: Operation not permitted
//
// if _, err := api.ccli.ExecAs(ctx, container.ID, "root", "setcap", "cap_net_admin+ep", coderPathInsideContainer); err != nil {
// logger.Warn(ctx, "set CAP_NET_ADMIN on agent binary failed", slog.Error(err))
// }
deleteSubAgent := proc.agent.ID != uuid.Nil && maybeRecreateSubAgent && !proc.agent.EqualConfig(subAgentConfig)
if deleteSubAgent {
logger.Debug(ctx, "deleting existing subagent for recreation", slog.F("agent_id", proc.agent.ID))
client := *api.subAgentClient.Load()
err = client.Delete(ctx, proc.agent.ID)
if err != nil {
return xerrors.Errorf("delete existing subagent failed: %w", err)
}
proc.agent = SubAgent{} // Clear agent to signal that we need to create a new one.
}
if proc.agent.ID == uuid.Nil {
logger.Debug(ctx, "creating new subagent",
slog.F("directory", subAgentConfig.Directory),
slog.F("display_apps", subAgentConfig.DisplayApps),
)
// Create new subagent record in the database to receive the auth token.
// If we get a unique constraint violation, try with expanded names that
// include parent directories to avoid collisions.
client := *api.subAgentClient.Load()
originalName := subAgentConfig.Name
for attempt := 1; attempt <= maxAttemptsToNameAgent; attempt++ {
agent, err := client.Create(ctx, subAgentConfig)
if err == nil {
proc.agent = agent // Only reassign on success.
if api.usingWorkspaceFolderName[dc.WorkspaceFolder] {
api.devcontainerNames[dc.Name] = true
delete(api.usingWorkspaceFolderName, dc.WorkspaceFolder)
}
break
}
// NOTE(DanielleMaywood):
// Ordinarily we'd use `errors.As` here, but it didn't appear to work. Not
// sure if this is because of the communication protocol? Instead I've opted
// for a slightly more janky string contains approach.
//
// We only care if sub agent creation has failed due to a unique constraint
// violation on the agent name, as we can _possibly_ rectify this.
if !strings.Contains(err.Error(), "workspace agent name") {
return xerrors.Errorf("create subagent failed: %w", err)
}
// If there has been a unique constraint violation but the user is *not*
// using an auto-generated name, then we should error. This is because
// we do not want to surprise the user with a name they did not ask for.
if usingFolderName := api.usingWorkspaceFolderName[dc.WorkspaceFolder]; !usingFolderName {
return xerrors.Errorf("create subagent failed: %w", err)
}
if attempt == maxAttemptsToNameAgent {
return xerrors.Errorf("create subagent failed after %d attempts: %w", attempt, err)
}
// We increase how much of the workspace folder is used for generating
// the agent name. With each iteration there is greater chance of this
// being successful.
subAgentConfig.Name, api.usingWorkspaceFolderName[dc.WorkspaceFolder] = expandedAgentName(dc.WorkspaceFolder, dc.Container.FriendlyName, attempt)
logger.Debug(ctx, "retrying subagent creation with expanded name",
slog.F("original_name", originalName),
slog.F("expanded_name", subAgentConfig.Name),
slog.F("attempt", attempt+1),
)
}
logger.Info(ctx, "created new subagent", slog.F("agent_id", proc.agent.ID))
} else {
logger.Debug(ctx, "subagent already exists, skipping recreation",
slog.F("agent_id", proc.agent.ID),
)
}
api.mu.Lock() // Re-lock to update the agent.
defer api.mu.Unlock()
if api.closed {
deleteCtx, deleteCancel := context.WithTimeout(context.Background(), defaultOperationTimeout)
defer deleteCancel()
client := *api.subAgentClient.Load()
err := client.Delete(deleteCtx, proc.agent.ID)
if err != nil {
return xerrors.Errorf("delete existing subagent failed after API closed: %w", err)
}
return nil
}
// If we got this far, we should update the container ID to make
// sure we don't retry. If we update it too soon we may end up
// using an old subagent if e.g. delete failed previously.
proc.containerID = container.ID
api.injectedSubAgentProcs[dc.WorkspaceFolder] = proc
// Start the subagent in the container in a new goroutine to avoid
// blocking. Note that we pass the api.ctx to the subagent process
// so that it isn't affected by the timeout.
go api.runSubAgentInContainer(api.ctx, logger, dc, proc, coderPathInsideContainer)
ranSubAgent = true
return nil
}
// runSubAgentInContainer runs the subagent process inside a dev
// container. The api.asyncWg must be incremented before calling this
// function, and it will be decremented when the subagent process
// completes or if an error occurs.
func (api *API) runSubAgentInContainer(ctx context.Context, logger slog.Logger, dc codersdk.WorkspaceAgentDevcontainer, proc subAgentProcess, agentPath string) {
container := dc.Container // Must not be nil.
logger = logger.With(
slog.F("agent_id", proc.agent.ID),
)
defer func() {
proc.stop()
logger.Debug(ctx, "agent process cleanup complete")
api.asyncWg.Done()
}()
logger.Info(ctx, "starting subagent in devcontainer")
env := []string{
"CODER_AGENT_URL=" + api.subAgentURL,
"CODER_AGENT_TOKEN=" + proc.agent.AuthToken.String(),
}
env = append(env, api.subAgentEnv...)
err := api.dccli.Exec(proc.ctx, dc.WorkspaceFolder, dc.ConfigPath, agentPath, []string{"agent"},
WithExecContainerID(container.ID),
WithRemoteEnv(env...),
)
if err != nil && !errors.Is(err, context.Canceled) {
logger.Error(ctx, "subagent process failed", slog.Error(err))
} else {
logger.Info(ctx, "subagent process finished")
}
}
func (api *API) Close() error {
api.mu.Lock()
if api.closed {
api.mu.Unlock()
return nil
}
api.logger.Debug(api.ctx, "closing API")
api.closed = true
// Stop all running subagent processes and clean up.
subAgentIDs := make([]uuid.UUID, 0, len(api.injectedSubAgentProcs))
for workspaceFolder, proc := range api.injectedSubAgentProcs {
api.logger.Debug(api.ctx, "canceling subagent process",
slog.F("agent_name", proc.agent.Name),
slog.F("agent_id", proc.agent.ID),
slog.F("container_id", proc.containerID),
slog.F("workspace_folder", workspaceFolder),
)
proc.stop()
if proc.agent.ID != uuid.Nil {
subAgentIDs = append(subAgentIDs, proc.agent.ID)
}
}
api.injectedSubAgentProcs = make(map[string]subAgentProcess)
api.cancel() // Interrupt all routines.
api.mu.Unlock() // Release lock before waiting for goroutines.
// Note: We can't use api.ctx here because it's canceled.
deleteCtx, deleteCancel := context.WithTimeout(context.Background(), defaultOperationTimeout)
defer deleteCancel()
client := *api.subAgentClient.Load()
for _, id := range subAgentIDs {
err := client.Delete(deleteCtx, id)
if err != nil {
api.logger.Error(api.ctx, "delete subagent record during shutdown failed",
slog.Error(err),
slog.F("agent_id", id),
)
}
}
// Close the watcher to ensure its loop finishes.
err := api.watcher.Close()
// Wait for loops to finish.
if api.watcherDone != nil {
<-api.watcherDone
}
if api.updaterDone != nil {
<-api.updaterDone
}
// Wait for all async tasks to complete.
api.asyncWg.Wait()
api.logger.Debug(api.ctx, "closed API")
return err
}