Files
coder/agent/agentexec/cli_linux.go
Mathias Fredriksson df92df4565 fix(agent): filter out GOTRACEBACK=none (#16924)
With the switch to Go 1.24.1, our dogfood workspaces started setting
`GOTRACEBACK=none` in the environment, resulting in missing stacktraces
for users.

This is due to the capability changes we do when
`USE_CAP_NET_ADMIN=true`.

564b387262/provisionersdk/scripts/bootstrap_linux.sh (L60-L76)

This most likely triggers a change in securitybits which sets
`_AT_SECURE` for the process.

a1ddbdd3ef/src/runtime/os_linux.go (L297-L327)

Which in turn triggers secure mode:

a1ddbdd3ef/src/runtime/security_unix.go

This should not affect workspaces as template authors can still set the
environment on the agent resource.

See https://pkg.go.dev/runtime#hdr-Security
2025-03-17 11:10:14 +02:00

206 lines
5.7 KiB
Go

//go:build linux
// +build linux
package agentexec
import (
"flag"
"fmt"
"os"
"os/exec"
"runtime"
"slices"
"strconv"
"strings"
"syscall"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
"kernel.org/pub/linux/libs/security/libcap/cap"
"github.com/coder/coder/v2/agent/usershell"
)
// CLI runs the agent-exec command. It should only be called by the cli package.
func CLI() error {
// We lock the OS thread here to avoid a race condition where the nice priority
// we set gets applied to a different thread than the one we exec the provided
// command on.
runtime.LockOSThread()
// Nop on success but we do it anyway in case of an error.
defer runtime.UnlockOSThread()
var (
fs = flag.NewFlagSet("agent-exec", flag.ExitOnError)
nice = fs.Int("coder-nice", unset, "")
oom = fs.Int("coder-oom", unset, "")
)
if len(os.Args) < 3 {
return xerrors.Errorf("malformed command %+v", os.Args)
}
// Parse everything after "coder agent-exec".
err := fs.Parse(os.Args[2:])
if err != nil {
return xerrors.Errorf("parse flags: %w", err)
}
// Get everything after "coder agent-exec --"
args := execArgs(os.Args)
if len(args) == 0 {
return xerrors.Errorf("no exec command provided %+v", os.Args)
}
if *oom == unset {
// If an explicit oom score isn't set, we use the default.
*oom, err = defaultOOMScore()
if err != nil {
return xerrors.Errorf("get default oom score: %w", err)
}
}
if *nice == unset {
// If an explicit nice score isn't set, we use the default.
*nice, err = defaultNiceScore()
if err != nil {
return xerrors.Errorf("get default nice score: %w", err)
}
}
// We drop effective caps prior to setting dumpable so that we limit the
// impact of someone attempting to hijack the process (i.e. with a debugger)
// to take advantage of the capabilities of the agent process. We encourage
// users to set cap_net_admin on the agent binary for improved networking
// performance and doing so results in the process having its SET_DUMPABLE
// attribute disabled (meaning we cannot adjust the oom score).
err = dropEffectiveCaps()
if err != nil {
printfStdErr("failed to drop effective caps: %v", err)
}
// Set dumpable to 1 so that we can adjust the oom score. If the process
// doesn't have capabilities or has an suid/sgid bit set, this is already
// set.
err = unix.Prctl(unix.PR_SET_DUMPABLE, 1, 0, 0, 0)
if err != nil {
printfStdErr("failed to set dumpable: %v", err)
}
err = writeOOMScoreAdj(*oom)
if err != nil {
// We alert the user instead of failing the command since it can be difficult to debug
// for a template admin otherwise. It's quite possible (and easy) to set an
// inappriopriate value for oom_score_adj.
printfStdErr("failed to adjust oom score to %d for cmd %+v: %v", *oom, execArgs(os.Args), err)
}
// Set dumpable back to 0 just to be safe. It's not inherited for execve anyways.
err = unix.Prctl(unix.PR_SET_DUMPABLE, 0, 0, 0, 0)
if err != nil {
printfStdErr("failed to unset dumpable: %v", err)
}
err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
if err != nil {
// We alert the user instead of failing the command since it can be difficult to debug
// for a template admin otherwise. It's quite possible (and easy) to set an
// inappriopriate value for niceness.
printfStdErr("failed to adjust niceness to %d for cmd %+v: %v", *nice, args, err)
}
path, err := exec.LookPath(args[0])
if err != nil {
return xerrors.Errorf("look path: %w", err)
}
// Remove environment variables specific to the agentexec command. This is
// especially important for environments that are attempting to develop Coder in Coder.
ei := usershell.SystemEnvInfo{}
env := ei.Environ()
env = slices.DeleteFunc(env, func(e string) bool {
return strings.HasPrefix(e, EnvProcPrioMgmt) ||
strings.HasPrefix(e, EnvProcOOMScore) ||
strings.HasPrefix(e, EnvProcNiceScore)
})
return syscall.Exec(path, args, env)
}
func defaultNiceScore() (int, error) {
score, err := unix.Getpriority(unix.PRIO_PROCESS, 0)
if err != nil {
return 0, xerrors.Errorf("get nice score: %w", err)
}
// See https://linux.die.net/man/2/setpriority#Notes
score = 20 - score
score += 5
if score > 19 {
return 19, nil
}
return score, nil
}
func defaultOOMScore() (int, error) {
score, err := oomScoreAdj()
if err != nil {
return 0, xerrors.Errorf("get oom score: %w", err)
}
// If the agent has a negative oom_score_adj, we set the child to 0
// so it's treated like every other process.
if score < 0 {
return 0, nil
}
// If the agent is already almost at the maximum then set it to the max.
if score >= 998 {
return 1000, nil
}
// If the agent oom_score_adj is >=0, we set the child to slightly
// less than the maximum. If users want a different score they set it
// directly.
return 998, nil
}
func oomScoreAdj() (int, error) {
scoreStr, err := os.ReadFile("/proc/self/oom_score_adj")
if err != nil {
return 0, xerrors.Errorf("read oom_score_adj: %w", err)
}
return strconv.Atoi(strings.TrimSpace(string(scoreStr)))
}
func writeOOMScoreAdj(score int) error {
return os.WriteFile(fmt.Sprintf("/proc/%d/oom_score_adj", os.Getpid()), []byte(fmt.Sprintf("%d", score)), 0o600)
}
// execArgs returns the arguments to pass to syscall.Exec after the "--" delimiter.
func execArgs(args []string) []string {
for i, arg := range args {
if arg == "--" {
return args[i+1:]
}
}
return nil
}
func printfStdErr(format string, a ...any) {
_, _ = fmt.Fprintf(os.Stderr, "coder-agent: %s\n", fmt.Sprintf(format, a...))
}
func dropEffectiveCaps() error {
proc := cap.GetProc()
err := proc.ClearFlag(cap.Effective)
if err != nil {
return xerrors.Errorf("clear effective caps: %w", err)
}
err = proc.SetProc()
if err != nil {
return xerrors.Errorf("set proc: %w", err)
}
return nil
}