mirror of
https://github.com/coder/coder.git
synced 2025-03-14 10:09:57 +00:00
feat: add agent exec pkg (#15577)
This commit is contained in:
145
agent/agentexec/cli_linux.go
Normal file
145
agent/agentexec/cli_linux.go
Normal file
@ -0,0 +1,145 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package agentexec
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
// unset is a sentinel outside the valid nice (-20..19) and oom_score_adj
// (-1000..1000) ranges; it marks a flag that was not passed on the command line.
|
||||
const unset = -2000
|
||||
|
||||
// CLI runs the agent-exec command. It should only be called by the cli package.
|
||||
func CLI() error {
|
||||
// We lock the OS thread here to avoid a race condition where the nice priority
|
||||
// we get is on a different thread from the one we set it on.
|
||||
runtime.LockOSThread()
|
||||
// Nop on success but we do it anyway in case of an error.
|
||||
defer runtime.UnlockOSThread()
|
||||
|
||||
var (
|
||||
fs = flag.NewFlagSet("agent-exec", flag.ExitOnError)
|
||||
nice = fs.Int("coder-nice", unset, "")
|
||||
oom = fs.Int("coder-oom", unset, "")
|
||||
)
|
||||
|
||||
if len(os.Args) < 3 {
|
||||
return xerrors.Errorf("malformed command %+v", os.Args)
|
||||
}
|
||||
|
||||
// Parse everything after "coder agent-exec".
|
||||
err := fs.Parse(os.Args[2:])
|
||||
if err != nil {
|
||||
return xerrors.Errorf("parse flags: %w", err)
|
||||
}
|
||||
|
||||
// Get everything after "coder agent-exec --"
|
||||
args := execArgs(os.Args)
|
||||
if len(args) == 0 {
|
||||
return xerrors.Errorf("no exec command provided %+v", os.Args)
|
||||
}
|
||||
|
||||
if *nice == unset {
|
||||
// If an explicit nice score isn't set, we use the default.
|
||||
*nice, err = defaultNiceScore()
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get default nice score: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if *oom == unset {
|
||||
// If an explicit oom score isn't set, we use the default.
|
||||
*oom, err = defaultOOMScore()
|
||||
if err != nil {
|
||||
return xerrors.Errorf("get default oom score: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("set nice score: %w", err)
|
||||
}
|
||||
|
||||
err = writeOOMScoreAdj(*oom)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("set oom score: %w", err)
|
||||
}
|
||||
|
||||
path, err := exec.LookPath(args[0])
|
||||
if err != nil {
|
||||
return xerrors.Errorf("look path: %w", err)
|
||||
}
|
||||
|
||||
return syscall.Exec(path, args, os.Environ())
|
||||
}
|
||||
|
||||
func defaultNiceScore() (int, error) {
|
||||
score, err := unix.Getpriority(unix.PRIO_PROCESS, 0)
|
||||
if err != nil {
|
||||
return 0, xerrors.Errorf("get nice score: %w", err)
|
||||
}
|
||||
// See https://linux.die.net/man/2/setpriority#Notes
|
||||
score = 20 - score
|
||||
|
||||
score += 5
|
||||
if score > 19 {
|
||||
return 19, nil
|
||||
}
|
||||
return score, nil
|
||||
}
|
||||
|
||||
func defaultOOMScore() (int, error) {
|
||||
score, err := oomScoreAdj()
|
||||
if err != nil {
|
||||
return 0, xerrors.Errorf("get oom score: %w", err)
|
||||
}
|
||||
|
||||
// If the agent has a negative oom_score_adj, we set the child to 0
|
||||
// so it's treated like every other process.
|
||||
if score < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// If the agent is already almost at the maximum then set it to the max.
|
||||
if score >= 998 {
|
||||
return 1000, nil
|
||||
}
|
||||
|
||||
// If the agent oom_score_adj is >=0, we set the child to slightly
|
||||
// less than the maximum. If users want a different score they set it
|
||||
// directly.
|
||||
return 998, nil
|
||||
}
|
||||
|
||||
func oomScoreAdj() (int, error) {
|
||||
scoreStr, err := os.ReadFile("/proc/self/oom_score_adj")
|
||||
if err != nil {
|
||||
return 0, xerrors.Errorf("read oom_score_adj: %w", err)
|
||||
}
|
||||
return strconv.Atoi(strings.TrimSpace(string(scoreStr)))
|
||||
}
|
||||
|
||||
// writeOOMScoreAdj writes score, formatted as a decimal integer, to the
// current process's /proc/self/oom_score_adj file.
func writeOOMScoreAdj(score int) error {
	payload := []byte(fmt.Sprintf("%d", score))
	return os.WriteFile("/proc/self/oom_score_adj", payload, 0o600)
}
|
||||
|
||||
// execArgs returns everything that follows the first "--" delimiter in args,
// which is the argv handed to syscall.Exec. It returns nil when args contains
// no "--".
func execArgs(args []string) []string {
	for i := 0; i < len(args); i++ {
		if args[i] != "--" {
			continue
		}
		return args[i+1:]
	}
	return nil
}
|
178
agent/agentexec/cli_linux_test.go
Normal file
178
agent/agentexec/cli_linux_test.go
Normal file
@ -0,0 +1,178 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package agentexec_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
)
|
||||
|
||||
func TestCLI(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("OK", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
cmd, path := cmd(ctx, t, 123, 12)
|
||||
err := cmd.Start()
|
||||
require.NoError(t, err)
|
||||
go cmd.Wait()
|
||||
|
||||
waitForSentinel(ctx, t, cmd, path)
|
||||
requireOOMScore(t, cmd.Process.Pid, 123)
|
||||
requireNiceScore(t, cmd.Process.Pid, 12)
|
||||
})
|
||||
|
||||
t.Run("Defaults", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
cmd, path := cmd(ctx, t, 0, 0)
|
||||
err := cmd.Start()
|
||||
require.NoError(t, err)
|
||||
go cmd.Wait()
|
||||
|
||||
waitForSentinel(ctx, t, cmd, path)
|
||||
|
||||
expectedNice := expectedNiceScore(t)
|
||||
expectedOOM := expectedOOMScore(t)
|
||||
requireOOMScore(t, cmd.Process.Pid, expectedOOM)
|
||||
requireNiceScore(t, cmd.Process.Pid, expectedNice)
|
||||
})
|
||||
}
|
||||
|
||||
func requireNiceScore(t *testing.T, pid int, score int) {
|
||||
t.Helper()
|
||||
|
||||
nice, err := unix.Getpriority(unix.PRIO_PROCESS, pid)
|
||||
require.NoError(t, err)
|
||||
// See https://linux.die.net/man/2/setpriority#Notes
|
||||
require.Equal(t, score, 20-nice)
|
||||
}
|
||||
|
||||
func requireOOMScore(t *testing.T, pid int, expected int) {
|
||||
t.Helper()
|
||||
|
||||
actual, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid))
|
||||
require.NoError(t, err)
|
||||
score := strings.TrimSpace(string(actual))
|
||||
require.Equal(t, strconv.Itoa(expected), score)
|
||||
}
|
||||
|
||||
func waitForSentinel(ctx context.Context, t *testing.T, cmd *exec.Cmd, path string) {
|
||||
t.Helper()
|
||||
|
||||
ticker := time.NewTicker(testutil.IntervalFast)
|
||||
defer ticker.Stop()
|
||||
|
||||
// RequireEventually doesn't work well with require.NoError or similar require functions.
|
||||
for {
|
||||
err := cmd.Process.Signal(syscall.Signal(0))
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = os.Stat(path)
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ticker.C:
|
||||
case <-ctx.Done():
|
||||
require.NoError(t, ctx.Err())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
|
||||
var (
|
||||
args = execArgs(oom, nice)
|
||||
dir = t.TempDir()
|
||||
file = filepath.Join(dir, "sentinel")
|
||||
)
|
||||
|
||||
args = append(args, "sh", "-c", fmt.Sprintf("touch %s && sleep 10m", file))
|
||||
//nolint:gosec
|
||||
cmd := exec.CommandContext(ctx, TestBin, args...)
|
||||
|
||||
// We set this so we can also easily kill the sleep process the shell spawns.
|
||||
cmd.SysProcAttr = &syscall.SysProcAttr{
|
||||
Setpgid: true,
|
||||
}
|
||||
|
||||
cmd.Env = os.Environ()
|
||||
var buf bytes.Buffer
|
||||
cmd.Stdout = &buf
|
||||
cmd.Stderr = &buf
|
||||
t.Cleanup(func() {
|
||||
// Print output of a command if the test fails.
|
||||
if t.Failed() {
|
||||
t.Logf("cmd %q output: %s", cmd.Args, buf.String())
|
||||
}
|
||||
if cmd.Process != nil {
|
||||
// We use -cmd.Process.Pid to kill the whole process group.
|
||||
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGINT)
|
||||
}
|
||||
})
|
||||
return cmd, file
|
||||
}
|
||||
|
||||
func expectedOOMScore(t *testing.T) int {
|
||||
t.Helper()
|
||||
|
||||
score, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", os.Getpid()))
|
||||
require.NoError(t, err)
|
||||
|
||||
scoreInt, err := strconv.Atoi(strings.TrimSpace(string(score)))
|
||||
require.NoError(t, err)
|
||||
|
||||
if scoreInt < 0 {
|
||||
return 0
|
||||
}
|
||||
if scoreInt >= 998 {
|
||||
return 1000
|
||||
}
|
||||
return 998
|
||||
}
|
||||
|
||||
func expectedNiceScore(t *testing.T) int {
|
||||
t.Helper()
|
||||
|
||||
score, err := unix.Getpriority(unix.PRIO_PROCESS, os.Getpid())
|
||||
require.NoError(t, err)
|
||||
|
||||
// Priority is niceness + 20.
|
||||
score = 20 - score
|
||||
score += 5
|
||||
if score > 19 {
|
||||
return 19
|
||||
}
|
||||
return score
|
||||
}
|
||||
|
||||
// execArgs builds the agent-exec argument prefix up to and including the "--"
// delimiter. A zero oom or nice value omits the corresponding flag.
func execArgs(oom int, nice int) []string {
	argv := []string{"agent-exec"}
	if oom != 0 {
		argv = append(argv, fmt.Sprintf("--coder-oom=%d", oom))
	}
	if nice != 0 {
		argv = append(argv, fmt.Sprintf("--coder-nice=%d", nice))
	}
	return append(argv, "--")
}
|
10
agent/agentexec/cli_other.go
Normal file
10
agent/agentexec/cli_other.go
Normal file
@ -0,0 +1,10 @@
|
||||
//go:build !linux
|
||||
// +build !linux
|
||||
|
||||
package agentexec
|
||||
|
||||
import "golang.org/x/xerrors"
|
||||
|
||||
func CLI() error {
|
||||
return xerrors.New("agent-exec is only supported on Linux")
|
||||
}
|
19
agent/agentexec/cmdtest/main_linux.go
Normal file
19
agent/agentexec/cmdtest/main_linux.go
Normal file
@ -0,0 +1,19 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/coder/coder/v2/agent/agentexec"
|
||||
)
|
||||
|
||||
func main() {
|
||||
err := agentexec.CLI()
|
||||
if err != nil {
|
||||
_, _ = fmt.Fprintln(os.Stderr, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
86
agent/agentexec/exec.go
Normal file
86
agent/agentexec/exec.go
Normal file
@ -0,0 +1,86 @@
|
||||
package agentexec
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
)
|
||||
|
||||
const (
	// EnvProcPrioMgmt is the environment variable that determines whether
	// we attempt to manage process CPU and OOM Killer priority.
	EnvProcPrioMgmt = "CODER_PROC_PRIO_MGMT"
	// EnvProcOOMScore is the environment variable whose integer value, when
	// set, is passed to agent-exec as the oom score flag.
	EnvProcOOMScore = "CODER_PROC_OOM_SCORE"
	// EnvProcNiceScore is the environment variable whose integer value, when
	// set, is passed to agent-exec as the nice score flag.
	EnvProcNiceScore = "CODER_PROC_NICE_SCORE"
)
|
||||
|
||||
// CommandContext returns an exec.Cmd that calls "coder agent-exec" prior to exec'ing
|
||||
// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal exec.Cmd
|
||||
// is returned. All instances of exec.Cmd should flow through this function to ensure
|
||||
// proper resource constraints are applied to the child process.
|
||||
func CommandContext(ctx context.Context, cmd string, args ...string) (*exec.Cmd, error) {
|
||||
_, enabled := os.LookupEnv(EnvProcPrioMgmt)
|
||||
if runtime.GOOS != "linux" || !enabled {
|
||||
return exec.CommandContext(ctx, cmd, args...), nil
|
||||
}
|
||||
|
||||
executable, err := os.Executable()
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("get executable: %w", err)
|
||||
}
|
||||
|
||||
bin, err := filepath.EvalSymlinks(executable)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("eval symlinks: %w", err)
|
||||
}
|
||||
|
||||
execArgs := []string{"agent-exec"}
|
||||
if score, ok := envValInt(EnvProcOOMScore); ok {
|
||||
execArgs = append(execArgs, oomScoreArg(score))
|
||||
}
|
||||
|
||||
if score, ok := envValInt(EnvProcNiceScore); ok {
|
||||
execArgs = append(execArgs, niceScoreArg(score))
|
||||
}
|
||||
execArgs = append(execArgs, "--", cmd)
|
||||
execArgs = append(execArgs, args...)
|
||||
|
||||
return exec.CommandContext(ctx, bin, execArgs...), nil
|
||||
}
|
||||
|
||||
// envValInt looks up key in the process environment and parses its value as
// an int. It returns 0 and false when the key is not set or the value is not
// a valid integer.
func envValInt(key string) (int, bool) {
	raw, found := os.LookupEnv(key)
	if found {
		if n, err := strconv.Atoi(raw); err == nil {
			return n, true
		}
	}
	return 0, false
}
|
||||
|
||||
// The following are flags used by the agent-exec command. We use flags instead of
// environment variables to avoid having to deal with a caller overriding the
// environment variables.
const (
	// niceFlag is the name of the flag carrying the nice score.
	niceFlag = "coder-nice"
	// oomFlag is the name of the flag carrying the oom score.
	oomFlag = "coder-oom"
)
|
||||
|
||||
func niceScoreArg(score int) string {
|
||||
return fmt.Sprintf("--%s=%d", niceFlag, score)
|
||||
}
|
||||
|
||||
func oomScoreArg(score int) string {
|
||||
return fmt.Sprintf("--%s=%d", oomFlag, score)
|
||||
}
|
119
agent/agentexec/exec_test.go
Normal file
119
agent/agentexec/exec_test.go
Normal file
@ -0,0 +1,119 @@
|
||||
package agentexec_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
"github.com/coder/coder/v2/agent/agentexec"
|
||||
)
|
||||
|
||||
//nolint:paralleltest // we need to test environment variables
|
||||
func TestExec(t *testing.T) {
|
||||
//nolint:paralleltest // we need to test environment variables
|
||||
t.Run("NonLinux", func(t *testing.T) {
|
||||
t.Setenv(agentexec.EnvProcPrioMgmt, "true")
|
||||
|
||||
if runtime.GOOS == "linux" {
|
||||
t.Skip("skipping on linux")
|
||||
}
|
||||
|
||||
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
|
||||
require.NoError(t, err)
|
||||
|
||||
path, err := exec.LookPath("sh")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, path, cmd.Path)
|
||||
require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args)
|
||||
})
|
||||
|
||||
//nolint:paralleltest // we need to test environment variables
|
||||
t.Run("Linux", func(t *testing.T) {
|
||||
//nolint:paralleltest // we need to test environment variables
|
||||
t.Run("Disabled", func(t *testing.T) {
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("skipping on linux")
|
||||
}
|
||||
|
||||
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
|
||||
require.NoError(t, err)
|
||||
path, err := exec.LookPath("sh")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, path, cmd.Path)
|
||||
require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args)
|
||||
})
|
||||
|
||||
//nolint:paralleltest // we need to test environment variables
|
||||
t.Run("Enabled", func(t *testing.T) {
|
||||
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
|
||||
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("skipping on linux")
|
||||
}
|
||||
|
||||
executable, err := os.Executable()
|
||||
require.NoError(t, err)
|
||||
|
||||
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, executable, cmd.Path)
|
||||
require.Equal(t, []string{executable, "agent-exec", "--", "sh", "-c", "sleep"}, cmd.Args)
|
||||
})
|
||||
|
||||
t.Run("Nice", func(t *testing.T) {
|
||||
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
|
||||
t.Setenv(agentexec.EnvProcNiceScore, "10")
|
||||
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("skipping on linux")
|
||||
}
|
||||
|
||||
executable, err := os.Executable()
|
||||
require.NoError(t, err)
|
||||
|
||||
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, executable, cmd.Path)
|
||||
require.Equal(t, []string{executable, "agent-exec", "--coder-nice=10", "--", "sh", "-c", "sleep"}, cmd.Args)
|
||||
})
|
||||
|
||||
t.Run("OOM", func(t *testing.T) {
|
||||
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
|
||||
t.Setenv(agentexec.EnvProcOOMScore, "123")
|
||||
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("skipping on linux")
|
||||
}
|
||||
|
||||
executable, err := os.Executable()
|
||||
require.NoError(t, err)
|
||||
|
||||
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, executable, cmd.Path)
|
||||
require.Equal(t, []string{executable, "agent-exec", "--coder-oom=123", "--", "sh", "-c", "sleep"}, cmd.Args)
|
||||
})
|
||||
|
||||
t.Run("Both", func(t *testing.T) {
|
||||
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
|
||||
t.Setenv(agentexec.EnvProcOOMScore, "432")
|
||||
t.Setenv(agentexec.EnvProcNiceScore, "14")
|
||||
|
||||
if runtime.GOOS != "linux" {
|
||||
t.Skip("skipping on linux")
|
||||
}
|
||||
|
||||
executable, err := os.Executable()
|
||||
require.NoError(t, err)
|
||||
|
||||
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, executable, cmd.Path)
|
||||
require.Equal(t, []string{executable, "agent-exec", "--coder-oom=432", "--coder-nice=14", "--", "sh", "-c", "sleep"}, cmd.Args)
|
||||
})
|
||||
})
|
||||
}
|
46
agent/agentexec/main_linux_test.go
Normal file
46
agent/agentexec/main_linux_test.go
Normal file
@ -0,0 +1,46 @@
|
||||
//go:build linux
|
||||
// +build linux
|
||||
|
||||
package agentexec_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestBin is the path to the test binary built by TestMain; the tests in this
// package run it as their command.
var TestBin string
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
code := func() int {
|
||||
// We generate a unique directory per test invocation to avoid collisions between two
|
||||
// processes attempting to create the same temp file.
|
||||
dir := genDir()
|
||||
defer os.RemoveAll(dir)
|
||||
TestBin = buildBinary(dir)
|
||||
return m.Run()
|
||||
}()
|
||||
|
||||
os.Exit(code)
|
||||
}
|
||||
|
||||
func buildBinary(dir string) string {
|
||||
path := filepath.Join(dir, "agent-test")
|
||||
out, err := exec.Command("go", "build", "-o", path, "./cmdtest").CombinedOutput()
|
||||
mustf(err, "build binary: %s", out)
|
||||
return path
|
||||
}
|
||||
|
||||
// mustf panics with the message formatted from msg and args when err is
// non-nil; it does nothing otherwise. err itself is not added to the message.
func mustf(err error, msg string, args ...any) {
	if err == nil {
		return
	}
	panic(fmt.Sprintf(msg, args...))
}
|
||||
|
||||
func genDir() string {
|
||||
dir, err := os.MkdirTemp(os.TempDir(), "agentexec")
|
||||
mustf(err, "create temp dir: %v", err)
|
||||
return dir
|
||||
}
|
Reference in New Issue
Block a user