feat: add agent exec pkg (#15577)

This commit is contained in:
Jon Ayers
2024-11-25 17:22:12 +02:00
committed by GitHub
parent 7876dc5fb1
commit bbc549d2df
7 changed files with 603 additions and 0 deletions

View File

@ -0,0 +1,145 @@
//go:build linux
// +build linux
package agentexec
import (
"flag"
"fmt"
"os"
"os/exec"
"runtime"
"strconv"
"strings"
"syscall"
"golang.org/x/sys/unix"
"golang.org/x/xerrors"
)
// unset is set to an invalid value for nice and oom scores.
const unset = -2000
// CLI runs the agent-exec command. It should only be called by the cli package.
func CLI() error {
// We lock the OS thread here to avoid a race condition where the nice priority
// we get is on a different thread from the one we set it on.
runtime.LockOSThread()
// Nop on success but we do it anyway in case of an error.
defer runtime.UnlockOSThread()
var (
fs = flag.NewFlagSet("agent-exec", flag.ExitOnError)
nice = fs.Int("coder-nice", unset, "")
oom = fs.Int("coder-oom", unset, "")
)
if len(os.Args) < 3 {
return xerrors.Errorf("malformed command %+v", os.Args)
}
// Parse everything after "coder agent-exec".
err := fs.Parse(os.Args[2:])
if err != nil {
return xerrors.Errorf("parse flags: %w", err)
}
// Get everything after "coder agent-exec --"
args := execArgs(os.Args)
if len(args) == 0 {
return xerrors.Errorf("no exec command provided %+v", os.Args)
}
if *nice == unset {
// If an explicit nice score isn't set, we use the default.
*nice, err = defaultNiceScore()
if err != nil {
return xerrors.Errorf("get default nice score: %w", err)
}
}
if *oom == unset {
// If an explicit oom score isn't set, we use the default.
*oom, err = defaultOOMScore()
if err != nil {
return xerrors.Errorf("get default oom score: %w", err)
}
}
err = unix.Setpriority(unix.PRIO_PROCESS, 0, *nice)
if err != nil {
return xerrors.Errorf("set nice score: %w", err)
}
err = writeOOMScoreAdj(*oom)
if err != nil {
return xerrors.Errorf("set oom score: %w", err)
}
path, err := exec.LookPath(args[0])
if err != nil {
return xerrors.Errorf("look path: %w", err)
}
return syscall.Exec(path, args, os.Environ())
}
func defaultNiceScore() (int, error) {
score, err := unix.Getpriority(unix.PRIO_PROCESS, 0)
if err != nil {
return 0, xerrors.Errorf("get nice score: %w", err)
}
// See https://linux.die.net/man/2/setpriority#Notes
score = 20 - score
score += 5
if score > 19 {
return 19, nil
}
return score, nil
}
func defaultOOMScore() (int, error) {
score, err := oomScoreAdj()
if err != nil {
return 0, xerrors.Errorf("get oom score: %w", err)
}
// If the agent has a negative oom_score_adj, we set the child to 0
// so it's treated like every other process.
if score < 0 {
return 0, nil
}
// If the agent is already almost at the maximum then set it to the max.
if score >= 998 {
return 1000, nil
}
// If the agent oom_score_adj is >=0, we set the child to slightly
// less than the maximum. If users want a different score they set it
// directly.
return 998, nil
}
func oomScoreAdj() (int, error) {
scoreStr, err := os.ReadFile("/proc/self/oom_score_adj")
if err != nil {
return 0, xerrors.Errorf("read oom_score_adj: %w", err)
}
return strconv.Atoi(strings.TrimSpace(string(scoreStr)))
}
func writeOOMScoreAdj(score int) error {
return os.WriteFile("/proc/self/oom_score_adj", []byte(fmt.Sprintf("%d", score)), 0o600)
}
// execArgs returns the arguments to pass to syscall.Exec after the "--" delimiter.
func execArgs(args []string) []string {
for i, arg := range args {
if arg == "--" {
return args[i+1:]
}
}
return nil
}

View File

@ -0,0 +1,178 @@
//go:build linux
// +build linux
package agentexec_test
import (
"bytes"
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"testing"
"time"
"github.com/stretchr/testify/require"
"golang.org/x/sys/unix"
"github.com/coder/coder/v2/testutil"
)
func TestCLI(t *testing.T) {
t.Parallel()
t.Run("OK", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
cmd, path := cmd(ctx, t, 123, 12)
err := cmd.Start()
require.NoError(t, err)
go cmd.Wait()
waitForSentinel(ctx, t, cmd, path)
requireOOMScore(t, cmd.Process.Pid, 123)
requireNiceScore(t, cmd.Process.Pid, 12)
})
t.Run("Defaults", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
cmd, path := cmd(ctx, t, 0, 0)
err := cmd.Start()
require.NoError(t, err)
go cmd.Wait()
waitForSentinel(ctx, t, cmd, path)
expectedNice := expectedNiceScore(t)
expectedOOM := expectedOOMScore(t)
requireOOMScore(t, cmd.Process.Pid, expectedOOM)
requireNiceScore(t, cmd.Process.Pid, expectedNice)
})
}
func requireNiceScore(t *testing.T, pid int, score int) {
t.Helper()
nice, err := unix.Getpriority(unix.PRIO_PROCESS, pid)
require.NoError(t, err)
// See https://linux.die.net/man/2/setpriority#Notes
require.Equal(t, score, 20-nice)
}
func requireOOMScore(t *testing.T, pid int, expected int) {
t.Helper()
actual, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", pid))
require.NoError(t, err)
score := strings.TrimSpace(string(actual))
require.Equal(t, strconv.Itoa(expected), score)
}
func waitForSentinel(ctx context.Context, t *testing.T, cmd *exec.Cmd, path string) {
t.Helper()
ticker := time.NewTicker(testutil.IntervalFast)
defer ticker.Stop()
// RequireEventually doesn't work well with require.NoError or similar require functions.
for {
err := cmd.Process.Signal(syscall.Signal(0))
require.NoError(t, err)
_, err = os.Stat(path)
if err == nil {
return
}
select {
case <-ticker.C:
case <-ctx.Done():
require.NoError(t, ctx.Err())
}
}
}
func cmd(ctx context.Context, t *testing.T, oom, nice int) (*exec.Cmd, string) {
var (
args = execArgs(oom, nice)
dir = t.TempDir()
file = filepath.Join(dir, "sentinel")
)
args = append(args, "sh", "-c", fmt.Sprintf("touch %s && sleep 10m", file))
//nolint:gosec
cmd := exec.CommandContext(ctx, TestBin, args...)
// We set this so we can also easily kill the sleep process the shell spawns.
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
cmd.Env = os.Environ()
var buf bytes.Buffer
cmd.Stdout = &buf
cmd.Stderr = &buf
t.Cleanup(func() {
// Print output of a command if the test fails.
if t.Failed() {
t.Logf("cmd %q output: %s", cmd.Args, buf.String())
}
if cmd.Process != nil {
// We use -cmd.Process.Pid to kill the whole process group.
_ = syscall.Kill(-cmd.Process.Pid, syscall.SIGINT)
}
})
return cmd, file
}
func expectedOOMScore(t *testing.T) int {
t.Helper()
score, err := os.ReadFile(fmt.Sprintf("/proc/%d/oom_score_adj", os.Getpid()))
require.NoError(t, err)
scoreInt, err := strconv.Atoi(strings.TrimSpace(string(score)))
require.NoError(t, err)
if scoreInt < 0 {
return 0
}
if scoreInt >= 998 {
return 1000
}
return 998
}
func expectedNiceScore(t *testing.T) int {
t.Helper()
score, err := unix.Getpriority(unix.PRIO_PROCESS, os.Getpid())
require.NoError(t, err)
// Priority is niceness + 20.
score = 20 - score
score += 5
if score > 19 {
return 19
}
return score
}
func execArgs(oom int, nice int) []string {
execArgs := []string{"agent-exec"}
if oom != 0 {
execArgs = append(execArgs, fmt.Sprintf("--coder-oom=%d", oom))
}
if nice != 0 {
execArgs = append(execArgs, fmt.Sprintf("--coder-nice=%d", nice))
}
execArgs = append(execArgs, "--")
return execArgs
}

View File

@ -0,0 +1,10 @@
//go:build !linux
// +build !linux
package agentexec
import "golang.org/x/xerrors"
func CLI() error {
return xerrors.New("agent-exec is only supported on Linux")
}

View File

@ -0,0 +1,19 @@
//go:build linux
// +build linux
package main
import (
"fmt"
"os"
"github.com/coder/coder/v2/agent/agentexec"
)
func main() {
err := agentexec.CLI()
if err != nil {
_, _ = fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}

86
agent/agentexec/exec.go Normal file
View File

@ -0,0 +1,86 @@
package agentexec
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"runtime"
"strconv"
"golang.org/x/xerrors"
)
const (
// EnvProcPrioMgmt is the environment variable that determines whether
// we attempt to manage process CPU and OOM Killer priority.
EnvProcPrioMgmt = "CODER_PROC_PRIO_MGMT"
EnvProcOOMScore = "CODER_PROC_OOM_SCORE"
EnvProcNiceScore = "CODER_PROC_NICE_SCORE"
)
// CommandContext returns an exec.Cmd that calls "coder agent-exec" prior to exec'ing
// the provided command if CODER_PROC_PRIO_MGMT is set, otherwise a normal exec.Cmd
// is returned. All instances of exec.Cmd should flow through this function to ensure
// proper resource constraints are applied to the child process.
func CommandContext(ctx context.Context, cmd string, args ...string) (*exec.Cmd, error) {
_, enabled := os.LookupEnv(EnvProcPrioMgmt)
if runtime.GOOS != "linux" || !enabled {
return exec.CommandContext(ctx, cmd, args...), nil
}
executable, err := os.Executable()
if err != nil {
return nil, xerrors.Errorf("get executable: %w", err)
}
bin, err := filepath.EvalSymlinks(executable)
if err != nil {
return nil, xerrors.Errorf("eval symlinks: %w", err)
}
execArgs := []string{"agent-exec"}
if score, ok := envValInt(EnvProcOOMScore); ok {
execArgs = append(execArgs, oomScoreArg(score))
}
if score, ok := envValInt(EnvProcNiceScore); ok {
execArgs = append(execArgs, niceScoreArg(score))
}
execArgs = append(execArgs, "--", cmd)
execArgs = append(execArgs, args...)
return exec.CommandContext(ctx, bin, execArgs...), nil
}
// envValInt searches for a key in a list of environment variables and parses it to an int.
// If the key is not found or cannot be parsed, returns 0 and false.
func envValInt(key string) (int, bool) {
val, ok := os.LookupEnv(key)
if !ok {
return 0, false
}
i, err := strconv.Atoi(val)
if err != nil {
return 0, false
}
return i, true
}
// The following are flags used by the agent-exec command. We use flags instead of
// environment variables to avoid having to deal with a caller overriding the
// environment variables.
const (
niceFlag = "coder-nice"
oomFlag = "coder-oom"
)
func niceScoreArg(score int) string {
return fmt.Sprintf("--%s=%d", niceFlag, score)
}
func oomScoreArg(score int) string {
return fmt.Sprintf("--%s=%d", oomFlag, score)
}

View File

@ -0,0 +1,119 @@
package agentexec_test
import (
"context"
"os"
"os/exec"
"runtime"
"testing"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/agent/agentexec"
)
//nolint:paralleltest // we need to test environment variables
func TestExec(t *testing.T) {
//nolint:paralleltest // we need to test environment variables
t.Run("NonLinux", func(t *testing.T) {
t.Setenv(agentexec.EnvProcPrioMgmt, "true")
if runtime.GOOS == "linux" {
t.Skip("skipping on linux")
}
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
require.NoError(t, err)
path, err := exec.LookPath("sh")
require.NoError(t, err)
require.Equal(t, path, cmd.Path)
require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args)
})
//nolint:paralleltest // we need to test environment variables
t.Run("Linux", func(t *testing.T) {
//nolint:paralleltest // we need to test environment variables
t.Run("Disabled", func(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("skipping on linux")
}
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
require.NoError(t, err)
path, err := exec.LookPath("sh")
require.NoError(t, err)
require.Equal(t, path, cmd.Path)
require.Equal(t, []string{"sh", "-c", "sleep"}, cmd.Args)
})
//nolint:paralleltest // we need to test environment variables
t.Run("Enabled", func(t *testing.T) {
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
if runtime.GOOS != "linux" {
t.Skip("skipping on linux")
}
executable, err := os.Executable()
require.NoError(t, err)
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
require.NoError(t, err)
require.Equal(t, executable, cmd.Path)
require.Equal(t, []string{executable, "agent-exec", "--", "sh", "-c", "sleep"}, cmd.Args)
})
t.Run("Nice", func(t *testing.T) {
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
t.Setenv(agentexec.EnvProcNiceScore, "10")
if runtime.GOOS != "linux" {
t.Skip("skipping on linux")
}
executable, err := os.Executable()
require.NoError(t, err)
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
require.NoError(t, err)
require.Equal(t, executable, cmd.Path)
require.Equal(t, []string{executable, "agent-exec", "--coder-nice=10", "--", "sh", "-c", "sleep"}, cmd.Args)
})
t.Run("OOM", func(t *testing.T) {
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
t.Setenv(agentexec.EnvProcOOMScore, "123")
if runtime.GOOS != "linux" {
t.Skip("skipping on linux")
}
executable, err := os.Executable()
require.NoError(t, err)
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
require.NoError(t, err)
require.Equal(t, executable, cmd.Path)
require.Equal(t, []string{executable, "agent-exec", "--coder-oom=123", "--", "sh", "-c", "sleep"}, cmd.Args)
})
t.Run("Both", func(t *testing.T) {
t.Setenv(agentexec.EnvProcPrioMgmt, "hello")
t.Setenv(agentexec.EnvProcOOMScore, "432")
t.Setenv(agentexec.EnvProcNiceScore, "14")
if runtime.GOOS != "linux" {
t.Skip("skipping on linux")
}
executable, err := os.Executable()
require.NoError(t, err)
cmd, err := agentexec.CommandContext(context.Background(), "sh", "-c", "sleep")
require.NoError(t, err)
require.Equal(t, executable, cmd.Path)
require.Equal(t, []string{executable, "agent-exec", "--coder-oom=432", "--coder-nice=14", "--", "sh", "-c", "sleep"}, cmd.Args)
})
})
}

View File

@ -0,0 +1,46 @@
//go:build linux
// +build linux
package agentexec_test
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
)
var TestBin string
func TestMain(m *testing.M) {
code := func() int {
// We generate a unique directory per test invocation to avoid collisions between two
// processes attempting to create the same temp file.
dir := genDir()
defer os.RemoveAll(dir)
TestBin = buildBinary(dir)
return m.Run()
}()
os.Exit(code)
}
func buildBinary(dir string) string {
path := filepath.Join(dir, "agent-test")
out, err := exec.Command("go", "build", "-o", path, "./cmdtest").CombinedOutput()
mustf(err, "build binary: %s", out)
return path
}
func mustf(err error, msg string, args ...any) {
if err != nil {
panic(fmt.Sprintf(msg, args...))
}
}
func genDir() string {
dir, err := os.MkdirTemp(os.TempDir(), "agentexec")
mustf(err, "create temp dir: %v", err)
return dir
}