mirror of
https://github.com/coder/coder.git
synced 2025-07-12 00:14:10 +00:00
feat: add SIGQUIT/SIGTRAP handler for the CLI (#5665)
This commit is contained in:
@ -39,6 +39,8 @@ func workspaceAgent() *cobra.Command {
|
||||
ctx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
go dumpHandler(ctx)
|
||||
|
||||
rawURL, err := cmd.Flags().GetString(varAgentURL)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("CODER_AGENT_URL must be set: %w", err)
|
||||
|
93
cli/root.go
93
cli/root.go
@ -8,8 +8,11 @@ import (
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"syscall"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
@ -631,3 +634,93 @@ func (h *headerTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||
}
|
||||
return h.transport.RoundTrip(req)
|
||||
}
|
||||
|
||||
// dumpHandler provides a custom SIGQUIT and SIGTRAP handler that dumps the
// stacktrace of all goroutines to stderr and to a timestamped file in the
// user's home directory (falling back to the OS temp directory when the home
// directory cannot be determined). This is useful for debugging deadlock
// issues that may occur in production in workspaces, since the default Go
// runtime will only dump to stderr (which is often difficult/impossible to
// read in a workspace).
//
// SIGQUITs will still cause the program to exit (similarly to the default Go
// runtime behavior); SIGTRAPs only produce a dump and the handler keeps
// listening.
//
// A SIGQUIT handler will not be registered if GOTRACEBACK=crash.
//
// The function blocks until ctx is cancelled, so it is meant to be run in its
// own goroutine. On Windows this immediately returns.
func dumpHandler(ctx context.Context) {
	if runtime.GOOS == "windows" {
		// free up the goroutine since it'll be permanently blocked anyways
		return
	}

	listenSignals := []os.Signal{syscall.SIGTRAP}
	if os.Getenv("GOTRACEBACK") != "crash" {
		listenSignals = append(listenSignals, syscall.SIGQUIT)
	}

	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, listenSignals...)
	defer signal.Stop(sigs)

	for {
		var sig os.Signal
		select {
		case <-ctx.Done():
			return
		case sig = <-sigs:
		}

		sigStr := ""
		switch sig {
		case syscall.SIGQUIT:
			sigStr = "SIGQUIT"
		case syscall.SIGTRAP:
			sigStr = "SIGTRAP"
		}

		stack := captureGoroutineStacks()
		_, _ = fmt.Fprintf(os.Stderr, "%s:\n%s\n", sigStr, stack)

		// Failures while writing the file are reported on stderr only; they
		// must not prevent the SIGQUIT exit below.
		writeGoroutineDump(stack)

		if sigStr == "SIGQUIT" {
			//nolint:revive
			os.Exit(1)
		}
	}
}

// captureGoroutineStacks returns the stack traces of all goroutines. It starts
// with a 1MB buffer and keeps doubling it until the entire trace fits, giving
// up once the next doubling would exceed 64MB. In that case the tail of the
// buffer is overwritten with a message saying the trace was truncated.
func captureGoroutineStacks() []byte {
	const maxSize = 64_000_000

	buf := make([]byte, 1_000_000)
	for {
		n := runtime.Stack(buf, true)
		if n < len(buf) {
			return buf[:n]
		}
		if 2*len(buf) > maxSize {
			// Write a message to the end of the buffer saying that it was
			// truncated.
			const truncatedMsg = "\n\n\nstack trace truncated due to size\n"
			copy(buf[len(buf)-len(truncatedMsg):], truncatedMsg)
			return buf
		}
		buf = make([]byte, 2*len(buf))
	}
}

// writeGoroutineDump writes stack to a new "coder-agent-<timestamp>.dump" file
// in the user's home directory, or in the OS temp directory if the home
// directory is unavailable. The chosen path and any failure are reported on
// stderr; nothing is returned because the caller has no better way to surface
// an error from inside a signal handler.
func writeGoroutineDump(stack []byte) {
	dir, err := os.UserHomeDir()
	if err != nil {
		dir = os.TempDir()
	}

	// The layout ends in a literal "Z", so the time must be converted to UTC
	// for the timestamp in the filename to be truthful.
	stamp := time.Now().UTC().Format("2006-01-02T15:04:05.000Z")
	fpath := filepath.Join(dir, fmt.Sprintf("coder-agent-%s.dump", stamp))
	_, _ = fmt.Fprintf(os.Stderr, "writing dump to %q\n", fpath)

	f, err := os.Create(fpath)
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "failed to open dump file: %v\n", err)
		return
	}

	_, err = f.Write(stack)
	// Always close, and surface a close error when the write itself
	// succeeded, since a failed close can mean the data never hit the disk.
	if closeErr := f.Close(); err == nil {
		err = closeErr
	}
	if err != nil {
		_, _ = fmt.Fprintf(os.Stderr, "failed to write dump file: %v\n", err)
	}
}
|
||||
|
@ -81,6 +81,13 @@ func Server(vip *viper.Viper, newAPI func(context.Context, *coderd.Options) (*co
|
||||
Use: "server",
|
||||
Short: "Start a Coder server",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Main command context for managing cancellation of running
|
||||
// services.
|
||||
ctx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
go dumpHandler(ctx)
|
||||
|
||||
cfg, err := deployment.Config(cmd.Flags(), vip)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("getting deployment config: %w", err)
|
||||
@ -123,11 +130,6 @@ func Server(vip *viper.Viper, newAPI func(context.Context, *coderd.Options) (*co
|
||||
logger = logger.AppendSinks(tracing.SlogSink{})
|
||||
}
|
||||
|
||||
// Main command context for managing cancellation
|
||||
// of running services.
|
||||
ctx, cancel := context.WithCancel(cmd.Context())
|
||||
defer cancel()
|
||||
|
||||
// Register signals early on so that graceful shutdown can't
|
||||
// be interrupted by additional signals. Note that we avoid
|
||||
// shadowing cancel() (from above) here because notifyStop()
|
||||
|
Reference in New Issue
Block a user