feat: add metrics to workspace agent scripts (#11132)

* push startup script metrics to agent
This commit is contained in:
Steven Masley
2023-12-13 11:45:43 -06:00
committed by GitHub
parent 41ed581460
commit b7bdb17460
20 changed files with 306 additions and 127 deletions

View File

@ -35,6 +35,8 @@ import (
"tailscale.com/types/netlogtype"
"cdr.dev/slog"
"github.com/coder/retry"
"github.com/coder/coder/v2/agent/agentproc"
"github.com/coder/coder/v2/agent/agentscripts"
"github.com/coder/coder/v2/agent/agentssh"
@ -45,7 +47,6 @@ import (
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/agentsdk"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/retry"
)
const (
@ -222,8 +223,10 @@ type agent struct {
connCountReconnectingPTY atomic.Int64
prometheusRegistry *prometheus.Registry
metrics *agentMetrics
syscaller agentproc.Syscaller
// metrics are prometheus registered metrics that will be collected and
// labeled in Coder with the agent + workspace.
metrics *agentMetrics
syscaller agentproc.Syscaller
// modifiedProcs is used for testing process priority management.
modifiedProcs chan []*agentproc.Process
@ -252,6 +255,9 @@ func (a *agent) init(ctx context.Context) {
Filesystem: a.filesystem,
PatchLogs: a.client.PatchLogs,
})
// Register runner metrics. If the prom registry is nil, the metrics
// will not report anywhere.
a.scriptRunner.RegisterMetrics(a.prometheusRegistry)
go a.runLoop(ctx)
}
@ -745,9 +751,12 @@ func (a *agent) run(ctx context.Context) error {
return xerrors.Errorf("init script runner: %w", err)
}
err = a.trackConnGoroutine(func() {
start := time.Now()
err := a.scriptRunner.Execute(ctx, func(script codersdk.WorkspaceAgentScript) bool {
return script.RunOnStart
})
// Measure the time immediately after the script has finished
dur := time.Since(start).Seconds()
if err != nil {
a.logger.Warn(ctx, "startup script(s) failed", slog.Error(err))
if errors.Is(err, agentscripts.ErrTimeout) {
@ -758,6 +767,12 @@ func (a *agent) run(ctx context.Context) error {
} else {
a.setLifecycle(ctx, codersdk.WorkspaceAgentLifecycleReady)
}
label := "false"
if err == nil {
label = "true"
}
a.metrics.startupScriptSeconds.WithLabelValues(label).Set(dur)
a.scriptRunner.StartCron()
})
if err != nil {