feat: add agent timings (#14713)

* feat: begin impl of agent script timings

* feat: add job_id and display_name to script timings

* fix: increment migration number

* fix: rename migrations from 251 to 254

* test: get tests compiling

* fix: appease the linter

* fix: get tests passing again

* fix: drop column from correct table

* test: add fixture for agent script timings

* fix: typo

* fix: use job id used in provisioner job timings

* fix: increment migration number

* test: behaviour of script runner

* test: rewrite test

* test: does exit 1 script break things?

* test: rewrite test again

* fix: revert change

Not sure how this came to be, I do not recall manually changing
these files.

* fix: let code breathe

* fix: wrap errors

* fix: justify nolint

* fix: swap require.Equal argument order

* fix: add mutex operations

* feat: add 'ran_on_start' and 'blocked_login' fields

* fix: update testdata fixture

* fix: refer to agent_id instead of job_id in timings

* fix: JobID -> AgentID in dbauthz_test

* fix: add 'id' to scripts, make timing refer to script id

* fix: fix broken tests and convert bug

* fix: update testdata fixtures

* fix: update testdata fixtures again

* feat: capture stage and if script timed out

* fix: update migration number

* test: add test for script api

* fix: fake db query

* fix: use UTC time

* fix: ensure r.scriptComplete is not nil

* fix: move err check to right after call

* fix: uppercase sql

* fix: use dbtime.Now()

* fix: debug log on r.scriptCompleted being nil

* fix: ensure correct rbac permissions

* chore: remove DisplayName

* fix: get tests passing

* fix: remove space in sql up

* docs: document ExecuteOption

* fix: drop 'RETURNING' from sql

* chore: remove 'display_name' from timing table

* fix: testdata fixture

* fix: put r.scriptCompleted call in goroutine

* fix: track goroutine for test + use separate context for reporting

* fix: appease linter, handle trackCommandGoroutine error

* fix: resolve race condition

* feat: replace timed_out column with status column

* test: update testdata fixture

* fix: apply suggestions from review

* revert: linter changes
This commit is contained in:
Danielle Maywood
2024-09-24 10:51:49 +01:00
committed by GitHub
parent b8944074c4
commit ae522c558d
43 changed files with 1367 additions and 232 deletions

View File

@ -941,7 +941,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
}
}
err = a.scriptRunner.Init(manifest.Scripts)
err = a.scriptRunner.Init(manifest.Scripts, aAPI.ScriptCompleted)
if err != nil {
return xerrors.Errorf("init script runner: %w", err)
}
@ -949,9 +949,7 @@ func (a *agent) handleManifest(manifestOK *checkpoint) func(ctx context.Context,
start := time.Now()
// here we use the graceful context because the script runner is not directly tied
// to the agent API.
err := a.scriptRunner.Execute(a.gracefulCtx, func(script codersdk.WorkspaceAgentScript) bool {
return script.RunOnStart
})
err := a.scriptRunner.Execute(a.gracefulCtx, agentscripts.ExecuteStartScripts)
// Measure the time immediately after the script has finished
dur := time.Since(start).Seconds()
if err != nil {
@ -1844,9 +1842,7 @@ func (a *agent) Close() error {
a.gracefulCancel()
lifecycleState := codersdk.WorkspaceAgentLifecycleOff
err = a.scriptRunner.Execute(a.hardCtx, func(script codersdk.WorkspaceAgentScript) bool {
return script.RunOnStop
})
err = a.scriptRunner.Execute(a.hardCtx, agentscripts.ExecuteStopScripts)
if err != nil {
a.logger.Warn(a.hardCtx, "shutdown script(s) failed", slog.Error(err))
if errors.Is(err, agentscripts.ErrTimeout) {