fix(coderd): ensure agent timings are non-zero on insert (#18065)

Relates to https://github.com/coder/coder/issues/15432

Ensures that no workspace build timings with zero values for started_at or ended_at are inserted into the DB or returned from the API.
This commit is contained in:
Cian Johnston
2025-05-29 13:36:06 +01:00
committed by GitHub
parent 5f34d01906
commit 776c144128
8 changed files with 217 additions and 25 deletions

View File

@ -144,6 +144,12 @@ func TestScriptReportsTiming(t *testing.T) {
timing := timings[0]
require.Equal(t, int32(0), timing.ExitCode)
if assert.True(t, timing.Start.IsValid(), "start time should be valid") {
require.NotZero(t, timing.Start.AsTime(), "start time should not be zero")
}
if assert.True(t, timing.End.IsValid(), "end time should be valid") {
require.NotZero(t, timing.End.AsTime(), "end time should not be zero")
}
require.GreaterOrEqual(t, timing.End.AsTime(), timing.Start.AsTime())
}

View File

@ -18,11 +18,29 @@ type ScriptsAPI struct {
func (s *ScriptsAPI) ScriptCompleted(ctx context.Context, req *agentproto.WorkspaceAgentScriptCompletedRequest) (*agentproto.WorkspaceAgentScriptCompletedResponse, error) {
res := &agentproto.WorkspaceAgentScriptCompletedResponse{}
scriptID, err := uuid.FromBytes(req.Timing.ScriptId)
if req.GetTiming() == nil {
return nil, xerrors.New("script timing is required")
}
scriptID, err := uuid.FromBytes(req.GetTiming().GetScriptId())
if err != nil {
return nil, xerrors.Errorf("script id from bytes: %w", err)
}
scriptStart := req.GetTiming().GetStart()
if !scriptStart.IsValid() || scriptStart.AsTime().IsZero() {
return nil, xerrors.New("script start time is required and cannot be zero")
}
scriptEnd := req.GetTiming().GetEnd()
if !scriptEnd.IsValid() || scriptEnd.AsTime().IsZero() {
return nil, xerrors.New("script end time is required and cannot be zero")
}
if scriptStart.AsTime().After(scriptEnd.AsTime()) {
return nil, xerrors.New("script start time cannot be after end time")
}
var stage database.WorkspaceAgentScriptTimingStage
switch req.Timing.Stage {
case agentproto.Timing_START:

View File

@ -6,6 +6,7 @@ import (
"time"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
"go.uber.org/mock/gomock"
"google.golang.org/protobuf/types/known/timestamppb"
@ -22,6 +23,8 @@ func TestScriptCompleted(t *testing.T) {
tests := []struct {
scriptID uuid.UUID
timing *agentproto.Timing
expectInsert bool
expectError string
}{
{
scriptID: uuid.New(),
@ -32,6 +35,7 @@ func TestScriptCompleted(t *testing.T) {
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: true,
},
{
scriptID: uuid.New(),
@ -42,6 +46,7 @@ func TestScriptCompleted(t *testing.T) {
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: true,
},
{
scriptID: uuid.New(),
@ -52,6 +57,7 @@ func TestScriptCompleted(t *testing.T) {
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: true,
},
{
scriptID: uuid.New(),
@ -62,6 +68,7 @@ func TestScriptCompleted(t *testing.T) {
Status: agentproto.Timing_TIMED_OUT,
ExitCode: 255,
},
expectInsert: true,
},
{
scriptID: uuid.New(),
@ -72,6 +79,67 @@ func TestScriptCompleted(t *testing.T) {
Status: agentproto.Timing_EXIT_FAILURE,
ExitCode: 1,
},
expectInsert: true,
},
{
scriptID: uuid.New(),
timing: &agentproto.Timing{
Stage: agentproto.Timing_START,
Start: nil,
End: timestamppb.New(dbtime.Now().Add(time.Second)),
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: false,
expectError: "script start time is required and cannot be zero",
},
{
scriptID: uuid.New(),
timing: &agentproto.Timing{
Stage: agentproto.Timing_START,
Start: timestamppb.New(dbtime.Now()),
End: nil,
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: false,
expectError: "script end time is required and cannot be zero",
},
{
scriptID: uuid.New(),
timing: &agentproto.Timing{
Stage: agentproto.Timing_START,
Start: timestamppb.New(time.Time{}),
End: timestamppb.New(dbtime.Now()),
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: false,
expectError: "script start time is required and cannot be zero",
},
{
scriptID: uuid.New(),
timing: &agentproto.Timing{
Stage: agentproto.Timing_START,
Start: timestamppb.New(dbtime.Now()),
End: timestamppb.New(time.Time{}),
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: false,
expectError: "script end time is required and cannot be zero",
},
{
scriptID: uuid.New(),
timing: &agentproto.Timing{
Stage: agentproto.Timing_START,
Start: timestamppb.New(dbtime.Now()),
End: timestamppb.New(dbtime.Now().Add(-time.Second)),
Status: agentproto.Timing_OK,
ExitCode: 0,
},
expectInsert: false,
expectError: "script start time cannot be after end time",
},
}
@ -80,6 +148,7 @@ func TestScriptCompleted(t *testing.T) {
tt.timing.ScriptId = tt.scriptID[:]
mDB := dbmock.NewMockStore(gomock.NewController(t))
if tt.expectInsert {
mDB.EXPECT().InsertWorkspaceAgentScriptTimings(gomock.Any(), database.InsertWorkspaceAgentScriptTimingsParams{
ScriptID: tt.scriptID,
Stage: protoScriptTimingStageToDatabase(tt.timing.Stage),
@ -88,11 +157,17 @@ func TestScriptCompleted(t *testing.T) {
EndedAt: tt.timing.End.AsTime(),
ExitCode: tt.timing.ExitCode,
})
}
api := &agentapi.ScriptsAPI{Database: mDB}
api.ScriptCompleted(context.Background(), &agentproto.WorkspaceAgentScriptCompletedRequest{
_, err := api.ScriptCompleted(context.Background(), &agentproto.WorkspaceAgentScriptCompletedRequest{
Timing: tt.timing,
})
if tt.expectError != "" {
require.Contains(t, err.Error(), tt.expectError, "expected error did not match")
} else {
require.NoError(t, err, "expected no error but got one")
}
}
}

View File

@ -215,6 +215,17 @@ func WorkspaceAgent(t testing.TB, db database.Store, orig database.WorkspaceAgen
APIKeyScope: takeFirst(orig.APIKeyScope, database.AgentKeyScopeEnumAll),
})
require.NoError(t, err, "insert workspace agent")
if orig.FirstConnectedAt.Valid || orig.LastConnectedAt.Valid || orig.DisconnectedAt.Valid || orig.LastConnectedReplicaID.Valid {
err = db.UpdateWorkspaceAgentConnectionByID(genCtx, database.UpdateWorkspaceAgentConnectionByIDParams{
ID: agt.ID,
FirstConnectedAt: takeFirst(orig.FirstConnectedAt, agt.FirstConnectedAt),
LastConnectedAt: takeFirst(orig.LastConnectedAt, agt.LastConnectedAt),
DisconnectedAt: takeFirst(orig.DisconnectedAt, agt.DisconnectedAt),
LastConnectedReplicaID: takeFirst(orig.LastConnectedReplicaID, agt.LastConnectedReplicaID),
UpdatedAt: takeFirst(orig.UpdatedAt, agt.UpdatedAt),
})
require.NoError(t, err, "update workspace agent first connected at")
}
return agt
}

View File

@ -1741,8 +1741,15 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro
JobID: jobID,
}
for _, t := range jobType.WorkspaceBuild.Timings {
if t.Start == nil || t.End == nil {
s.Logger.Warn(ctx, "timings entry has nil start or end time", slog.F("entry", t.String()))
start := t.GetStart()
if !start.IsValid() || start.AsTime().IsZero() {
s.Logger.Warn(ctx, "timings entry has nil or zero start time", slog.F("job_id", job.ID.String()), slog.F("workspace_id", workspace.ID), slog.F("workspace_build_id", workspaceBuild.ID), slog.F("user_id", workspace.OwnerID))
continue
}
end := t.GetEnd()
if !end.IsValid() || end.AsTime().IsZero() {
s.Logger.Warn(ctx, "timings entry has nil or zero end time, skipping", slog.F("job_id", job.ID.String()), slog.F("workspace_id", workspace.ID), slog.F("workspace_build_id", workspaceBuild.ID), slog.F("user_id", workspace.OwnerID))
continue
}
@ -1771,7 +1778,7 @@ func (s *server) completeWorkspaceBuildJob(ctx context.Context, job database.Pro
// after being started.
//
// Agent timeouts could be minutes apart, resulting in an unresponsive
// experience, so we'll notify after every unique timeout seconds.
// experience, so we'll notify after every unique timeout seconds
if !input.DryRun && workspaceBuild.Transition == database.WorkspaceTransitionStart && len(agentTimeouts) > 0 {
timeouts := maps.Keys(agentTimeouts)
slices.Sort(timeouts)

View File

@ -1301,14 +1301,57 @@ func TestCompleteJob(t *testing.T) {
Name: "test-workspace-resource",
Type: "aws_instance",
}},
Timings: []*sdkproto.Timing{{
Timings: []*sdkproto.Timing{
{
Stage: "test",
Source: "test-source",
Resource: "test-resource",
Action: "test-action",
Start: timestamppb.Now(),
End: timestamppb.Now(),
}},
},
{
Stage: "test2",
Source: "test-source2",
Resource: "test-resource2",
Action: "test-action2",
// Start: omitted
// End: omitted
},
{
Stage: "test3",
Source: "test-source3",
Resource: "test-resource3",
Action: "test-action3",
Start: timestamppb.Now(),
End: nil,
},
{
Stage: "test3",
Source: "test-source3",
Resource: "test-resource3",
Action: "test-action3",
Start: nil,
End: timestamppb.Now(),
},
{
Stage: "test4",
Source: "test-source4",
Resource: "test-resource4",
Action: "test-action4",
Start: timestamppb.New(time.Time{}),
End: timestamppb.Now(),
},
{
Stage: "test5",
Source: "test-source5",
Resource: "test-resource5",
Action: "test-action5",
Start: timestamppb.Now(),
End: timestamppb.New(time.Time{}),
},
nil, // nil timing should be ignored
},
},
},
})

View File

@ -1179,6 +1179,16 @@ func (api *API) buildTimings(ctx context.Context, build database.WorkspaceBuild)
}
for _, t := range provisionerTimings {
// Ref: #15432: agent script timings must not have a zero start or end time.
if t.StartedAt.IsZero() || t.EndedAt.IsZero() {
api.Logger.Debug(ctx, "ignoring provisioner timing with zero start or end time",
slog.F("workspace_id", build.WorkspaceID),
slog.F("workspace_build_id", build.ID),
slog.F("provisioner_job_id", t.JobID),
)
continue
}
res.ProvisionerTimings = append(res.ProvisionerTimings, codersdk.ProvisionerTiming{
JobID: t.JobID,
Stage: codersdk.TimingStage(t.Stage),
@ -1190,6 +1200,17 @@ func (api *API) buildTimings(ctx context.Context, build database.WorkspaceBuild)
})
}
for _, t := range agentScriptTimings {
// Ref: #15432: agent script timings must not have a zero start or end time.
if t.StartedAt.IsZero() || t.EndedAt.IsZero() {
api.Logger.Debug(ctx, "ignoring agent script timing with zero start or end time",
slog.F("workspace_id", build.WorkspaceID),
slog.F("workspace_agent_id", t.WorkspaceAgentID),
slog.F("workspace_build_id", build.ID),
slog.F("workspace_agent_script_id", t.ScriptID),
)
continue
}
res.AgentScriptTimings = append(res.AgentScriptTimings, codersdk.AgentScriptTiming{
StartedAt: t.StartedAt,
EndedAt: t.EndedAt,
@ -1202,6 +1223,14 @@ func (api *API) buildTimings(ctx context.Context, build database.WorkspaceBuild)
})
}
for _, agent := range agents {
if agent.FirstConnectedAt.Time.IsZero() {
api.Logger.Debug(ctx, "ignoring agent connection timing with zero first connected time",
slog.F("workspace_id", build.WorkspaceID),
slog.F("workspace_agent_id", agent.ID),
slog.F("workspace_build_id", build.ID),
)
continue
}
res.AgentConnectionTimings = append(res.AgentConnectionTimings, codersdk.AgentConnectionTiming{
WorkspaceAgentID: agent.ID.String(),
WorkspaceAgentName: agent.Name,

View File

@ -2,6 +2,7 @@ package coderd_test
import (
"context"
"database/sql"
"errors"
"fmt"
"net/http"
@ -1737,6 +1738,7 @@ func TestWorkspaceBuildTimings(t *testing.T) {
})
agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
ResourceID: resource.ID,
FirstConnectedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-time.Hour)},
})
// When: fetching timings for the build
@ -1768,6 +1770,7 @@ func TestWorkspaceBuildTimings(t *testing.T) {
for i := range agents {
agents[i] = dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
ResourceID: resource.ID,
FirstConnectedAt: sql.NullTime{Valid: true, Time: dbtime.Now().Add(-time.Duration(i) * time.Hour)},
})
}