refactor: replace startup script logs EOF with starting/ready time (#8082)

This commit reverts some of the changes in #8029 and implements an
alternative method of keeping track of when the startup script has ended
and there will be no more logs.

This is achieved by adding new agent fields for tracking when the agent
enters the "starting" and "ready"/"start_error" lifecycle states. The
timestamps simplify logic since we don't need understand if the current
state is before or after the state we're interested in. They can also be
used to show data like how long the startup script took to execute. This
also allowed us to remove the EOF field from the logs as the
implementation was problematic when we returned the EOF log entry in the
response since requesting _after_ that ID would give no logs and the API
would thus lose track of EOF.
This commit is contained in:
Mathias Fredriksson
2023-06-20 14:41:55 +03:00
committed by GitHub
parent b1d1b63113
commit 8dac0356ed
29 changed files with 462 additions and 540 deletions

18
coderd/apidoc/docs.go generated
View File

@ -5812,6 +5812,9 @@ const docTemplate = `{
"agentsdk.PostLifecycleRequest": {
"type": "object",
"properties": {
"changed_at": {
"type": "string"
},
"state": {
"$ref": "#/definitions/codersdk.WorkspaceAgentLifecycle"
}
@ -5856,9 +5859,6 @@ const docTemplate = `{
"created_at": {
"type": "string"
},
"eof": {
"type": "boolean"
},
"level": {
"$ref": "#/definitions/codersdk.LogLevel"
},
@ -9256,6 +9256,10 @@ const docTemplate = `{
"operating_system": {
"type": "string"
},
"ready_at": {
"type": "string",
"format": "date-time"
},
"resource_id": {
"type": "string",
"format": "uuid"
@ -9266,6 +9270,10 @@ const docTemplate = `{
"shutdown_script_timeout_seconds": {
"type": "integer"
},
"started_at": {
"type": "string",
"format": "date-time"
},
"startup_logs_length": {
"type": "integer"
},
@ -9388,10 +9396,6 @@ const docTemplate = `{
"type": "string",
"format": "date-time"
},
"eof": {
"description": "EOF indicates that this is the last log entry and the file is closed.",
"type": "boolean"
},
"id": {
"type": "integer"
},

View File

@ -5127,6 +5127,9 @@
"agentsdk.PostLifecycleRequest": {
"type": "object",
"properties": {
"changed_at": {
"type": "string"
},
"state": {
"$ref": "#/definitions/codersdk.WorkspaceAgentLifecycle"
}
@ -5171,9 +5174,6 @@
"created_at": {
"type": "string"
},
"eof": {
"type": "boolean"
},
"level": {
"$ref": "#/definitions/codersdk.LogLevel"
},
@ -8352,6 +8352,10 @@
"operating_system": {
"type": "string"
},
"ready_at": {
"type": "string",
"format": "date-time"
},
"resource_id": {
"type": "string",
"format": "uuid"
@ -8362,6 +8366,10 @@
"shutdown_script_timeout_seconds": {
"type": "integer"
},
"started_at": {
"type": "string",
"format": "date-time"
},
"startup_logs_length": {
"type": "integer"
},
@ -8484,10 +8492,6 @@
"type": "string",
"format": "date-time"
},
"eof": {
"description": "EOF indicates that this is the last log entry and the file is closed.",
"type": "boolean"
},
"id": {
"type": "integer"
},

View File

@ -1385,6 +1385,14 @@ func (q *querier) GetWorkspaceAgentByInstanceID(ctx context.Context, authInstanc
return agent, nil
}
func (q *querier) GetWorkspaceAgentLifecycleStateByID(ctx context.Context, id uuid.UUID) (database.GetWorkspaceAgentLifecycleStateByIDRow, error) {
_, err := q.GetWorkspaceAgentByID(ctx, id)
if err != nil {
return database.GetWorkspaceAgentLifecycleStateByIDRow{}, err
}
return q.db.GetWorkspaceAgentLifecycleStateByID(ctx, id)
}
func (q *querier) GetWorkspaceAgentMetadata(ctx context.Context, workspaceAgentID uuid.UUID) ([]database.WorkspaceAgentMetadatum, error) {
workspace, err := q.db.GetWorkspaceByAgentID(ctx, workspaceAgentID)
if err != nil {
@ -1407,14 +1415,6 @@ func (q *querier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg dat
return q.db.GetWorkspaceAgentStartupLogsAfter(ctx, arg)
}
func (q *querier) GetWorkspaceAgentStartupLogsEOF(ctx context.Context, agentID uuid.UUID) (bool, error) {
_, err := q.GetWorkspaceAgentByID(ctx, agentID)
if err != nil {
return false, err
}
return q.db.GetWorkspaceAgentStartupLogsEOF(ctx, agentID)
}
func (q *querier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
return q.db.GetWorkspaceAgentStats(ctx, createdAfter)
}
@ -2324,12 +2324,7 @@ func (q *querier) UpdateWorkspaceAgentConnectionByID(ctx context.Context, arg da
}
func (q *querier) UpdateWorkspaceAgentLifecycleStateByID(ctx context.Context, arg database.UpdateWorkspaceAgentLifecycleStateByIDParams) error {
agent, err := q.db.GetWorkspaceAgentByID(ctx, arg.ID)
if err != nil {
return err
}
workspace, err := q.db.GetWorkspaceByAgentID(ctx, agent.ID)
workspace, err := q.db.GetWorkspaceByAgentID(ctx, arg.ID)
if err != nil {
return err
}

View File

@ -2696,6 +2696,21 @@ func (q *fakeQuerier) GetWorkspaceAgentByInstanceID(_ context.Context, instanceI
return database.WorkspaceAgent{}, sql.ErrNoRows
}
func (q *fakeQuerier) GetWorkspaceAgentLifecycleStateByID(ctx context.Context, id uuid.UUID) (database.GetWorkspaceAgentLifecycleStateByIDRow, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
agent, err := q.getWorkspaceAgentByIDNoLock(ctx, id)
if err != nil {
return database.GetWorkspaceAgentLifecycleStateByIDRow{}, err
}
return database.GetWorkspaceAgentLifecycleStateByIDRow{
LifecycleState: agent.LifecycleState,
StartedAt: agent.StartedAt,
ReadyAt: agent.ReadyAt,
}, nil
}
func (q *fakeQuerier) GetWorkspaceAgentMetadata(_ context.Context, workspaceAgentID uuid.UUID) ([]database.WorkspaceAgentMetadatum, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
@ -2730,22 +2745,6 @@ func (q *fakeQuerier) GetWorkspaceAgentStartupLogsAfter(_ context.Context, arg d
return logs, nil
}
func (q *fakeQuerier) GetWorkspaceAgentStartupLogsEOF(_ context.Context, agentID uuid.UUID) (bool, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
var lastLog database.WorkspaceAgentStartupLog
for _, log := range q.workspaceAgentLogs {
if log.AgentID != agentID {
continue
}
if log.ID > lastLog.ID {
lastLog = log
}
}
return lastLog.EOF, nil
}
func (q *fakeQuerier) GetWorkspaceAgentStats(_ context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
@ -4042,7 +4041,6 @@ func (q *fakeQuerier) InsertWorkspaceAgentStartupLogs(_ context.Context, arg dat
CreatedAt: arg.CreatedAt[index],
Level: arg.Level[index],
Output: output,
EOF: arg.EOF[index],
})
outputLength += int32(len(output))
}
@ -4901,6 +4899,8 @@ func (q *fakeQuerier) UpdateWorkspaceAgentLifecycleStateByID(_ context.Context,
for i, agent := range q.workspaceAgents {
if agent.ID == arg.ID {
agent.LifecycleState = arg.LifecycleState
agent.StartedAt = arg.StartedAt
agent.ReadyAt = arg.ReadyAt
q.workspaceAgents[i] = agent
return nil
}

View File

@ -724,6 +724,13 @@ func (m metricsStore) GetWorkspaceAgentByInstanceID(ctx context.Context, authIns
return agent, err
}
func (m metricsStore) GetWorkspaceAgentLifecycleStateByID(ctx context.Context, id uuid.UUID) (database.GetWorkspaceAgentLifecycleStateByIDRow, error) {
start := time.Now()
r0, r1 := m.s.GetWorkspaceAgentLifecycleStateByID(ctx, id)
m.queryLatencies.WithLabelValues("GetWorkspaceAgentLifecycleStateByID").Observe(time.Since(start).Seconds())
return r0, r1
}
func (m metricsStore) GetWorkspaceAgentMetadata(ctx context.Context, workspaceAgentID uuid.UUID) ([]database.WorkspaceAgentMetadatum, error) {
start := time.Now()
metadata, err := m.s.GetWorkspaceAgentMetadata(ctx, workspaceAgentID)
@ -738,13 +745,6 @@ func (m metricsStore) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg
return logs, err
}
func (m metricsStore) GetWorkspaceAgentStartupLogsEOF(ctx context.Context, agentID uuid.UUID) (bool, error) {
start := time.Now()
r0, r1 := m.s.GetWorkspaceAgentStartupLogsEOF(ctx, agentID)
m.queryLatencies.WithLabelValues("GetWorkspaceAgentStartupLogsEOF").Observe(time.Since(start).Seconds())
return r0, r1
}
func (m metricsStore) GetWorkspaceAgentStats(ctx context.Context, createdAt time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
start := time.Now()
stats, err := m.s.GetWorkspaceAgentStats(ctx, createdAt)

View File

@ -1423,6 +1423,21 @@ func (mr *MockStoreMockRecorder) GetWorkspaceAgentByInstanceID(arg0, arg1 interf
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceAgentByInstanceID", reflect.TypeOf((*MockStore)(nil).GetWorkspaceAgentByInstanceID), arg0, arg1)
}
// GetWorkspaceAgentLifecycleStateByID mocks base method.
func (m *MockStore) GetWorkspaceAgentLifecycleStateByID(arg0 context.Context, arg1 uuid.UUID) (database.GetWorkspaceAgentLifecycleStateByIDRow, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "GetWorkspaceAgentLifecycleStateByID", arg0, arg1)
ret0, _ := ret[0].(database.GetWorkspaceAgentLifecycleStateByIDRow)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// GetWorkspaceAgentLifecycleStateByID indicates an expected call of GetWorkspaceAgentLifecycleStateByID.
func (mr *MockStoreMockRecorder) GetWorkspaceAgentLifecycleStateByID(arg0, arg1 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceAgentLifecycleStateByID", reflect.TypeOf((*MockStore)(nil).GetWorkspaceAgentLifecycleStateByID), arg0, arg1)
}
// GetWorkspaceAgentMetadata mocks base method.
func (m *MockStore) GetWorkspaceAgentMetadata(arg0 context.Context, arg1 uuid.UUID) ([]database.WorkspaceAgentMetadatum, error) {
m.ctrl.T.Helper()
@ -1453,21 +1468,6 @@ func (mr *MockStoreMockRecorder) GetWorkspaceAgentStartupLogsAfter(arg0, arg1 in
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceAgentStartupLogsAfter", reflect.TypeOf((*MockStore)(nil).GetWorkspaceAgentStartupLogsAfter), arg0, arg1)
}
// GetWorkspaceAgentStartupLogsEOF mocks base method.
func (m *MockStore) GetWorkspaceAgentStartupLogsEOF(arg0 context.Context, arg1 uuid.UUID) (bool, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "GetWorkspaceAgentStartupLogsEOF", arg0, arg1)
ret0, _ := ret[0].(bool)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// GetWorkspaceAgentStartupLogsEOF indicates an expected call of GetWorkspaceAgentStartupLogsEOF.
func (mr *MockStoreMockRecorder) GetWorkspaceAgentStartupLogsEOF(arg0, arg1 interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetWorkspaceAgentStartupLogsEOF", reflect.TypeOf((*MockStore)(nil).GetWorkspaceAgentStartupLogsEOF), arg0, arg1)
}
// GetWorkspaceAgentStats mocks base method.
func (m *MockStore) GetWorkspaceAgentStats(arg0 context.Context, arg1 time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
m.ctrl.T.Helper()

View File

@ -549,12 +549,9 @@ CREATE TABLE workspace_agent_startup_logs (
created_at timestamp with time zone NOT NULL,
output character varying(1024) NOT NULL,
id bigint NOT NULL,
level log_level DEFAULT 'info'::log_level NOT NULL,
eof boolean DEFAULT false NOT NULL
level log_level DEFAULT 'info'::log_level NOT NULL
);
COMMENT ON COLUMN workspace_agent_startup_logs.eof IS 'End of file reached';
CREATE SEQUENCE workspace_agent_startup_logs_id_seq
START WITH 1
INCREMENT BY 1
@ -616,6 +613,8 @@ CREATE TABLE workspace_agents (
startup_logs_overflowed boolean DEFAULT false NOT NULL,
subsystem workspace_agent_subsystem DEFAULT 'none'::workspace_agent_subsystem NOT NULL,
startup_script_behavior startup_script_behavior DEFAULT 'non-blocking'::startup_script_behavior NOT NULL,
started_at timestamp with time zone,
ready_at timestamp with time zone,
CONSTRAINT max_startup_logs_length CHECK ((startup_logs_length <= 1048576))
);
@ -643,6 +642,10 @@ COMMENT ON COLUMN workspace_agents.startup_logs_overflowed IS 'Whether the start
COMMENT ON COLUMN workspace_agents.startup_script_behavior IS 'When startup script behavior is non-blocking, the workspace will be ready and accessible upon agent connection, when it is blocking, workspace will wait for the startup script to complete before becoming ready and accessible.';
COMMENT ON COLUMN workspace_agents.started_at IS 'The time the agent entered the starting lifecycle state';
COMMENT ON COLUMN workspace_agents.ready_at IS 'The time the agent entered the ready or start_error lifecycle state';
CREATE TABLE workspace_apps (
id uuid NOT NULL,
created_at timestamp with time zone NOT NULL,

View File

@ -0,0 +1,13 @@
BEGIN;
ALTER TABLE workspace_agents
DROP COLUMN started_at,
DROP COLUMN ready_at;
-- We won't bring back log entries where eof = TRUE, but this doesn't matter
-- as the implementation doesn't require it and hasn't been part of a release.
ALTER TABLE workspace_agent_startup_logs ADD COLUMN eof boolean NOT NULL DEFAULT false;
COMMENT ON COLUMN workspace_agent_startup_logs.eof IS 'End of file reached';
COMMIT;

View File

@ -0,0 +1,14 @@
BEGIN;
DELETE FROM workspace_agent_startup_logs WHERE eof IS TRUE;
ALTER TABLE workspace_agent_startup_logs DROP COLUMN eof;
ALTER TABLE workspace_agents
ADD COLUMN started_at TIMESTAMP WITH TIME ZONE DEFAULT NULL,
ADD COLUMN ready_at TIMESTAMP WITH TIME ZONE DEFAULT NULL;
COMMENT ON COLUMN workspace_agents.started_at IS 'The time the agent entered the starting lifecycle state';
COMMENT ON COLUMN workspace_agents.ready_at IS 'The time the agent entered the ready or start_error lifecycle state';
COMMIT;

View File

@ -1712,6 +1712,10 @@ type WorkspaceAgent struct {
Subsystem WorkspaceAgentSubsystem `db:"subsystem" json:"subsystem"`
// When startup script behavior is non-blocking, the workspace will be ready and accessible upon agent connection, when it is blocking, workspace will wait for the startup script to complete before becoming ready and accessible.
StartupScriptBehavior StartupScriptBehavior `db:"startup_script_behavior" json:"startup_script_behavior"`
// The time the agent entered the starting lifecycle state
StartedAt sql.NullTime `db:"started_at" json:"started_at"`
// The time the agent entered the ready or start_error lifecycle state
ReadyAt sql.NullTime `db:"ready_at" json:"ready_at"`
}
type WorkspaceAgentMetadatum struct {
@ -1732,8 +1736,6 @@ type WorkspaceAgentStartupLog struct {
Output string `db:"output" json:"output"`
ID int64 `db:"id" json:"id"`
Level LogLevel `db:"level" json:"level"`
// End of file reached
EOF bool `db:"eof" json:"eof"`
}
type WorkspaceAgentStat struct {

View File

@ -125,9 +125,9 @@ type sqlcQuerier interface {
GetWorkspaceAgentByAuthToken(ctx context.Context, authToken uuid.UUID) (WorkspaceAgent, error)
GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (WorkspaceAgent, error)
GetWorkspaceAgentByInstanceID(ctx context.Context, authInstanceID string) (WorkspaceAgent, error)
GetWorkspaceAgentLifecycleStateByID(ctx context.Context, id uuid.UUID) (GetWorkspaceAgentLifecycleStateByIDRow, error)
GetWorkspaceAgentMetadata(ctx context.Context, workspaceAgentID uuid.UUID) ([]WorkspaceAgentMetadatum, error)
GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg GetWorkspaceAgentStartupLogsAfterParams) ([]WorkspaceAgentStartupLog, error)
GetWorkspaceAgentStartupLogsEOF(ctx context.Context, agentID uuid.UUID) (bool, error)
GetWorkspaceAgentStats(ctx context.Context, createdAt time.Time) ([]GetWorkspaceAgentStatsRow, error)
GetWorkspaceAgentStatsAndLabels(ctx context.Context, createdAt time.Time) ([]GetWorkspaceAgentStatsAndLabelsRow, error)
GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceAgent, error)

View File

@ -114,7 +114,6 @@ func TestInsertWorkspaceAgentStartupLogs(t *testing.T) {
CreatedAt: []time.Time{database.Now()},
Output: []string{"first"},
Level: []database.LogLevel{database.LogLevelInfo},
EOF: []bool{false},
// 1 MB is the max
OutputLength: 1 << 20,
})
@ -126,7 +125,6 @@ func TestInsertWorkspaceAgentStartupLogs(t *testing.T) {
CreatedAt: []time.Time{database.Now()},
Output: []string{"second"},
Level: []database.LogLevel{database.LogLevelInfo},
EOF: []bool{false},
OutputLength: 1,
})
require.True(t, database.IsStartupLogsLimitError(err))

View File

@ -5259,7 +5259,7 @@ func (q *sqlQuerier) DeleteOldWorkspaceAgentStartupLogs(ctx context.Context) err
const getWorkspaceAgentByAuthToken = `-- name: GetWorkspaceAgentByAuthToken :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior, started_at, ready_at
FROM
workspace_agents
WHERE
@ -5303,13 +5303,15 @@ func (q *sqlQuerier) GetWorkspaceAgentByAuthToken(ctx context.Context, authToken
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
)
return i, err
}
const getWorkspaceAgentByID = `-- name: GetWorkspaceAgentByID :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior, started_at, ready_at
FROM
workspace_agents
WHERE
@ -5351,13 +5353,15 @@ func (q *sqlQuerier) GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (W
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
)
return i, err
}
const getWorkspaceAgentByInstanceID = `-- name: GetWorkspaceAgentByInstanceID :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior, started_at, ready_at
FROM
workspace_agents
WHERE
@ -5401,10 +5405,36 @@ func (q *sqlQuerier) GetWorkspaceAgentByInstanceID(ctx context.Context, authInst
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
)
return i, err
}
const getWorkspaceAgentLifecycleStateByID = `-- name: GetWorkspaceAgentLifecycleStateByID :one
SELECT
lifecycle_state,
started_at,
ready_at
FROM
workspace_agents
WHERE
id = $1
`
type GetWorkspaceAgentLifecycleStateByIDRow struct {
LifecycleState WorkspaceAgentLifecycleState `db:"lifecycle_state" json:"lifecycle_state"`
StartedAt sql.NullTime `db:"started_at" json:"started_at"`
ReadyAt sql.NullTime `db:"ready_at" json:"ready_at"`
}
func (q *sqlQuerier) GetWorkspaceAgentLifecycleStateByID(ctx context.Context, id uuid.UUID) (GetWorkspaceAgentLifecycleStateByIDRow, error) {
row := q.db.QueryRowContext(ctx, getWorkspaceAgentLifecycleStateByID, id)
var i GetWorkspaceAgentLifecycleStateByIDRow
err := row.Scan(&i.LifecycleState, &i.StartedAt, &i.ReadyAt)
return i, err
}
const getWorkspaceAgentMetadata = `-- name: GetWorkspaceAgentMetadata :many
SELECT
workspace_agent_id, display_name, key, script, value, error, timeout, interval, collected_at
@ -5449,7 +5479,7 @@ func (q *sqlQuerier) GetWorkspaceAgentMetadata(ctx context.Context, workspaceAge
const getWorkspaceAgentStartupLogsAfter = `-- name: GetWorkspaceAgentStartupLogsAfter :many
SELECT
agent_id, created_at, output, id, level, eof
agent_id, created_at, output, id, level
FROM
workspace_agent_startup_logs
WHERE
@ -5479,7 +5509,6 @@ func (q *sqlQuerier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg
&i.Output,
&i.ID,
&i.Level,
&i.EOF,
); err != nil {
return nil, err
}
@ -5494,29 +5523,9 @@ func (q *sqlQuerier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg
return items, nil
}
const getWorkspaceAgentStartupLogsEOF = `-- name: GetWorkspaceAgentStartupLogsEOF :one
SELECT CASE WHEN EXISTS (
SELECT
agent_id, created_at, output, id, level, eof
FROM
workspace_agent_startup_logs
WHERE
agent_id = $1
AND eof = true
LIMIT 1
) THEN TRUE ELSE FALSE END
`
func (q *sqlQuerier) GetWorkspaceAgentStartupLogsEOF(ctx context.Context, agentID uuid.UUID) (bool, error) {
row := q.db.QueryRowContext(ctx, getWorkspaceAgentStartupLogsEOF, agentID)
var column_1 bool
err := row.Scan(&column_1)
return column_1, err
}
const getWorkspaceAgentsByResourceIDs = `-- name: GetWorkspaceAgentsByResourceIDs :many
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior, started_at, ready_at
FROM
workspace_agents
WHERE
@ -5564,6 +5573,8 @@ func (q *sqlQuerier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
); err != nil {
return nil, err
}
@ -5579,7 +5590,7 @@ func (q *sqlQuerier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []
}
const getWorkspaceAgentsCreatedAfter = `-- name: GetWorkspaceAgentsCreatedAfter :many
SELECT id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior FROM workspace_agents WHERE created_at > $1
SELECT id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior, started_at, ready_at FROM workspace_agents WHERE created_at > $1
`
func (q *sqlQuerier) GetWorkspaceAgentsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceAgent, error) {
@ -5623,6 +5634,8 @@ func (q *sqlQuerier) GetWorkspaceAgentsCreatedAfter(ctx context.Context, created
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
); err != nil {
return nil, err
}
@ -5639,7 +5652,7 @@ func (q *sqlQuerier) GetWorkspaceAgentsCreatedAfter(ctx context.Context, created
const getWorkspaceAgentsInLatestBuildByWorkspaceID = `-- name: GetWorkspaceAgentsInLatestBuildByWorkspaceID :many
SELECT
workspace_agents.id, workspace_agents.created_at, workspace_agents.updated_at, workspace_agents.name, workspace_agents.first_connected_at, workspace_agents.last_connected_at, workspace_agents.disconnected_at, workspace_agents.resource_id, workspace_agents.auth_token, workspace_agents.auth_instance_id, workspace_agents.architecture, workspace_agents.environment_variables, workspace_agents.operating_system, workspace_agents.startup_script, workspace_agents.instance_metadata, workspace_agents.resource_metadata, workspace_agents.directory, workspace_agents.version, workspace_agents.last_connected_replica_id, workspace_agents.connection_timeout_seconds, workspace_agents.troubleshooting_url, workspace_agents.motd_file, workspace_agents.lifecycle_state, workspace_agents.startup_script_timeout_seconds, workspace_agents.expanded_directory, workspace_agents.shutdown_script, workspace_agents.shutdown_script_timeout_seconds, workspace_agents.startup_logs_length, workspace_agents.startup_logs_overflowed, workspace_agents.subsystem, workspace_agents.startup_script_behavior
workspace_agents.id, workspace_agents.created_at, workspace_agents.updated_at, workspace_agents.name, workspace_agents.first_connected_at, workspace_agents.last_connected_at, workspace_agents.disconnected_at, workspace_agents.resource_id, workspace_agents.auth_token, workspace_agents.auth_instance_id, workspace_agents.architecture, workspace_agents.environment_variables, workspace_agents.operating_system, workspace_agents.startup_script, workspace_agents.instance_metadata, workspace_agents.resource_metadata, workspace_agents.directory, workspace_agents.version, workspace_agents.last_connected_replica_id, workspace_agents.connection_timeout_seconds, workspace_agents.troubleshooting_url, workspace_agents.motd_file, workspace_agents.lifecycle_state, workspace_agents.startup_script_timeout_seconds, workspace_agents.expanded_directory, workspace_agents.shutdown_script, workspace_agents.shutdown_script_timeout_seconds, workspace_agents.startup_logs_length, workspace_agents.startup_logs_overflowed, workspace_agents.subsystem, workspace_agents.startup_script_behavior, workspace_agents.started_at, workspace_agents.ready_at
FROM
workspace_agents
JOIN
@ -5699,6 +5712,8 @@ func (q *sqlQuerier) GetWorkspaceAgentsInLatestBuildByWorkspaceID(ctx context.Co
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
); err != nil {
return nil, err
}
@ -5739,7 +5754,7 @@ INSERT INTO
shutdown_script_timeout_seconds
)
VALUES
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) RETURNING id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) RETURNING id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed, subsystem, startup_script_behavior, started_at, ready_at
`
type InsertWorkspaceAgentParams struct {
@ -5823,6 +5838,8 @@ func (q *sqlQuerier) InsertWorkspaceAgent(ctx context.Context, arg InsertWorkspa
&i.StartupLogsOverflowed,
&i.Subsystem,
&i.StartupScriptBehavior,
&i.StartedAt,
&i.ReadyAt,
)
return i, err
}
@ -5865,17 +5882,16 @@ func (q *sqlQuerier) InsertWorkspaceAgentMetadata(ctx context.Context, arg Inser
const insertWorkspaceAgentStartupLogs = `-- name: InsertWorkspaceAgentStartupLogs :many
WITH new_length AS (
UPDATE workspace_agents SET
startup_logs_length = startup_logs_length + $6 WHERE workspace_agents.id = $1
startup_logs_length = startup_logs_length + $5 WHERE workspace_agents.id = $1
)
INSERT INTO
workspace_agent_startup_logs (agent_id, created_at, output, level, eof)
workspace_agent_startup_logs (agent_id, created_at, output, level)
SELECT
$1 :: uuid AS agent_id,
unnest($2 :: timestamptz [ ]) AS created_at,
unnest($3 :: VARCHAR(1024) [ ]) AS output,
unnest($4 :: log_level [ ]) AS level,
unnest($5 :: boolean [ ]) AS eof
RETURNING workspace_agent_startup_logs.agent_id, workspace_agent_startup_logs.created_at, workspace_agent_startup_logs.output, workspace_agent_startup_logs.id, workspace_agent_startup_logs.level, workspace_agent_startup_logs.eof
unnest($4 :: log_level [ ]) AS level
RETURNING workspace_agent_startup_logs.agent_id, workspace_agent_startup_logs.created_at, workspace_agent_startup_logs.output, workspace_agent_startup_logs.id, workspace_agent_startup_logs.level
`
type InsertWorkspaceAgentStartupLogsParams struct {
@ -5883,7 +5899,6 @@ type InsertWorkspaceAgentStartupLogsParams struct {
CreatedAt []time.Time `db:"created_at" json:"created_at"`
Output []string `db:"output" json:"output"`
Level []LogLevel `db:"level" json:"level"`
EOF []bool `db:"eof" json:"eof"`
OutputLength int32 `db:"output_length" json:"output_length"`
}
@ -5893,7 +5908,6 @@ func (q *sqlQuerier) InsertWorkspaceAgentStartupLogs(ctx context.Context, arg In
pq.Array(arg.CreatedAt),
pq.Array(arg.Output),
pq.Array(arg.Level),
pq.Array(arg.EOF),
arg.OutputLength,
)
if err != nil {
@ -5909,7 +5923,6 @@ func (q *sqlQuerier) InsertWorkspaceAgentStartupLogs(ctx context.Context, arg In
&i.Output,
&i.ID,
&i.Level,
&i.EOF,
); err != nil {
return nil, err
}
@ -5962,7 +5975,9 @@ const updateWorkspaceAgentLifecycleStateByID = `-- name: UpdateWorkspaceAgentLif
UPDATE
workspace_agents
SET
lifecycle_state = $2
lifecycle_state = $2,
started_at = $3,
ready_at = $4
WHERE
id = $1
`
@ -5970,10 +5985,17 @@ WHERE
type UpdateWorkspaceAgentLifecycleStateByIDParams struct {
ID uuid.UUID `db:"id" json:"id"`
LifecycleState WorkspaceAgentLifecycleState `db:"lifecycle_state" json:"lifecycle_state"`
StartedAt sql.NullTime `db:"started_at" json:"started_at"`
ReadyAt sql.NullTime `db:"ready_at" json:"ready_at"`
}
func (q *sqlQuerier) UpdateWorkspaceAgentLifecycleStateByID(ctx context.Context, arg UpdateWorkspaceAgentLifecycleStateByIDParams) error {
_, err := q.db.ExecContext(ctx, updateWorkspaceAgentLifecycleStateByID, arg.ID, arg.LifecycleState)
_, err := q.db.ExecContext(ctx, updateWorkspaceAgentLifecycleStateByID,
arg.ID,
arg.LifecycleState,
arg.StartedAt,
arg.ReadyAt,
)
return err
}

View File

@ -87,11 +87,24 @@ SET
WHERE
id = $1;
-- name: GetWorkspaceAgentLifecycleStateByID :one
SELECT
lifecycle_state,
started_at,
ready_at
FROM
workspace_agents
WHERE
id = $1;
-- name: UpdateWorkspaceAgentLifecycleStateByID :exec
UPDATE
workspace_agents
SET
lifecycle_state = $2
lifecycle_state = $2,
started_at = $3,
ready_at = $4
WHERE
id = $1;
@ -146,31 +159,18 @@ WHERE
id > @created_after
) ORDER BY id ASC;
-- name: GetWorkspaceAgentStartupLogsEOF :one
SELECT CASE WHEN EXISTS (
SELECT
*
FROM
workspace_agent_startup_logs
WHERE
agent_id = $1
AND eof = true
LIMIT 1
) THEN TRUE ELSE FALSE END;
-- name: InsertWorkspaceAgentStartupLogs :many
WITH new_length AS (
UPDATE workspace_agents SET
startup_logs_length = startup_logs_length + @output_length WHERE workspace_agents.id = @agent_id
)
INSERT INTO
workspace_agent_startup_logs (agent_id, created_at, output, level, eof)
workspace_agent_startup_logs (agent_id, created_at, output, level)
SELECT
@agent_id :: uuid AS agent_id,
unnest(@created_at :: timestamptz [ ]) AS created_at,
unnest(@output :: VARCHAR(1024) [ ]) AS output,
unnest(@level :: log_level [ ]) AS level,
unnest(@eof :: boolean [ ]) AS eof
unnest(@level :: log_level [ ]) AS level
RETURNING workspace_agent_startup_logs.*;
-- If an agent hasn't connected in the last 7 days, we purge it's logs.

View File

@ -259,16 +259,8 @@ func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.R
createdAt := make([]time.Time, 0)
output := make([]string, 0)
level := make([]database.LogLevel, 0)
eof := make([]bool, 0)
outputLength := 0
for i, logEntry := range req.Logs {
if logEntry.EOF && i != len(req.Logs)-1 {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "EOF log must be the last log entry.",
})
return
}
for _, logEntry := range req.Logs {
createdAt = append(createdAt, logEntry.CreatedAt)
output = append(output, logEntry.Output)
outputLength += len(logEntry.Output)
@ -285,22 +277,21 @@ func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.R
return
}
level = append(level, parsedLevel)
eof = append(eof, logEntry.EOF)
}
var logs []database.WorkspaceAgentStartupLog
// Ensure logs are not written after EOF.
eofError := xerrors.New("EOF log already received")
// Ensure logs are not written after script ended.
scriptEndedError := xerrors.New("startup script has ended")
err := api.Database.InTx(func(db database.Store) error {
isEOF, err := db.GetWorkspaceAgentStartupLogsEOF(ctx, workspaceAgent.ID)
state, err := db.GetWorkspaceAgentLifecycleStateByID(ctx, workspaceAgent.ID)
if err != nil {
return xerrors.Errorf("EOF status: %w", err)
return xerrors.Errorf("workspace agent startup script status: %w", err)
}
if isEOF {
// The agent has already sent an EOF log, so we don't need to process
// any more logs.
return eofError
if state.ReadyAt.Valid {
// The agent startup script has already ended, so we don't want to
// process any more logs.
return scriptEndedError
}
logs, err = db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
@ -308,15 +299,14 @@ func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.R
CreatedAt: createdAt,
Output: output,
Level: level,
EOF: eof,
OutputLength: int32(outputLength),
})
return err
}, nil)
if err != nil {
if errors.Is(err, eofError) {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Startup log has been closed.",
if errors.Is(err, scriptEndedError) {
httpapi.Write(ctx, rw, http.StatusConflict, codersdk.Response{
Message: "Failed to upload logs, startup script has already ended.",
Detail: err.Error(),
})
return
@ -368,14 +358,12 @@ func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.R
return
}
firstLog := logs[0]
lastLog := logs[len(logs)-1]
lowestLogID := logs[0].ID
// Publish by the lowest log ID inserted so the
// log stream will fetch everything from that point.
api.publishWorkspaceAgentStartupLogsUpdate(ctx, workspaceAgent.ID, agentsdk.StartupLogsNotifyMessage{
CreatedAfter: firstLog.ID - 1,
EndOfLogs: lastLog.EOF,
CreatedAfter: lowestLogID - 1,
})
if workspaceAgent.StartupLogsLength == 0 {
@ -492,22 +480,23 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
return
}
lastSentLogID := after
if len(logs) > 0 {
last := logs[len(logs)-1]
if last.EOF {
// The startup script has finished running, so we can close the connection.
return
}
lastSentLogID = last.ID
if workspaceAgent.ReadyAt.Valid {
// Fast path, the startup script has finished running, so we can close
// the connection.
return
}
if !codersdk.WorkspaceAgentLifecycle(workspaceAgent.LifecycleState).Starting() {
// Backwards compatibility: Avoid waiting forever in case this agent was
// created before the current release.
// Backwards compatibility: Avoid waiting forever in case this agent is
// older than the current release and has already reported the ready
// state.
return
}
lastSentLogID := after
if len(logs) > 0 {
lastSentLogID = logs[len(logs)-1].ID
}
notifyCh := make(chan struct{}, 1)
// Allow us to immediately check if we missed any logs
// between initial fetch and subscribe.
@ -537,8 +526,11 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
t := time.NewTicker(recheckInterval)
defer t.Stop()
var state database.GetWorkspaceAgentLifecycleStateByIDRow
go func() {
defer close(bufferedLogs)
var err error
for {
select {
case <-ctx.Done():
@ -548,6 +540,17 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
t.Reset(recheckInterval)
}
if !state.ReadyAt.Valid {
state, err = api.Database.GetWorkspaceAgentLifecycleStateByID(ctx, workspaceAgent.ID)
if err != nil {
if xerrors.Is(err, context.Canceled) {
return
}
logger.Warn(ctx, "failed to get workspace agent lifecycle state", slog.Error(err))
continue
}
}
logs, err := api.Database.GetWorkspaceAgentStartupLogsAfter(ctx, database.GetWorkspaceAgentStartupLogsAfterParams{
AgentID: workspaceAgent.ID,
CreatedAfter: lastSentLogID,
@ -560,6 +563,9 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
continue
}
if len(logs) == 0 {
if state.ReadyAt.Valid {
return
}
continue
}
@ -569,9 +575,6 @@ func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Reques
case bufferedLogs <- logs:
lastSentLogID = logs[len(logs)-1].ID
}
if logs[len(logs)-1].EOF {
return
}
}
}()
defer func() {
@ -1165,6 +1168,8 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
Architecture: dbAgent.Architecture,
OperatingSystem: dbAgent.OperatingSystem,
StartupScript: dbAgent.StartupScript.String,
StartupScriptBehavior: codersdk.WorkspaceAgentStartupScriptBehavior(dbAgent.StartupScriptBehavior),
StartupScriptTimeoutSeconds: dbAgent.StartupScriptTimeoutSeconds,
StartupLogsLength: dbAgent.StartupLogsLength,
StartupLogsOverflowed: dbAgent.StartupLogsOverflowed,
Version: dbAgent.Version,
@ -1176,8 +1181,6 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
TroubleshootingURL: troubleshootingURL,
LifecycleState: codersdk.WorkspaceAgentLifecycle(dbAgent.LifecycleState),
LoginBeforeReady: dbAgent.StartupScriptBehavior != database.StartupScriptBehaviorBlocking,
StartupScriptBehavior: codersdk.WorkspaceAgentStartupScriptBehavior(dbAgent.StartupScriptBehavior),
StartupScriptTimeoutSeconds: dbAgent.StartupScriptTimeoutSeconds,
ShutdownScript: dbAgent.ShutdownScript.String,
ShutdownScriptTimeoutSeconds: dbAgent.ShutdownScriptTimeoutSeconds,
Subsystem: codersdk.AgentSubsystem(dbAgent.Subsystem),
@ -1214,6 +1217,13 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
workspaceAgent.LastConnectedAt = status.LastConnectedAt
workspaceAgent.DisconnectedAt = status.DisconnectedAt
if dbAgent.StartedAt.Valid {
workspaceAgent.StartedAt = &dbAgent.StartedAt.Time
}
if dbAgent.ReadyAt.Valid {
workspaceAgent.ReadyAt = &dbAgent.ReadyAt.Time
}
return workspaceAgent, nil
}
@ -1592,56 +1602,38 @@ func (api *API) workspaceAgentReportLifecycle(rw http.ResponseWriter, r *http.Re
return
}
if req.ChangedAt.IsZero() {
// Backwards compatibility with older agents.
req.ChangedAt = database.Now()
}
changedAt := sql.NullTime{Time: req.ChangedAt, Valid: true}
startedAt := workspaceAgent.StartedAt
readyAt := workspaceAgent.ReadyAt
switch lifecycleState {
case codersdk.WorkspaceAgentLifecycleStarting:
startedAt = changedAt
readyAt.Valid = false // This agent is re-starting, so it's not ready yet.
case codersdk.WorkspaceAgentLifecycleReady, codersdk.WorkspaceAgentLifecycleStartError:
readyAt = changedAt
}
err = api.Database.UpdateWorkspaceAgentLifecycleStateByID(ctx, database.UpdateWorkspaceAgentLifecycleStateByIDParams{
ID: workspaceAgent.ID,
LifecycleState: dbLifecycleState,
StartedAt: startedAt,
ReadyAt: readyAt,
})
if err != nil {
logger.Error(ctx, "failed to update lifecycle state", slog.Error(err))
httpapi.InternalServerError(rw, err)
return
}
if !lifecycleState.Starting() {
var eofLog []database.WorkspaceAgentStartupLog
// Ensure the startup logs are marked as complete if the agent
// is no longer starting. This should be reported by the agent
// itself, but we do it here as a fallback.
err = api.Database.InTx(func(db database.Store) error {
isEOF, err := db.GetWorkspaceAgentStartupLogsEOF(ctx, workspaceAgent.ID)
if err != nil {
return xerrors.Errorf("EOF status: %w", err)
}
if isEOF {
return nil
}
logger.Debug(ctx, "marking startup logs as complete because agent is no longer starting")
eofLog, err = db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
AgentID: workspaceAgent.ID,
CreatedAt: []time.Time{database.Now()},
Output: []string{""},
Level: []database.LogLevel{database.LogLevelInfo},
EOF: []bool{true},
OutputLength: 0,
})
if err != nil {
return xerrors.Errorf("write EOF log entry: %w", err)
}
return nil
}, nil)
if err != nil {
logger.Warn(ctx, "failed to mark startup logs as complete", slog.Error(err))
// If this fails, we want the agent to keep trying so that the
// startup log is eventually marked as complete.
httpapi.InternalServerError(rw, err)
return
}
if len(eofLog) > 0 {
api.publishWorkspaceAgentStartupLogsUpdate(ctx, workspaceAgent.ID, agentsdk.StartupLogsNotifyMessage{
CreatedAfter: eofLog[0].ID - 1,
EndOfLogs: true,
})
}
if readyAt.Valid {
api.publishWorkspaceAgentStartupLogsUpdate(ctx, workspaceAgent.ID, agentsdk.StartupLogsNotifyMessage{
EndOfLogs: true,
})
}
api.publishWorkspaceUpdate(ctx, workspace.ID)
@ -2079,7 +2071,6 @@ func convertWorkspaceAgentStartupLog(logEntry database.WorkspaceAgentStartupLog)
CreatedAt: logEntry.CreatedAt,
Output: logEntry.Output,
Level: codersdk.LogLevel(logEntry.Level),
EOF: logEntry.EOF,
}
}

View File

@ -301,124 +301,6 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
}
}
})
t.Run("AllowEOFAfterOverflowAndCloseFollowWebsocket", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
client := coderdtest.New(t, &coderdtest.Options{
IncludeProvisionerDaemon: true,
})
user := coderdtest.CreateFirstUser(t, client)
authToken := uuid.NewString()
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{
Resources: []*proto.Resource{{
Name: "example",
Type: "aws_instance",
Agents: []*proto.Agent{{
Id: uuid.NewString(),
Auth: &proto.Agent_Token{
Token: authToken,
},
}},
}},
},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
updates, err := client.WatchWorkspace(ctx, workspace.ID)
require.NoError(t, err)
logs, closeLogs, err := client.WorkspaceAgentStartupLogsAfter(ctx, build.Resources[0].Agents[0].ID, 0)
require.NoError(t, err)
defer closeLogs.Close()
wantLogs := []codersdk.WorkspaceAgentStartupLog{
{
CreatedAt: database.Now(),
Output: "testing",
Level: "info",
},
{
CreatedAt: database.Now().Add(time.Minute),
Level: "info",
EOF: true,
},
}
agentClient := agentsdk.New(client.URL)
agentClient.SetSessionToken(authToken)
var convertedLogs []agentsdk.StartupLog
for _, log := range wantLogs {
convertedLogs = append(convertedLogs, agentsdk.StartupLog{
CreatedAt: log.CreatedAt,
Output: log.Output,
Level: log.Level,
EOF: log.EOF,
})
}
initialLogs := convertedLogs[:len(convertedLogs)-1]
eofLog := convertedLogs[len(convertedLogs)-1]
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: initialLogs})
require.NoError(t, err)
overflowLogs := []agentsdk.StartupLog{
{
CreatedAt: database.Now(),
Output: strings.Repeat("a", (1<<20)+1),
},
eofLog, // Include EOF which will be discarded due to overflow.
}
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: overflowLogs})
var apiError *codersdk.Error
require.ErrorAs(t, err, &apiError)
require.Equal(t, http.StatusRequestEntityTooLarge, apiError.StatusCode())
// It's possible we have multiple updates queued, but that's alright, we just
// wait for the one where it overflows.
for {
var update codersdk.Workspace
select {
case <-ctx.Done():
require.Fail(t, "timed out waiting for overflow")
case update = <-updates:
}
if update.LatestBuild.Resources[0].Agents[0].StartupLogsOverflowed {
break
}
}
// Now we should still be able to send the EOF.
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: []agentsdk.StartupLog{eofLog}})
require.NoError(t, err)
var gotLogs []codersdk.WorkspaceAgentStartupLog
logsLoop:
for {
select {
case <-ctx.Done():
require.Fail(t, "timed out waiting for logs")
case l, ok := <-logs:
if !ok {
break logsLoop
}
gotLogs = append(gotLogs, l...)
}
}
for i := range gotLogs {
gotLogs[i].ID = 0 // Ignore ID for comparison.
}
require.Equal(t, wantLogs, gotLogs)
})
t.Run("CloseAfterLifecycleStateIsNotRunning", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
@ -472,25 +354,26 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
require.NoError(t, err)
err = agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
State: codersdk.WorkspaceAgentLifecycleReady,
State: codersdk.WorkspaceAgentLifecycleReady,
ChangedAt: time.Now(),
})
require.NoError(t, err)
var gotLogs []codersdk.WorkspaceAgentStartupLog
for {
select {
case <-ctx.Done():
require.Fail(t, "timed out waiting for logs EOF")
case l := <-logs:
for _, log := range l {
if log.EOF {
// Success.
return
}
require.Fail(t, "timed out waiting for logs to end")
case l, ok := <-logs:
gotLogs = append(gotLogs, l...)
if !ok {
require.Len(t, gotLogs, 1, "expected one log")
return // Success.
}
}
}
})
t.Run("NoLogAfterEOF", func(t *testing.T) {
t.Run("NoLogAfterScriptEnded", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
client := coderdtest.New(t, &coderdtest.Options{
@ -526,13 +409,9 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
agentClient := agentsdk.New(client.URL)
agentClient.SetSessionToken(authToken)
err := agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
Logs: []agentsdk.StartupLog{
{
CreatedAt: database.Now(),
EOF: true,
},
},
err := agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
State: codersdk.WorkspaceAgentLifecycleReady,
ChangedAt: time.Now(),
})
require.NoError(t, err)
@ -544,7 +423,7 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
},
},
})
require.Error(t, err, "insert after EOF should not succeed")
require.Error(t, err, "insert after script ended should not succeed")
})
}
@ -1410,7 +1289,8 @@ func TestWorkspaceAgent_LifecycleState(t *testing.T) {
ctx := testutil.Context(t, testutil.WaitLong)
err := agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
State: tt.state,
State: tt.state,
ChangedAt: time.Now(),
})
if tt.wantErr {
require.Error(t, err)