feat: add startup script logs to the ui (#6558)

* Add startup script logs to the database

* Add coderd endpoints for startup script logs

* Push startup script logs from agent

* Pull startup script logs on frontend

* Rename queries

* Add constraint

* Start creating log sending loop

* Add log sending to the agent

* Add tests for streaming logs

* Shorten notify channel name

* Add FE

* Improve bulk log performance

* Finish UI display

* Fix startup log visibility

* Add warning for overflow

* Fix agent queue logs overflow

* Display startup logs in a virtual DOM for performance

* Fix agent queue with loads of logs

* Fix authorize test

* Remove faulty test

* Fix startup and shutdown reporting error

* Fix gen

* Fix comments

* Periodically purge old database entries

* Add test fixture for migration

* Add Storybook

* Check if there are logs when displaying features

* Fix startup component overflow gap

* Fix startup log wrapping

---------

Co-authored-by: Asher <ash@coder.com>
Kyle Carberry
2023-03-23 14:09:13 -05:00
committed by GitHub
parent a6fa8cac58
commit cb7375450b
57 changed files with 2513 additions and 353 deletions

coderd/apidoc/docs.go (generated, 145 changes)
View File

@ -2644,13 +2644,13 @@ const docTemplate = `{
},
{
"type": "integer",
"description": "Before Unix timestamp",
"description": "Before log id",
"name": "before",
"in": "query"
},
{
"type": "integer",
"description": "After Unix timestamp",
"description": "After log id",
"name": "after",
"in": "query"
},
@ -4402,6 +4402,48 @@ const docTemplate = `{
}
}
},
"/workspaceagents/me/startup-logs": {
"patch": {
"security": [
{
"CoderSessionToken": []
}
],
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"tags": [
"Agents"
],
"summary": "Patch workspace agent startup logs",
"operationId": "patch-workspace-agent-startup-logs",
"parameters": [
{
"description": "Startup logs",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/agentsdk.PatchStartupLogs"
}
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"$ref": "#/definitions/codersdk.Response"
}
}
},
"x-apidocgen": {
"skip": true
}
}
},
"/workspaceagents/{workspaceagent}": {
"get": {
"security": [
@ -4565,6 +4607,62 @@ const docTemplate = `{
}
}
},
"/workspaceagents/{workspaceagent}/startup-logs": {
"get": {
"security": [
{
"CoderSessionToken": []
}
],
"produces": [
"application/json"
],
"tags": [
"Agents"
],
"summary": "Get startup logs by workspace agent",
"operationId": "get-startup-logs-by-workspace-agent",
"parameters": [
{
"type": "string",
"format": "uuid",
"description": "Workspace agent ID",
"name": "workspaceagent",
"in": "path",
"required": true
},
{
"type": "integer",
"description": "Before log id",
"name": "before",
"in": "query"
},
{
"type": "integer",
"description": "After log id",
"name": "after",
"in": "query"
},
{
"type": "boolean",
"description": "Follow log stream",
"name": "follow",
"in": "query"
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/codersdk.WorkspaceAgentStartupLog"
}
}
}
}
}
},
"/workspacebuilds/{workspacebuild}": {
"get": {
"security": [
@ -5344,6 +5442,17 @@ const docTemplate = `{
}
}
},
"agentsdk.PatchStartupLogs": {
"type": "object",
"properties": {
"logs": {
"type": "array",
"items": {
"$ref": "#/definitions/agentsdk.StartupLog"
}
}
}
},
"agentsdk.PostAppHealthsRequest": {
"type": "object",
"properties": {
@ -5375,6 +5484,17 @@ const docTemplate = `{
}
}
},
"agentsdk.StartupLog": {
"type": "object",
"properties": {
"created_at": {
"type": "string"
},
"output": {
"type": "string"
}
}
},
"agentsdk.Stats": {
"type": "object",
"properties": {
@ -8680,6 +8800,12 @@ const docTemplate = `{
"shutdown_script_timeout_seconds": {
"type": "integer"
},
"startup_logs_length": {
"type": "integer"
},
"startup_logs_overflowed": {
"type": "boolean"
},
"startup_script": {
"type": "string"
},
@ -8763,6 +8889,21 @@ const docTemplate = `{
}
}
},
"codersdk.WorkspaceAgentStartupLog": {
"type": "object",
"properties": {
"created_at": {
"type": "string",
"format": "date-time"
},
"id": {
"type": "integer"
},
"output": {
"type": "string"
}
}
},
"codersdk.WorkspaceAgentStatus": {
"type": "string",
"enum": [

View File

@ -2320,13 +2320,13 @@
},
{
"type": "integer",
"description": "Before Unix timestamp",
"description": "Before log id",
"name": "before",
"in": "query"
},
{
"type": "integer",
"description": "After Unix timestamp",
"description": "After log id",
"name": "after",
"in": "query"
},
@ -3866,6 +3866,42 @@
}
}
},
"/workspaceagents/me/startup-logs": {
"patch": {
"security": [
{
"CoderSessionToken": []
}
],
"consumes": ["application/json"],
"produces": ["application/json"],
"tags": ["Agents"],
"summary": "Patch workspace agent startup logs",
"operationId": "patch-workspace-agent-startup-logs",
"parameters": [
{
"description": "Startup logs",
"name": "request",
"in": "body",
"required": true,
"schema": {
"$ref": "#/definitions/agentsdk.PatchStartupLogs"
}
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"$ref": "#/definitions/codersdk.Response"
}
}
},
"x-apidocgen": {
"skip": true
}
}
},
"/workspaceagents/{workspaceagent}": {
"get": {
"security": [
@ -4013,6 +4049,58 @@
}
}
},
"/workspaceagents/{workspaceagent}/startup-logs": {
"get": {
"security": [
{
"CoderSessionToken": []
}
],
"produces": ["application/json"],
"tags": ["Agents"],
"summary": "Get startup logs by workspace agent",
"operationId": "get-startup-logs-by-workspace-agent",
"parameters": [
{
"type": "string",
"format": "uuid",
"description": "Workspace agent ID",
"name": "workspaceagent",
"in": "path",
"required": true
},
{
"type": "integer",
"description": "Before log id",
"name": "before",
"in": "query"
},
{
"type": "integer",
"description": "After log id",
"name": "after",
"in": "query"
},
{
"type": "boolean",
"description": "Follow log stream",
"name": "follow",
"in": "query"
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"type": "array",
"items": {
"$ref": "#/definitions/codersdk.WorkspaceAgentStartupLog"
}
}
}
}
}
},
"/workspacebuilds/{workspacebuild}": {
"get": {
"security": [
@ -4715,6 +4803,17 @@
}
}
},
"agentsdk.PatchStartupLogs": {
"type": "object",
"properties": {
"logs": {
"type": "array",
"items": {
"$ref": "#/definitions/agentsdk.StartupLog"
}
}
}
},
"agentsdk.PostAppHealthsRequest": {
"type": "object",
"properties": {
@ -4746,6 +4845,17 @@
}
}
},
"agentsdk.StartupLog": {
"type": "object",
"properties": {
"created_at": {
"type": "string"
},
"output": {
"type": "string"
}
}
},
"agentsdk.Stats": {
"type": "object",
"properties": {
@ -7824,6 +7934,12 @@
"shutdown_script_timeout_seconds": {
"type": "integer"
},
"startup_logs_length": {
"type": "integer"
},
"startup_logs_overflowed": {
"type": "boolean"
},
"startup_script": {
"type": "string"
},
@ -7907,6 +8023,21 @@
}
}
},
"codersdk.WorkspaceAgentStartupLog": {
"type": "object",
"properties": {
"created_at": {
"type": "string",
"format": "date-time"
},
"id": {
"type": "integer"
},
"output": {
"type": "string"
}
}
},
"codersdk.WorkspaceAgentStatus": {
"type": "string",
"enum": ["connecting", "connected", "disconnected", "timeout"],

View File

@ -604,6 +604,7 @@ func New(options *Options) *API {
r.Use(httpmw.ExtractWorkspaceAgent(options.Database))
r.Get("/metadata", api.workspaceAgentMetadata)
r.Post("/startup", api.postWorkspaceAgentStartup)
r.Patch("/startup-logs", api.patchWorkspaceAgentStartupLogs)
r.Post("/app-health", api.postWorkspaceAppHealth)
r.Get("/gitauth", api.workspaceAgentsGitAuth)
r.Get("/gitsshkey", api.agentGitSSHKey)
@ -619,6 +620,7 @@ func New(options *Options) *API {
)
r.Get("/", api.workspaceAgent)
r.Get("/pty", api.workspaceAgentPTY)
r.Get("/startup-logs", api.workspaceAgentStartupLogs)
r.Get("/listening-ports", api.workspaceAgentListeningPorts)
r.Get("/connection", api.workspaceAgentConnection)
r.Get("/coordinate", api.workspaceAgentClientCoordinate)

View File

@ -345,6 +345,7 @@ func assertProduce(t *testing.T, comment SwaggerComment) {
} else {
if (comment.router == "/workspaceagents/me/app-health" && comment.method == "post") ||
(comment.router == "/workspaceagents/me/startup" && comment.method == "post") ||
(comment.router == "/workspaceagents/me/startup/logs" && comment.method == "patch") ||
(comment.router == "/licenses/{id}" && comment.method == "delete") ||
(comment.router == "/debug/coordinator" && comment.method == "get") {
return // Exception: HTTP 200 is returned without response entity

View File

@ -263,13 +263,21 @@ func (q *querier) GetProvisionerJobByID(ctx context.Context, id uuid.UUID) (data
return job, nil
}
func (q *querier) GetProvisionerLogsByIDBetween(ctx context.Context, arg database.GetProvisionerLogsByIDBetweenParams) ([]database.ProvisionerJobLog, error) {
func (q *querier) GetProvisionerLogsAfterID(ctx context.Context, arg database.GetProvisionerLogsAfterIDParams) ([]database.ProvisionerJobLog, error) {
// Authorized read on job lets the actor also read the logs.
_, err := q.GetProvisionerJobByID(ctx, arg.JobID)
if err != nil {
return nil, err
}
return q.db.GetProvisionerLogsByIDBetween(ctx, arg)
return q.db.GetProvisionerLogsAfterID(ctx, arg)
}
func (q *querier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg database.GetWorkspaceAgentStartupLogsAfterParams) ([]database.WorkspaceAgentStartupLog, error) {
_, err := q.GetWorkspaceAgentByID(ctx, arg.AgentID)
if err != nil {
return nil, err
}
return q.db.GetWorkspaceAgentStartupLogsAfter(ctx, arg)
}
func (q *querier) GetLicenses(ctx context.Context) ([]database.License, error) {
@ -1245,6 +1253,24 @@ func (q *querier) UpdateWorkspaceAgentLifecycleStateByID(ctx context.Context, ar
return q.db.UpdateWorkspaceAgentLifecycleStateByID(ctx, arg)
}
func (q *querier) UpdateWorkspaceAgentStartupLogOverflowByID(ctx context.Context, arg database.UpdateWorkspaceAgentStartupLogOverflowByIDParams) error {
agent, err := q.db.GetWorkspaceAgentByID(ctx, arg.ID)
if err != nil {
return err
}
workspace, err := q.db.GetWorkspaceByAgentID(ctx, agent.ID)
if err != nil {
return err
}
if err := q.authorizeContext(ctx, rbac.ActionUpdate, workspace); err != nil {
return err
}
return q.db.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, arg)
}
func (q *querier) UpdateWorkspaceAgentStartupByID(ctx context.Context, arg database.UpdateWorkspaceAgentStartupByIDParams) error {
agent, err := q.db.GetWorkspaceAgentByID(ctx, arg.ID)
if err != nil {

View File

@ -282,13 +282,18 @@ func (s *MethodTestSuite) TestProvsionerJob() {
check.Args(database.UpdateProvisionerJobWithCancelByIDParams{ID: j.ID}).
Asserts(v.RBACObject(tpl), []rbac.Action{rbac.ActionRead, rbac.ActionUpdate}).Returns()
}))
s.Run("GetProvisionerLogsByIDBetween", s.Subtest(func(db database.Store, check *expects) {
s.Run("GetProvisionerJobsByIDs", s.Subtest(func(db database.Store, check *expects) {
a := dbgen.ProvisionerJob(s.T(), db, database.ProvisionerJob{})
b := dbgen.ProvisionerJob(s.T(), db, database.ProvisionerJob{})
check.Args([]uuid.UUID{a.ID, b.ID}).Asserts().Returns(slice.New(a, b))
}))
s.Run("GetProvisionerLogsAfterID", s.Subtest(func(db database.Store, check *expects) {
w := dbgen.Workspace(s.T(), db, database.Workspace{})
j := dbgen.ProvisionerJob(s.T(), db, database.ProvisionerJob{
Type: database.ProvisionerJobTypeWorkspaceBuild,
})
_ = dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{JobID: j.ID, WorkspaceID: w.ID})
check.Args(database.GetProvisionerLogsByIDBetweenParams{
check.Args(database.GetProvisionerLogsAfterIDParams{
JobID: j.ID,
}).Asserts(w, rbac.ActionRead).Returns([]database.ProvisionerJobLog{})
}))
@ -978,6 +983,16 @@ func (s *MethodTestSuite) TestWorkspace() {
LifecycleState: database.WorkspaceAgentLifecycleStateCreated,
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("UpdateWorkspaceAgentStartupLogOverflowByID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
check.Args(database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
ID: agt.ID,
StartupLogsOverflowed: true,
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("UpdateWorkspaceAgentStartupByID", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
@ -987,6 +1002,15 @@ func (s *MethodTestSuite) TestWorkspace() {
ID: agt.ID,
}).Asserts(ws, rbac.ActionUpdate).Returns()
}))
s.Run("GetWorkspaceAgentStartupLogsAfter", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})
res := dbgen.WorkspaceResource(s.T(), db, database.WorkspaceResource{JobID: build.JobID})
agt := dbgen.WorkspaceAgent(s.T(), db, database.WorkspaceAgent{ResourceID: res.ID})
check.Args(database.GetWorkspaceAgentStartupLogsAfterParams{
AgentID: agt.ID,
}).Asserts(ws, rbac.ActionRead).Returns([]database.WorkspaceAgentStartupLog{})
}))
s.Run("GetWorkspaceAppByAgentIDAndSlug", s.Subtest(func(db database.Store, check *expects) {
ws := dbgen.Workspace(s.T(), db, database.Workspace{})
build := dbgen.WorkspaceBuild(s.T(), db, database.WorkspaceBuild{WorkspaceID: ws.ID, JobID: uuid.New()})

View File

@ -280,6 +280,13 @@ func (q *querier) DeleteOldWorkspaceAgentStats(ctx context.Context) error {
return q.db.DeleteOldWorkspaceAgentStats(ctx)
}
func (q *querier) DeleteOldWorkspaceAgentStartupLogs(ctx context.Context) error {
if err := q.authorizeContext(ctx, rbac.ActionDelete, rbac.ResourceSystem); err != nil {
return err
}
return q.db.DeleteOldWorkspaceAgentStartupLogs(ctx)
}
func (q *querier) GetDeploymentWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) (database.GetDeploymentWorkspaceAgentStatsRow, error) {
return q.db.GetDeploymentWorkspaceAgentStats(ctx, createdAfter)
}
@ -370,6 +377,10 @@ func (q *querier) InsertProvisionerJobLogs(ctx context.Context, arg database.Ins
return q.db.InsertProvisionerJobLogs(ctx, arg)
}
func (q *querier) InsertWorkspaceAgentStartupLogs(ctx context.Context, arg database.InsertWorkspaceAgentStartupLogsParams) ([]database.WorkspaceAgentStartupLog, error) {
return q.db.InsertWorkspaceAgentStartupLogs(ctx, arg)
}
// TODO: We need to create a ProvisionerDaemon resource type
func (q *querier) InsertProvisionerDaemon(ctx context.Context, arg database.InsertProvisionerDaemonParams) (database.ProvisionerDaemon, error) {
// if err := q.authorizeContext(ctx, rbac.ActionCreate, rbac.ResourceSystem); err != nil {

View File

@ -60,6 +60,7 @@ func New() database.Store {
templateVersions: make([]database.TemplateVersion, 0),
templates: make([]database.Template, 0),
workspaceAgentStats: make([]database.WorkspaceAgentStat, 0),
workspaceAgentLogs: make([]database.WorkspaceAgentStartupLog, 0),
workspaceBuilds: make([]database.WorkspaceBuild, 0),
workspaceApps: make([]database.WorkspaceApp, 0),
workspaces: make([]database.Workspace, 0),
@ -123,6 +124,7 @@ type data struct {
templateVersionVariables []database.TemplateVersionVariable
templates []database.Template
workspaceAgents []database.WorkspaceAgent
workspaceAgentLogs []database.WorkspaceAgentStartupLog
workspaceApps []database.WorkspaceApp
workspaceBuilds []database.WorkspaceBuild
workspaceBuildParameters []database.WorkspaceBuildParameter
@ -2614,7 +2616,7 @@ func (q *fakeQuerier) GetProvisionerJobsCreatedAfter(_ context.Context, after ti
return jobs, nil
}
func (q *fakeQuerier) GetProvisionerLogsByIDBetween(_ context.Context, arg database.GetProvisionerLogsByIDBetweenParams) ([]database.ProvisionerJobLog, error) {
func (q *fakeQuerier) GetProvisionerLogsAfterID(_ context.Context, arg database.GetProvisionerLogsAfterIDParams) ([]database.ProvisionerJobLog, error) {
if err := validateDatabaseType(arg); err != nil {
return nil, err
}
@ -2627,9 +2629,6 @@ func (q *fakeQuerier) GetProvisionerLogsByIDBetween(_ context.Context, arg datab
if jobLog.JobID != arg.JobID {
continue
}
if arg.CreatedBefore != 0 && jobLog.ID > arg.CreatedBefore {
continue
}
if arg.CreatedAfter != 0 && jobLog.ID < arg.CreatedAfter {
continue
}
@ -3517,6 +3516,70 @@ func (q *fakeQuerier) UpdateWorkspaceAgentStartupByID(_ context.Context, arg dat
return sql.ErrNoRows
}
func (q *fakeQuerier) GetWorkspaceAgentStartupLogsAfter(_ context.Context, arg database.GetWorkspaceAgentStartupLogsAfterParams) ([]database.WorkspaceAgentStartupLog, error) {
if err := validateDatabaseType(arg); err != nil {
return nil, err
}
q.mutex.Lock()
defer q.mutex.Unlock()
logs := []database.WorkspaceAgentStartupLog{}
for _, log := range q.workspaceAgentLogs {
if log.AgentID != arg.AgentID {
continue
}
if arg.CreatedAfter != 0 && log.ID < arg.CreatedAfter {
continue
}
logs = append(logs, log)
}
return logs, nil
}
func (q *fakeQuerier) InsertWorkspaceAgentStartupLogs(_ context.Context, arg database.InsertWorkspaceAgentStartupLogsParams) ([]database.WorkspaceAgentStartupLog, error) {
if err := validateDatabaseType(arg); err != nil {
return nil, err
}
q.mutex.Lock()
defer q.mutex.Unlock()
logs := []database.WorkspaceAgentStartupLog{}
id := int64(1)
if len(q.workspaceAgentLogs) > 0 {
id = q.workspaceAgentLogs[len(q.workspaceAgentLogs)-1].ID
}
outputLength := int32(0)
for index, output := range arg.Output {
id++
logs = append(logs, database.WorkspaceAgentStartupLog{
ID: id,
AgentID: arg.AgentID,
CreatedAt: arg.CreatedAt[index],
Output: output,
})
outputLength += int32(len(output))
}
for index, agent := range q.workspaceAgents {
if agent.ID != arg.AgentID {
continue
}
// Greater than 1MB, same as the PostgreSQL constraint!
if agent.StartupLogsLength+outputLength > (1 << 20) {
return nil, &pq.Error{
Constraint: "max_startup_logs_length",
Table: "workspace_agents",
}
}
agent.StartupLogsLength += outputLength
q.workspaceAgents[index] = agent
break
}
q.workspaceAgentLogs = append(q.workspaceAgentLogs, logs...)
return logs, nil
}
func (q *fakeQuerier) UpdateProvisionerJobByID(_ context.Context, arg database.UpdateProvisionerJobByIDParams) error {
if err := validateDatabaseType(arg); err != nil {
return err
@ -4325,6 +4388,11 @@ func (q *fakeQuerier) DeleteLicense(_ context.Context, id int32) (int32, error)
return 0, sql.ErrNoRows
}
func (*fakeQuerier) DeleteOldWorkspaceAgentStartupLogs(_ context.Context) error {
// noop
return nil
}
func (q *fakeQuerier) GetUserLinkByLinkedID(_ context.Context, id string) (database.UserLink, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
@ -4735,3 +4803,20 @@ func (q *fakeQuerier) UpdateWorkspaceAgentLifecycleStateByID(_ context.Context,
}
return sql.ErrNoRows
}
func (q *fakeQuerier) UpdateWorkspaceAgentStartupLogOverflowByID(_ context.Context, arg database.UpdateWorkspaceAgentStartupLogOverflowByIDParams) error {
if err := validateDatabaseType(arg); err != nil {
return err
}
q.mutex.Lock()
defer q.mutex.Unlock()
for i, agent := range q.workspaceAgents {
if agent.ID == arg.ID {
agent.StartupLogsOverflowed = arg.StartupLogsOverflowed
q.workspaceAgents[i] = agent
return nil
}
}
return sql.ErrNoRows
}
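
Like the real queries, the fake enforces the 1 MiB budget and exposes the one-way overflow flag. A minimal sketch of how a caller flips that flag (the real call site is the agent log handler later in this commit; the snippet itself is illustrative, not part of the diff):

package example

import (
	"context"

	"github.com/google/uuid"

	"github.com/coder/coder/coderd/database"
)

// markStartupLogsOverflowed records that an agent exceeded its startup log
// budget so the UI can warn that the logs are incomplete.
func markStartupLogsOverflowed(ctx context.Context, db database.Store, agentID uuid.UUID) error {
	return db.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
		ID:                    agentID,
		StartupLogsOverflowed: true,
	})
}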

View File

@ -0,0 +1,64 @@
package dbpurge
import (
"context"
"errors"
"io"
"time"
"golang.org/x/sync/errgroup"
"cdr.dev/slog"
"github.com/coder/coder/coderd/database"
)
// New creates a new periodically purging database instance.
// It is the caller's responsibility to call Close on the returned instance.
//
// This is for cleaning up old, unused resources from the database that take up space.
func New(ctx context.Context, logger slog.Logger, db database.Store) io.Closer {
closed := make(chan struct{})
ctx, cancelFunc := context.WithCancel(ctx)
go func() {
defer close(closed)
ticker := time.NewTicker(24 * time.Hour)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
}
var eg errgroup.Group
eg.Go(func() error {
return db.DeleteOldWorkspaceAgentStartupLogs(ctx)
})
eg.Go(func() error {
return db.DeleteOldWorkspaceAgentStats(ctx)
})
err := eg.Wait()
if err != nil {
if errors.Is(err, context.Canceled) {
return
}
logger.Error(ctx, "failed to purge old database entries", slog.Error(err))
}
}
}()
return &instance{
cancel: cancelFunc,
closed: closed,
}
}
type instance struct {
cancel context.CancelFunc
closed chan struct{}
}
func (i *instance) Close() error {
i.cancel()
<-i.closed
return nil
}
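
For context (not part of the diff), the purger is meant to live for the lifetime of the server process; Close cancels the loop and blocks until it exits. A minimal wiring sketch, assuming a ctx, logger, and database.Store already exist in the surrounding code:

package example

import (
	"context"

	"cdr.dev/slog"

	"github.com/coder/coder/coderd/database"
	"github.com/coder/coder/coderd/database/dbpurge"
)

func runServer(ctx context.Context, logger slog.Logger, db database.Store) {
	// Start the background purger; it deletes old agent startup logs and
	// old agent stats once every 24 hours until Close is called.
	purger := dbpurge.New(ctx, logger, db)
	defer purger.Close()

	// ... serve the API here ...
}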

View File

@ -0,0 +1,26 @@
package dbpurge_test
import (
"context"
"testing"
"go.uber.org/goleak"
"github.com/stretchr/testify/require"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/coderd/database/dbfake"
"github.com/coder/coder/coderd/database/dbpurge"
)
func TestMain(m *testing.M) {
goleak.VerifyTestMain(m)
}
// Ensures no goroutines leak.
func TestPurge(t *testing.T) {
t.Parallel()
purger := dbpurge.New(context.Background(), slogtest.Make(t, nil), dbfake.New())
err := purger.Close()
require.NoError(t, err)
}

View File

@ -475,6 +475,22 @@ CREATE TABLE users (
last_seen_at timestamp without time zone DEFAULT '0001-01-01 00:00:00'::timestamp without time zone NOT NULL
);
CREATE TABLE workspace_agent_startup_logs (
agent_id uuid NOT NULL,
created_at timestamp with time zone NOT NULL,
output character varying(1024) NOT NULL,
id bigint NOT NULL
);
CREATE SEQUENCE workspace_agent_startup_logs_id_seq
START WITH 1
INCREMENT BY 1
NO MINVALUE
NO MAXVALUE
CACHE 1;
ALTER SEQUENCE workspace_agent_startup_logs_id_seq OWNED BY workspace_agent_startup_logs.id;
CREATE TABLE workspace_agent_stats (
id uuid NOT NULL,
created_at timestamp with time zone NOT NULL,
@ -523,7 +539,10 @@ CREATE TABLE workspace_agents (
startup_script_timeout_seconds integer DEFAULT 0 NOT NULL,
expanded_directory character varying(4096) DEFAULT ''::character varying NOT NULL,
shutdown_script character varying(65534),
shutdown_script_timeout_seconds integer DEFAULT 0 NOT NULL
shutdown_script_timeout_seconds integer DEFAULT 0 NOT NULL,
startup_logs_length integer DEFAULT 0 NOT NULL,
startup_logs_overflowed boolean DEFAULT false NOT NULL,
CONSTRAINT max_startup_logs_length CHECK ((startup_logs_length <= 1048576))
);
COMMENT ON COLUMN workspace_agents.version IS 'Version tracks the version of the currently running workspace agent. Workspace agents register their version upon start.';
@ -546,6 +565,10 @@ COMMENT ON COLUMN workspace_agents.shutdown_script IS 'Script that is executed b
COMMENT ON COLUMN workspace_agents.shutdown_script_timeout_seconds IS 'The number of seconds to wait for the shutdown script to complete. If the script does not complete within this time, the agent lifecycle will be marked as shutdown_timeout.';
COMMENT ON COLUMN workspace_agents.startup_logs_length IS 'Total length of startup logs';
COMMENT ON COLUMN workspace_agents.startup_logs_overflowed IS 'Whether the startup logs overflowed in length';
CREATE TABLE workspace_apps (
id uuid NOT NULL,
created_at timestamp with time zone NOT NULL,
@ -639,6 +662,8 @@ ALTER TABLE ONLY licenses ALTER COLUMN id SET DEFAULT nextval('licenses_id_seq':
ALTER TABLE ONLY provisioner_job_logs ALTER COLUMN id SET DEFAULT nextval('provisioner_job_logs_id_seq'::regclass);
ALTER TABLE ONLY workspace_agent_startup_logs ALTER COLUMN id SET DEFAULT nextval('workspace_agent_startup_logs_id_seq'::regclass);
ALTER TABLE ONLY workspace_resource_metadata ALTER COLUMN id SET DEFAULT nextval('workspace_resource_metadata_id_seq'::regclass);
ALTER TABLE ONLY workspace_agent_stats
@ -731,6 +756,9 @@ ALTER TABLE ONLY user_links
ALTER TABLE ONLY users
ADD CONSTRAINT users_pkey PRIMARY KEY (id);
ALTER TABLE ONLY workspace_agent_startup_logs
ADD CONSTRAINT workspace_agent_startup_logs_pkey PRIMARY KEY (id);
ALTER TABLE ONLY workspace_agents
ADD CONSTRAINT workspace_agents_pkey PRIMARY KEY (id);
@ -802,6 +830,8 @@ CREATE UNIQUE INDEX users_email_lower_idx ON users USING btree (lower(email)) WH
CREATE UNIQUE INDEX users_username_lower_idx ON users USING btree (lower(username)) WHERE (deleted = false);
CREATE INDEX workspace_agent_startup_logs_id_agent_id_idx ON workspace_agent_startup_logs USING btree (agent_id, id);
CREATE INDEX workspace_agents_auth_token_idx ON workspace_agents USING btree (auth_token);
CREATE INDEX workspace_agents_resource_id_idx ON workspace_agents USING btree (resource_id);
@ -864,6 +894,9 @@ ALTER TABLE ONLY templates
ALTER TABLE ONLY user_links
ADD CONSTRAINT user_links_user_id_fkey FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;
ALTER TABLE ONLY workspace_agent_startup_logs
ADD CONSTRAINT workspace_agent_startup_logs_agent_id_fkey FOREIGN KEY (agent_id) REFERENCES workspace_agents(id) ON DELETE CASCADE;
ALTER TABLE ONLY workspace_agents
ADD CONSTRAINT workspace_agents_resource_id_fkey FOREIGN KEY (resource_id) REFERENCES workspace_resources(id) ON DELETE CASCADE;

View File

@ -45,3 +45,12 @@ func IsQueryCanceledError(err error) bool {
return false
}
func IsStartupLogsLimitError(err error) bool {
var pqErr *pq.Error
if errors.As(err, &pqErr) {
return pqErr.Constraint == "max_startup_logs_length" && pqErr.Table == "workspace_agents"
}
return false
}
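
IsStartupLogsLimitError matches the max_startup_logs_length check constraint added by the migration below. A sketch of the intended call pattern (not part of this commit; it mirrors what the agent log handler does later in the diff):

package example

import (
	"context"
	"time"

	"github.com/google/uuid"
	"golang.org/x/xerrors"

	"github.com/coder/coder/coderd/database"
)

// appendStartupLogs shows how an insert failure from the 1 MiB budget is
// distinguished from other database errors.
func appendStartupLogs(ctx context.Context, db database.Store, agentID uuid.UUID,
	createdAt []time.Time, output []string, outputLength int32) error {
	_, err := db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
		AgentID:      agentID,
		CreatedAt:    createdAt,
		Output:       output,
		OutputLength: outputLength,
	})
	if database.IsStartupLogsLimitError(err) {
		// The caller should mark the agent as overflowed and stop
		// accepting further logs; the HTTP handler answers with 413.
		return xerrors.New("startup logs exceeded the 1 MiB limit")
	}
	return err
}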

View File

@ -0,0 +1,4 @@
DROP TABLE workspace_agent_startup_logs;
ALTER TABLE ONLY workspace_agents
DROP COLUMN startup_logs_length,
DROP COLUMN startup_logs_overflowed;

View File

@ -0,0 +1,18 @@
BEGIN;
CREATE TABLE IF NOT EXISTS workspace_agent_startup_logs (
agent_id uuid NOT NULL REFERENCES workspace_agents (id) ON DELETE CASCADE,
created_at timestamptz NOT NULL,
output varchar(1024) NOT NULL,
id BIGSERIAL PRIMARY KEY
);
CREATE INDEX workspace_agent_startup_logs_id_agent_id_idx ON workspace_agent_startup_logs USING btree (agent_id, id ASC);
-- The maximum length of startup logs is 1MB per workspace agent.
ALTER TABLE workspace_agents ADD COLUMN startup_logs_length integer NOT NULL DEFAULT 0 CONSTRAINT max_startup_logs_length CHECK (startup_logs_length <= 1048576);
ALTER TABLE workspace_agents ADD COLUMN startup_logs_overflowed boolean NOT NULL DEFAULT false;
COMMENT ON COLUMN workspace_agents.startup_logs_length IS 'Total length of startup logs';
COMMENT ON COLUMN workspace_agents.startup_logs_overflowed IS 'Whether the startup logs overflowed in length';
COMMIT;

View File

@ -0,0 +1,9 @@
INSERT INTO workspace_agent_startup_logs (
agent_id,
created_at,
output
) VALUES (
'45e89705-e09d-4850-bcec-f9a937f5d78d',
NOW(),
'output'
);

View File

@ -1569,6 +1569,17 @@ type WorkspaceAgent struct {
ShutdownScript sql.NullString `db:"shutdown_script" json:"shutdown_script"`
// The number of seconds to wait for the shutdown script to complete. If the script does not complete within this time, the agent lifecycle will be marked as shutdown_timeout.
ShutdownScriptTimeoutSeconds int32 `db:"shutdown_script_timeout_seconds" json:"shutdown_script_timeout_seconds"`
// Total length of startup logs
StartupLogsLength int32 `db:"startup_logs_length" json:"startup_logs_length"`
// Whether the startup logs overflowed in length
StartupLogsOverflowed bool `db:"startup_logs_overflowed" json:"startup_logs_overflowed"`
}
type WorkspaceAgentStartupLog struct {
AgentID uuid.UUID `db:"agent_id" json:"agent_id"`
CreatedAt time.Time `db:"created_at" json:"created_at"`
Output string `db:"output" json:"output"`
ID int64 `db:"id" json:"id"`
}
type WorkspaceAgentStat struct {

View File

@ -33,6 +33,9 @@ type sqlcQuerier interface {
DeleteGroupMemberFromGroup(ctx context.Context, arg DeleteGroupMemberFromGroupParams) error
DeleteGroupMembersByOrgAndUser(ctx context.Context, arg DeleteGroupMembersByOrgAndUserParams) error
DeleteLicense(ctx context.Context, id int32) (int32, error)
// If an agent hasn't connected in the last 7 days, we purge its logs.
// Logs can take up a lot of space, so it's important we clean up frequently.
DeleteOldWorkspaceAgentStartupLogs(ctx context.Context) error
DeleteOldWorkspaceAgentStats(ctx context.Context) error
DeleteParameterValueByID(ctx context.Context, id uuid.UUID) error
DeleteReplicasUpdatedBefore(ctx context.Context, updatedAt time.Time) error
@ -87,7 +90,7 @@ type sqlcQuerier interface {
GetProvisionerJobByID(ctx context.Context, id uuid.UUID) (ProvisionerJob, error)
GetProvisionerJobsByIDs(ctx context.Context, ids []uuid.UUID) ([]ProvisionerJob, error)
GetProvisionerJobsCreatedAfter(ctx context.Context, createdAt time.Time) ([]ProvisionerJob, error)
GetProvisionerLogsByIDBetween(ctx context.Context, arg GetProvisionerLogsByIDBetweenParams) ([]ProvisionerJobLog, error)
GetProvisionerLogsAfterID(ctx context.Context, arg GetProvisionerLogsAfterIDParams) ([]ProvisionerJobLog, error)
GetQuotaAllowanceForUser(ctx context.Context, userID uuid.UUID) (int64, error)
GetQuotaConsumedForUser(ctx context.Context, ownerID uuid.UUID) (int64, error)
GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error)
@ -121,6 +124,7 @@ type sqlcQuerier interface {
GetWorkspaceAgentByAuthToken(ctx context.Context, authToken uuid.UUID) (WorkspaceAgent, error)
GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (WorkspaceAgent, error)
GetWorkspaceAgentByInstanceID(ctx context.Context, authInstanceID string) (WorkspaceAgent, error)
GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg GetWorkspaceAgentStartupLogsAfterParams) ([]WorkspaceAgentStartupLog, error)
GetWorkspaceAgentStats(ctx context.Context, createdAt time.Time) ([]GetWorkspaceAgentStatsRow, error)
GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceAgent, error)
GetWorkspaceAgentsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceAgent, error)
@ -181,6 +185,7 @@ type sqlcQuerier interface {
InsertUserLink(ctx context.Context, arg InsertUserLinkParams) (UserLink, error)
InsertWorkspace(ctx context.Context, arg InsertWorkspaceParams) (Workspace, error)
InsertWorkspaceAgent(ctx context.Context, arg InsertWorkspaceAgentParams) (WorkspaceAgent, error)
InsertWorkspaceAgentStartupLogs(ctx context.Context, arg InsertWorkspaceAgentStartupLogsParams) ([]WorkspaceAgentStartupLog, error)
InsertWorkspaceAgentStat(ctx context.Context, arg InsertWorkspaceAgentStatParams) (WorkspaceAgentStat, error)
InsertWorkspaceApp(ctx context.Context, arg InsertWorkspaceAppParams) (WorkspaceApp, error)
InsertWorkspaceBuild(ctx context.Context, arg InsertWorkspaceBuildParams) (WorkspaceBuild, error)
@ -225,6 +230,7 @@ type sqlcQuerier interface {
UpdateWorkspaceAgentConnectionByID(ctx context.Context, arg UpdateWorkspaceAgentConnectionByIDParams) error
UpdateWorkspaceAgentLifecycleStateByID(ctx context.Context, arg UpdateWorkspaceAgentLifecycleStateByIDParams) error
UpdateWorkspaceAgentStartupByID(ctx context.Context, arg UpdateWorkspaceAgentStartupByIDParams) error
UpdateWorkspaceAgentStartupLogOverflowByID(ctx context.Context, arg UpdateWorkspaceAgentStartupLogOverflowByIDParams) error
UpdateWorkspaceAppHealthByID(ctx context.Context, arg UpdateWorkspaceAppHealthByIDParams) error
UpdateWorkspaceAutostart(ctx context.Context, arg UpdateWorkspaceAutostartParams) error
UpdateWorkspaceBuildByID(ctx context.Context, arg UpdateWorkspaceBuildByIDParams) (WorkspaceBuild, error)

View File

@ -86,3 +86,42 @@ func TestGetDeploymentWorkspaceAgentStats(t *testing.T) {
require.Equal(t, int64(1), stats.SessionCountVSCode)
})
}
func TestInsertWorkspaceAgentStartupLogs(t *testing.T) {
t.Parallel()
if testing.Short() {
t.SkipNow()
}
sqlDB := testSQLDB(t)
ctx := context.Background()
err := migrations.Up(sqlDB)
require.NoError(t, err)
db := database.New(sqlDB)
org := dbgen.Organization(t, db, database.Organization{})
job := dbgen.ProvisionerJob(t, db, database.ProvisionerJob{
OrganizationID: org.ID,
})
resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
JobID: job.ID,
})
agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
ResourceID: resource.ID,
})
logs, err := db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
AgentID: agent.ID,
CreatedAt: []time.Time{database.Now()},
Output: []string{"first"},
// 1 MB is the max
OutputLength: 1 << 20,
})
require.NoError(t, err)
require.Equal(t, int64(1), logs[0].ID)
_, err = db.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
AgentID: agent.ID,
CreatedAt: []time.Time{database.Now()},
Output: []string{"second"},
OutputLength: 1,
})
require.True(t, database.IsStartupLogsLimitError(err))
}

View File

@ -2286,7 +2286,7 @@ func (q *sqlQuerier) InsertProvisionerDaemon(ctx context.Context, arg InsertProv
return i, err
}
const getProvisionerLogsByIDBetween = `-- name: GetProvisionerLogsByIDBetween :many
const getProvisionerLogsAfterID = `-- name: GetProvisionerLogsAfterID :many
SELECT
job_id, created_at, source, level, stage, output, id
FROM
@ -2295,18 +2295,16 @@ WHERE
job_id = $1
AND (
id > $2
OR id < $3
) ORDER BY id ASC
`
type GetProvisionerLogsByIDBetweenParams struct {
JobID uuid.UUID `db:"job_id" json:"job_id"`
CreatedAfter int64 `db:"created_after" json:"created_after"`
CreatedBefore int64 `db:"created_before" json:"created_before"`
type GetProvisionerLogsAfterIDParams struct {
JobID uuid.UUID `db:"job_id" json:"job_id"`
CreatedAfter int64 `db:"created_after" json:"created_after"`
}
func (q *sqlQuerier) GetProvisionerLogsByIDBetween(ctx context.Context, arg GetProvisionerLogsByIDBetweenParams) ([]ProvisionerJobLog, error) {
rows, err := q.db.QueryContext(ctx, getProvisionerLogsByIDBetween, arg.JobID, arg.CreatedAfter, arg.CreatedBefore)
func (q *sqlQuerier) GetProvisionerLogsAfterID(ctx context.Context, arg GetProvisionerLogsAfterIDParams) ([]ProvisionerJobLog, error) {
rows, err := q.db.QueryContext(ctx, getProvisionerLogsAfterID, arg.JobID, arg.CreatedAfter)
if err != nil {
return nil, err
}
@ -5072,9 +5070,22 @@ func (q *sqlQuerier) UpdateUserStatus(ctx context.Context, arg UpdateUserStatusP
return i, err
}
const deleteOldWorkspaceAgentStartupLogs = `-- name: DeleteOldWorkspaceAgentStartupLogs :exec
DELETE FROM workspace_agent_startup_logs WHERE agent_id IN
(SELECT id FROM workspace_agents WHERE last_connected_at IS NOT NULL
AND last_connected_at < NOW() - INTERVAL '7 day')
`
// If an agent hasn't connected in the last 7 days, we purge its logs.
// Logs can take up a lot of space, so it's important we clean up frequently.
func (q *sqlQuerier) DeleteOldWorkspaceAgentStartupLogs(ctx context.Context) error {
_, err := q.db.ExecContext(ctx, deleteOldWorkspaceAgentStartupLogs)
return err
}
const getWorkspaceAgentByAuthToken = `-- name: GetWorkspaceAgentByAuthToken :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed
FROM
workspace_agents
WHERE
@ -5115,13 +5126,15 @@ func (q *sqlQuerier) GetWorkspaceAgentByAuthToken(ctx context.Context, authToken
&i.ExpandedDirectory,
&i.ShutdownScript,
&i.ShutdownScriptTimeoutSeconds,
&i.StartupLogsLength,
&i.StartupLogsOverflowed,
)
return i, err
}
const getWorkspaceAgentByID = `-- name: GetWorkspaceAgentByID :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed
FROM
workspace_agents
WHERE
@ -5160,13 +5173,15 @@ func (q *sqlQuerier) GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (W
&i.ExpandedDirectory,
&i.ShutdownScript,
&i.ShutdownScriptTimeoutSeconds,
&i.StartupLogsLength,
&i.StartupLogsOverflowed,
)
return i, err
}
const getWorkspaceAgentByInstanceID = `-- name: GetWorkspaceAgentByInstanceID :one
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed
FROM
workspace_agents
WHERE
@ -5207,13 +5222,60 @@ func (q *sqlQuerier) GetWorkspaceAgentByInstanceID(ctx context.Context, authInst
&i.ExpandedDirectory,
&i.ShutdownScript,
&i.ShutdownScriptTimeoutSeconds,
&i.StartupLogsLength,
&i.StartupLogsOverflowed,
)
return i, err
}
const getWorkspaceAgentStartupLogsAfter = `-- name: GetWorkspaceAgentStartupLogsAfter :many
SELECT
agent_id, created_at, output, id
FROM
workspace_agent_startup_logs
WHERE
agent_id = $1
AND (
id > $2
) ORDER BY id ASC
`
type GetWorkspaceAgentStartupLogsAfterParams struct {
AgentID uuid.UUID `db:"agent_id" json:"agent_id"`
CreatedAfter int64 `db:"created_after" json:"created_after"`
}
func (q *sqlQuerier) GetWorkspaceAgentStartupLogsAfter(ctx context.Context, arg GetWorkspaceAgentStartupLogsAfterParams) ([]WorkspaceAgentStartupLog, error) {
rows, err := q.db.QueryContext(ctx, getWorkspaceAgentStartupLogsAfter, arg.AgentID, arg.CreatedAfter)
if err != nil {
return nil, err
}
defer rows.Close()
var items []WorkspaceAgentStartupLog
for rows.Next() {
var i WorkspaceAgentStartupLog
if err := rows.Scan(
&i.AgentID,
&i.CreatedAt,
&i.Output,
&i.ID,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Close(); err != nil {
return nil, err
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const getWorkspaceAgentsByResourceIDs = `-- name: GetWorkspaceAgentsByResourceIDs :many
SELECT
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds
id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed
FROM
workspace_agents
WHERE
@ -5258,6 +5320,8 @@ func (q *sqlQuerier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []
&i.ExpandedDirectory,
&i.ShutdownScript,
&i.ShutdownScriptTimeoutSeconds,
&i.StartupLogsLength,
&i.StartupLogsOverflowed,
); err != nil {
return nil, err
}
@ -5273,7 +5337,7 @@ func (q *sqlQuerier) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []
}
const getWorkspaceAgentsCreatedAfter = `-- name: GetWorkspaceAgentsCreatedAfter :many
SELECT id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds FROM workspace_agents WHERE created_at > $1
SELECT id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed FROM workspace_agents WHERE created_at > $1
`
func (q *sqlQuerier) GetWorkspaceAgentsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceAgent, error) {
@ -5314,6 +5378,8 @@ func (q *sqlQuerier) GetWorkspaceAgentsCreatedAfter(ctx context.Context, created
&i.ExpandedDirectory,
&i.ShutdownScript,
&i.ShutdownScriptTimeoutSeconds,
&i.StartupLogsLength,
&i.StartupLogsOverflowed,
); err != nil {
return nil, err
}
@ -5354,7 +5420,7 @@ INSERT INTO
shutdown_script_timeout_seconds
)
VALUES
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) RETURNING id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds
($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21) RETURNING id, created_at, updated_at, name, first_connected_at, last_connected_at, disconnected_at, resource_id, auth_token, auth_instance_id, architecture, environment_variables, operating_system, startup_script, instance_metadata, resource_metadata, directory, version, last_connected_replica_id, connection_timeout_seconds, troubleshooting_url, motd_file, lifecycle_state, login_before_ready, startup_script_timeout_seconds, expanded_directory, shutdown_script, shutdown_script_timeout_seconds, startup_logs_length, startup_logs_overflowed
`
type InsertWorkspaceAgentParams struct {
@ -5435,10 +5501,66 @@ func (q *sqlQuerier) InsertWorkspaceAgent(ctx context.Context, arg InsertWorkspa
&i.ExpandedDirectory,
&i.ShutdownScript,
&i.ShutdownScriptTimeoutSeconds,
&i.StartupLogsLength,
&i.StartupLogsOverflowed,
)
return i, err
}
const insertWorkspaceAgentStartupLogs = `-- name: InsertWorkspaceAgentStartupLogs :many
WITH new_length AS (
UPDATE workspace_agents SET
startup_logs_length = startup_logs_length + $4 WHERE workspace_agents.id = $1
)
INSERT INTO
workspace_agent_startup_logs
SELECT
$1 :: uuid AS agent_id,
unnest($2 :: timestamptz [ ]) AS created_at,
unnest($3 :: VARCHAR(1024) [ ]) AS output
RETURNING workspace_agent_startup_logs.agent_id, workspace_agent_startup_logs.created_at, workspace_agent_startup_logs.output, workspace_agent_startup_logs.id
`
type InsertWorkspaceAgentStartupLogsParams struct {
AgentID uuid.UUID `db:"agent_id" json:"agent_id"`
CreatedAt []time.Time `db:"created_at" json:"created_at"`
Output []string `db:"output" json:"output"`
OutputLength int32 `db:"output_length" json:"output_length"`
}
func (q *sqlQuerier) InsertWorkspaceAgentStartupLogs(ctx context.Context, arg InsertWorkspaceAgentStartupLogsParams) ([]WorkspaceAgentStartupLog, error) {
rows, err := q.db.QueryContext(ctx, insertWorkspaceAgentStartupLogs,
arg.AgentID,
pq.Array(arg.CreatedAt),
pq.Array(arg.Output),
arg.OutputLength,
)
if err != nil {
return nil, err
}
defer rows.Close()
var items []WorkspaceAgentStartupLog
for rows.Next() {
var i WorkspaceAgentStartupLog
if err := rows.Scan(
&i.AgentID,
&i.CreatedAt,
&i.Output,
&i.ID,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Close(); err != nil {
return nil, err
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const updateWorkspaceAgentConnectionByID = `-- name: UpdateWorkspaceAgentConnectionByID :exec
UPDATE
workspace_agents
@ -5513,6 +5635,25 @@ func (q *sqlQuerier) UpdateWorkspaceAgentStartupByID(ctx context.Context, arg Up
return err
}
const updateWorkspaceAgentStartupLogOverflowByID = `-- name: UpdateWorkspaceAgentStartupLogOverflowByID :exec
UPDATE
workspace_agents
SET
startup_logs_overflowed = $2
WHERE
id = $1
`
type UpdateWorkspaceAgentStartupLogOverflowByIDParams struct {
ID uuid.UUID `db:"id" json:"id"`
StartupLogsOverflowed bool `db:"startup_logs_overflowed" json:"startup_logs_overflowed"`
}
func (q *sqlQuerier) UpdateWorkspaceAgentStartupLogOverflowByID(ctx context.Context, arg UpdateWorkspaceAgentStartupLogOverflowByIDParams) error {
_, err := q.db.ExecContext(ctx, updateWorkspaceAgentStartupLogOverflowByID, arg.ID, arg.StartupLogsOverflowed)
return err
}
const deleteOldWorkspaceAgentStats = `-- name: DeleteOldWorkspaceAgentStats :exec
DELETE FROM workspace_agent_stats WHERE created_at < NOW() - INTERVAL '30 days'
`

View File

@ -1,4 +1,4 @@
-- name: GetProvisionerLogsByIDBetween :many
-- name: GetProvisionerLogsAfterID :many
SELECT
*
FROM
@ -7,7 +7,6 @@ WHERE
job_id = @job_id
AND (
id > @created_after
OR id < @created_before
) ORDER BY id ASC;
-- name: InsertProvisionerJobLogs :many

View File

@ -93,3 +93,42 @@ SET
lifecycle_state = $2
WHERE
id = $1;
-- name: UpdateWorkspaceAgentStartupLogOverflowByID :exec
UPDATE
workspace_agents
SET
startup_logs_overflowed = $2
WHERE
id = $1;
-- name: GetWorkspaceAgentStartupLogsAfter :many
SELECT
*
FROM
workspace_agent_startup_logs
WHERE
agent_id = $1
AND (
id > @created_after
) ORDER BY id ASC;
-- name: InsertWorkspaceAgentStartupLogs :many
WITH new_length AS (
UPDATE workspace_agents SET
startup_logs_length = startup_logs_length + @output_length WHERE workspace_agents.id = @agent_id
)
INSERT INTO
workspace_agent_startup_logs
SELECT
@agent_id :: uuid AS agent_id,
unnest(@created_at :: timestamptz [ ]) AS created_at,
unnest(@output :: VARCHAR(1024) [ ]) AS output
RETURNING workspace_agent_startup_logs.*;
-- If an agent hasn't connected in the last 7 days, we purge its logs.
-- Logs can take up a lot of space, so it's important we clean up frequently.
-- name: DeleteOldWorkspaceAgentStartupLogs :exec
DELETE FROM workspace_agent_startup_logs WHERE agent_id IN
(SELECT id FROM workspace_agents WHERE last_connected_at IS NOT NULL
AND last_connected_at < NOW() - INTERVAL '7 day');
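
These queries use the same id-based cursor that provisioner job logs switch to in this commit: passing 0 returns the full history because ids start at 1, and passing the last id a client has seen returns only newer entries. A small sketch of the read side (illustrative only, not part of the diff; the real handler lives in workspaceagents.go):

package example

import (
	"context"

	"github.com/google/uuid"

	"github.com/coder/coder/coderd/database"
)

// startupLogsAfter fetches agent startup logs newer than lastSeenID.
func startupLogsAfter(ctx context.Context, db database.Store, agentID uuid.UUID, lastSeenID int64) ([]database.WorkspaceAgentStartupLog, error) {
	return db.GetWorkspaceAgentStartupLogsAfter(ctx, database.GetWorkspaceAgentStartupLogsAfterParams{
		AgentID:      agentID,
		CreatedAfter: lastSeenID,
	})
}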

View File

@ -9,7 +9,6 @@ import (
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
"golang.org/x/xerrors"
"github.com/google/uuid"
@ -168,10 +167,6 @@ func countUniqueUsers(rows []database.GetTemplateDAUsRow) int {
func (c *Cache) refreshTemplateDAUs(ctx context.Context) error {
//nolint:gocritic // This is a system service.
ctx = dbauthz.AsSystemRestricted(ctx)
err := c.database.DeleteOldWorkspaceAgentStats(ctx)
if err != nil {
return xerrors.Errorf("delete old stats: %w", err)
}
templates, err := c.database.GetTemplates(ctx)
if err != nil {

View File

@ -25,57 +25,18 @@ import (
// Returns provisioner logs based on query parameters.
// The intended usage for a client to stream all logs (with JS API):
// const timestamp = new Date().getTime();
// 1. GET /logs?before=<id>
// 2. GET /logs?after=<id>&follow
// GET /logs
// GET /logs?after=<id>&follow
// The combination of these responses should provide all current logs
// to the consumer, and future logs are streamed in the follow request.
func (api *API) provisionerJobLogs(rw http.ResponseWriter, r *http.Request, job database.ProvisionerJob) {
var (
ctx = r.Context()
actor, _ = dbauthz.ActorFromContext(ctx)
logger = api.Logger.With(slog.F("job_id", job.ID))
follow = r.URL.Query().Has("follow")
afterRaw = r.URL.Query().Get("after")
beforeRaw = r.URL.Query().Get("before")
ctx = r.Context()
actor, _ = dbauthz.ActorFromContext(ctx)
logger = api.Logger.With(slog.F("job_id", job.ID))
follow = r.URL.Query().Has("follow")
afterRaw = r.URL.Query().Get("after")
)
if beforeRaw != "" && follow {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Query param \"before\" cannot be used with \"follow\".",
})
return
}
// if we are following logs, start the subscription before we query the database, so that we don't miss any logs
// between the end of our query and the start of the subscription. We might get duplicates, so we'll keep track
// of processed IDs.
var bufferedLogs <-chan *database.ProvisionerJobLog
if follow {
bl, closeFollow, err := api.followLogs(actor, job.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error watching provisioner logs.",
Detail: err.Error(),
})
return
}
defer closeFollow()
bufferedLogs = bl
// Next query the job itself to see if it is complete. If so, the historical query to the database will return
// the full set of logs. It's a little sad to have to query the job again, given that our caller definitely
// has, but we need to query it *after* we start following the pubsub to avoid a race condition where the job
// completes between the prior query and the start of following the pubsub. A more substantial refactor could
// avoid this, but not worth it for one fewer query at this point.
job, err = api.Database.GetProvisionerJobByID(ctx, job.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error querying job.",
Detail: err.Error(),
})
return
}
}
var after int64
// Only fetch logs created after the time provided.
@ -92,26 +53,10 @@ func (api *API) provisionerJobLogs(rw http.ResponseWriter, r *http.Request, job
return
}
}
var before int64
// Only fetch logs created before the time provided.
if beforeRaw != "" {
var err error
before, err = strconv.ParseInt(beforeRaw, 10, 64)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Query param \"before\" must be an integer.",
Validations: []codersdk.ValidationError{
{Field: "before", Detail: "Must be an integer"},
},
})
return
}
}
logs, err := api.Database.GetProvisionerLogsByIDBetween(ctx, database.GetProvisionerLogsByIDBetweenParams{
JobID: job.ID,
CreatedAfter: after,
CreatedBefore: before,
logs, err := api.Database.GetProvisionerLogsAfterID(ctx, database.GetProvisionerLogsAfterIDParams{
JobID: job.ID,
CreatedAfter: after,
})
if errors.Is(err, sql.ErrNoRows) {
err = nil
@ -162,11 +107,27 @@ func (api *API) provisionerJobLogs(rw http.ResponseWriter, r *http.Request, job
}
}
if job.CompletedAt.Valid {
// job was complete before we queried the database for historical logs, meaning we got everything. No need
// to stream anything from the bufferedLogs.
// job was complete before we queried the database for historical logs
return
}
// if we are following logs, start the subscription before we query the database, so that we don't miss any logs
// between the end of our query and the start of the subscription. We might get duplicates, so we'll keep track
// of processed IDs.
var bufferedLogs <-chan *database.ProvisionerJobLog
if follow {
bl, closeFollow, err := api.followProvisionerJobLogs(actor, job.ID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error watching provisioner logs.",
Detail: err.Error(),
})
return
}
defer closeFollow()
bufferedLogs = bl
}
for {
select {
case <-ctx.Done():
@ -382,7 +343,7 @@ type provisionerJobLogsMessage struct {
EndOfLogs bool `json:"end_of_logs,omitempty"`
}
func (api *API) followLogs(actor rbac.Subject, jobID uuid.UUID) (<-chan *database.ProvisionerJobLog, func(), error) {
func (api *API) followProvisionerJobLogs(actor rbac.Subject, jobID uuid.UUID) (<-chan *database.ProvisionerJobLog, func(), error) {
logger := api.Logger.With(slog.F("job_id", jobID))
var (
@ -419,7 +380,7 @@ func (api *API) followLogs(actor rbac.Subject, jobID uuid.UUID) (<-chan *databas
// CreatedAfter is sent when logs are streaming!
if jlMsg.CreatedAfter != 0 {
logs, err := api.Database.GetProvisionerLogsByIDBetween(dbauthz.As(ctx, actor), database.GetProvisionerLogsByIDBetweenParams{
logs, err := api.Database.GetProvisionerLogsAfterID(dbauthz.As(ctx, actor), database.GetProvisionerLogsAfterIDParams{
JobID: jobID,
CreatedAfter: jlMsg.CreatedAfter,
})
@ -443,7 +404,7 @@ func (api *API) followLogs(actor rbac.Subject, jobID uuid.UUID) (<-chan *databas
// so we fetch logs after the last ID we've seen and send them!
if jlMsg.EndOfLogs {
endOfLogs.Store(true)
logs, err := api.Database.GetProvisionerLogsByIDBetween(dbauthz.As(ctx, actor), database.GetProvisionerLogsByIDBetweenParams{
logs, err := api.Database.GetProvisionerLogsAfterID(dbauthz.As(ctx, actor), database.GetProvisionerLogsAfterIDParams{
JobID: jobID,
CreatedAfter: lastSentLogID.Load(),
})
@ -458,8 +419,6 @@ func (api *API) followLogs(actor rbac.Subject, jobID uuid.UUID) (<-chan *databas
logger.Debug(ctx, "got End of Logs")
bufferedLogs <- nil
}
lastSentLogID.Store(jlMsg.CreatedAfter)
},
)
if err != nil {

View File

@ -89,36 +89,4 @@ func TestProvisionerJobLogs(t *testing.T) {
}
}
})
t.Run("List", func(t *testing.T) {
t.Parallel()
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionApply: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Log{
Log: &proto.Log{
Level: proto.LogLevel_INFO,
Output: "log-output",
},
},
}, {
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
defer cancel()
logs, err := client.WorkspaceBuildLogsBefore(ctx, workspace.LatestBuild.ID, 0)
require.NoError(t, err)
require.Greater(t, len(logs), 1)
})
}

View File

@ -1482,8 +1482,8 @@ func (api *API) templateVersionResources(rw http.ResponseWriter, r *http.Request
// @Produce json
// @Tags Templates
// @Param templateversion path string true "Template version ID" format(uuid)
// @Param before query int false "Before Unix timestamp"
// @Param after query int false "After Unix timestamp"
// @Param before query int false "Before log id"
// @Param after query int false "After log id"
// @Param follow query bool false "Follow log stream"
// @Success 200 {array} codersdk.ProvisionerJobLog
// @Router /templateversions/{templateversion}/logs [get]

View File

@ -15,6 +15,7 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/google/uuid"
@ -216,6 +217,318 @@ func (api *API) postWorkspaceAgentStartup(rw http.ResponseWriter, r *http.Reques
httpapi.Write(ctx, rw, http.StatusOK, nil)
}
// @Summary Patch workspace agent startup logs
// @ID patch-workspace-agent-startup-logs
// @Security CoderSessionToken
// @Accept json
// @Produce json
// @Tags Agents
// @Param request body agentsdk.PatchStartupLogs true "Startup logs"
// @Success 200 {object} codersdk.Response
// @Router /workspaceagents/me/startup-logs [patch]
// @x-apidocgen {"skip": true}
func (api *API) patchWorkspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
workspaceAgent := httpmw.WorkspaceAgent(r)
var req agentsdk.PatchStartupLogs
if !httpapi.Read(ctx, rw, r, &req) {
return
}
if len(req.Logs) == 0 {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "No logs provided.",
})
return
}
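// Flatten the batch into parallel slices for a single bulk insert. The summed
// output length lets the insert enforce the startup logs size limit.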
createdAt := make([]time.Time, 0)
output := make([]string, 0)
outputLength := 0
for _, log := range req.Logs {
createdAt = append(createdAt, log.CreatedAt)
output = append(output, log.Output)
outputLength += len(log.Output)
}
logs, err := api.Database.InsertWorkspaceAgentStartupLogs(ctx, database.InsertWorkspaceAgentStartupLogsParams{
AgentID: workspaceAgent.ID,
CreatedAt: createdAt,
Output: output,
OutputLength: int32(outputLength),
})
if err != nil {
if database.IsStartupLogsLimitError(err) {
if !workspaceAgent.StartupLogsOverflowed {
err := api.Database.UpdateWorkspaceAgentStartupLogOverflowByID(ctx, database.UpdateWorkspaceAgentStartupLogOverflowByIDParams{
ID: workspaceAgent.ID,
StartupLogsOverflowed: true,
})
if err != nil {
// We don't want to return here, because the agent will retry
// on failure and this isn't a huge deal. The overflow state
// is just a hint to the user that the logs are incomplete.
api.Logger.Warn(ctx, "failed to update workspace agent startup log overflow", slog.Error(err))
}
resource, err := api.Database.GetWorkspaceResourceByID(ctx, workspaceAgent.ResourceID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Failed to get workspace resource.",
Detail: err.Error(),
})
return
}
build, err := api.Database.GetWorkspaceBuildByJobID(ctx, resource.JobID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Internal error fetching workspace build job.",
Detail: err.Error(),
})
return
}
api.publishWorkspaceUpdate(ctx, build.WorkspaceID)
}
httpapi.Write(ctx, rw, http.StatusRequestEntityTooLarge, codersdk.Response{
Message: "Startup logs limit exceeded",
Detail: err.Error(),
})
return
}
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Failed to upload startup logs",
Detail: err.Error(),
})
return
}
if workspaceAgent.StartupLogsLength == 0 {
// If these are the first logs appended for this agent, publish an update so
// the UI knows startup logs are now available.
resource, err := api.Database.GetWorkspaceResourceByID(ctx, workspaceAgent.ResourceID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Failed to get workspace resource.",
Detail: err.Error(),
})
return
}
build, err := api.Database.GetWorkspaceBuildByJobID(ctx, resource.JobID)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Internal error fetching workspace build job.",
Detail: err.Error(),
})
return
}
api.publishWorkspaceUpdate(ctx, build.WorkspaceID)
}
lowestID := logs[0].ID
// Publish by the lowest log ID inserted so the
// log stream will fetch everything from that point.
data, err := json.Marshal(agentsdk.StartupLogsNotifyMessage{
CreatedAfter: lowestID - 1,
})
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Failed to marshal startup logs notify message",
Detail: err.Error(),
})
return
}
err = api.Pubsub.Publish(agentsdk.StartupLogsNotifyChannel(workspaceAgent.ID), data)
if err != nil {
// We don't want to return an error to the agent here,
// otherwise it might try to reinsert the logs.
api.Logger.Warn(ctx, "failed to publish startup logs notify message", slog.Error(err))
}
httpapi.Write(ctx, rw, http.StatusOK, nil)
}
// workspaceAgentStartupLogs returns the logs sent from a workspace agent
// during startup.
//
// @Summary Get startup logs by workspace agent
// @ID get-startup-logs-by-workspace-agent
// @Security CoderSessionToken
// @Produce json
// @Tags Agents
// @Param workspaceagent path string true "Workspace agent ID" format(uuid)
// @Param before query int false "Before log id"
// @Param after query int false "After log id"
// @Param follow query bool false "Follow log stream"
// @Success 200 {array} codersdk.WorkspaceAgentStartupLog
// @Router /workspaceagents/{workspaceagent}/startup-logs [get]
func (api *API) workspaceAgentStartupLogs(rw http.ResponseWriter, r *http.Request) {
// This mostly copies how provisioner job logs are streamed!
var (
ctx = r.Context()
workspaceAgent = httpmw.WorkspaceAgentParam(r)
workspace = httpmw.WorkspaceParam(r)
logger = api.Logger.With(slog.F("workspace_agent_id", workspaceAgent.ID))
follow = r.URL.Query().Has("follow")
afterRaw = r.URL.Query().Get("after")
)
if !api.Authorize(r, rbac.ActionRead, workspace) {
httpapi.ResourceNotFound(rw)
return
}
var after int64
// Only fetch logs with IDs after the log ID provided.
if afterRaw != "" {
var err error
after, err = strconv.ParseInt(afterRaw, 10, 64)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Query param \"after\" must be an integer.",
Validations: []codersdk.ValidationError{
{Field: "after", Detail: "Must be an integer"},
},
})
return
}
}
logs, err := api.Database.GetWorkspaceAgentStartupLogsAfter(ctx, database.GetWorkspaceAgentStartupLogsAfterParams{
AgentID: workspaceAgent.ID,
CreatedAfter: after,
})
if errors.Is(err, sql.ErrNoRows) {
err = nil
}
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Internal error fetching provisioner logs.",
Detail: err.Error(),
})
return
}
if logs == nil {
logs = []database.WorkspaceAgentStartupLog{}
}
if !follow {
logger.Debug(ctx, "Finished non-follow job logs")
httpapi.Write(ctx, rw, http.StatusOK, convertWorkspaceAgentStartupLogs(logs))
return
}
api.WebsocketWaitMutex.Lock()
api.WebsocketWaitGroup.Add(1)
api.WebsocketWaitMutex.Unlock()
defer api.WebsocketWaitGroup.Done()
conn, err := websocket.Accept(rw, r, nil)
if err != nil {
httpapi.Write(ctx, rw, http.StatusBadRequest, codersdk.Response{
Message: "Failed to accept websocket.",
Detail: err.Error(),
})
return
}
go httpapi.Heartbeat(ctx, conn)
ctx, wsNetConn := websocketNetConn(ctx, conn, websocket.MessageText)
defer wsNetConn.Close() // Also closes conn.
// The Go stdlib JSON encoder appends a newline character after message write.
encoder := json.NewEncoder(wsNetConn)
err = encoder.Encode(convertWorkspaceAgentStartupLogs(logs))
if err != nil {
return
}
if workspaceAgent.LifecycleState == database.WorkspaceAgentLifecycleStateReady {
// The startup script has finished running, so we can close the connection.
return
}
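// Buffer batches of logs published over pubsub and remember the last log ID
// sent so the end-of-logs notification can flush any remaining rows.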
var (
bufferedLogs = make(chan []database.WorkspaceAgentStartupLog, 128)
endOfLogs atomic.Bool
lastSentLogID atomic.Int64
)
sendLogs := func(logs []database.WorkspaceAgentStartupLog) {
select {
case bufferedLogs <- logs:
lastSentLogID.Store(logs[len(logs)-1].ID)
default:
logger.Warn(ctx, "workspace agent startup log overflowing channel")
}
}
closeSubscribe, err := api.Pubsub.Subscribe(
agentsdk.StartupLogsNotifyChannel(workspaceAgent.ID),
func(ctx context.Context, message []byte) {
if endOfLogs.Load() {
return
}
jlMsg := agentsdk.StartupLogsNotifyMessage{}
err := json.Unmarshal(message, &jlMsg)
if err != nil {
logger.Warn(ctx, "invalid startup logs notify message", slog.Error(err))
return
}
if jlMsg.CreatedAfter != 0 {
logs, err := api.Database.GetWorkspaceAgentStartupLogsAfter(ctx, database.GetWorkspaceAgentStartupLogsAfterParams{
AgentID: workspaceAgent.ID,
CreatedAfter: jlMsg.CreatedAfter,
})
if err != nil {
logger.Warn(ctx, "failed to get workspace agent startup logs after", slog.Error(err))
return
}
sendLogs(logs)
}
if jlMsg.EndOfLogs {
endOfLogs.Store(true)
logs, err := api.Database.GetWorkspaceAgentStartupLogsAfter(ctx, database.GetWorkspaceAgentStartupLogsAfterParams{
AgentID: workspaceAgent.ID,
CreatedAfter: lastSentLogID.Load(),
})
if err != nil {
logger.Warn(ctx, "get workspace agent startup logs after", slog.Error(err))
return
}
sendLogs(logs)
bufferedLogs <- nil
}
},
)
if err != nil {
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
Message: "Failed to subscribe to startup logs.",
Detail: err.Error(),
})
return
}
defer closeSubscribe()
for {
select {
case <-ctx.Done():
logger.Debug(context.Background(), "job logs context canceled")
return
case logs, ok := <-bufferedLogs:
// A nil batch of logs is sent when streaming is complete!
if !ok || logs == nil {
logger.Debug(context.Background(), "reached the end of published logs")
return
}
err = encoder.Encode(convertWorkspaceAgentStartupLogs(logs))
if err != nil {
return
}
}
}
}
// workspaceAgentPTY spawns a PTY and pipes it over a WebSocket.
// This is used for the web terminal.
//
@ -851,6 +1164,8 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
Architecture: dbAgent.Architecture,
OperatingSystem: dbAgent.OperatingSystem,
StartupScript: dbAgent.StartupScript.String,
StartupLogsLength: dbAgent.StartupLogsLength,
StartupLogsOverflowed: dbAgent.StartupLogsOverflowed,
Version: dbAgent.Version,
EnvironmentVariables: envs,
Directory: dbAgent.Directory,
@ -1525,3 +1840,19 @@ func websocketNetConn(ctx context.Context, conn *websocket.Conn, msgType websock
Conn: nc,
}
}
func convertWorkspaceAgentStartupLogs(logs []database.WorkspaceAgentStartupLog) []codersdk.WorkspaceAgentStartupLog {
sdk := make([]codersdk.WorkspaceAgentStartupLog, 0, len(logs))
for _, log := range logs {
sdk = append(sdk, convertWorkspaceAgentStartupLog(log))
}
return sdk
}
func convertWorkspaceAgentStartupLog(log database.WorkspaceAgentStartupLog) codersdk.WorkspaceAgentStartupLog {
return codersdk.WorkspaceAgentStartupLog{
ID: log.ID,
CreatedAt: log.CreatedAt,
Output: log.Output,
}
}
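For reference, a minimal sketch of how a client could follow this endpoint via the codersdk helper exercised in the tests further down; the exact return types, and the channel closing once the stream ends, are assumptions rather than guarantees of the SDK.
import (
	"context"
	"fmt"

	"github.com/google/uuid"

	"github.com/coder/coder/codersdk"
)
// followStartupLogs prints an agent's startup logs as they arrive, starting
// from the first log (assuming every log ID is greater than zero).
func followStartupLogs(ctx context.Context, client *codersdk.Client, agentID uuid.UUID) error {
	logs, closer, err := client.WorkspaceAgentStartupLogsAfter(ctx, agentID, 0)
	if err != nil {
		return err
	}
	defer closer.Close()
	for chunk := range logs {
		for _, log := range chunk {
			fmt.Printf("%s: %s\n", log.CreatedAt, log.Output)
		}
	}
	return nil
}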

View File

@ -175,6 +175,128 @@ func TestWorkspaceAgent(t *testing.T) {
})
}
func TestWorkspaceAgentStartupLogs(t *testing.T) {
t.Parallel()
t.Run("Success", func(t *testing.T) {
t.Parallel()
ctx, cancelFunc := testutil.Context(t)
defer cancelFunc()
client := coderdtest.New(t, &coderdtest.Options{
IncludeProvisionerDaemon: true,
})
user := coderdtest.CreateFirstUser(t, client)
authToken := uuid.NewString()
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{
Resources: []*proto.Resource{{
Name: "example",
Type: "aws_instance",
Agents: []*proto.Agent{{
Id: uuid.NewString(),
Auth: &proto.Agent_Token{
Token: authToken,
},
}},
}},
},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
agentClient := agentsdk.New(client.URL)
agentClient.SetSessionToken(authToken)
err := agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
Logs: []agentsdk.StartupLog{{
CreatedAt: database.Now(),
Output: "testing",
}},
})
require.NoError(t, err)
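// A negative "after" ID streams every startup log from the beginning.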
logs, closer, err := client.WorkspaceAgentStartupLogsAfter(ctx, build.Resources[0].Agents[0].ID, -500)
require.NoError(t, err)
defer func() {
_ = closer.Close()
}()
var logChunk []codersdk.WorkspaceAgentStartupLog
select {
case <-ctx.Done():
case logChunk = <-logs:
}
require.NoError(t, ctx.Err())
require.Len(t, logChunk, 1)
require.Equal(t, "testing", logChunk[0].Output)
cancelFunc()
})
t.Run("PublishesOnOverflow", func(t *testing.T) {
t.Parallel()
ctx, cancelFunc := testutil.Context(t)
defer cancelFunc()
client := coderdtest.New(t, &coderdtest.Options{
IncludeProvisionerDaemon: true,
})
user := coderdtest.CreateFirstUser(t, client)
authToken := uuid.NewString()
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
ProvisionPlan: echo.ProvisionComplete,
ProvisionApply: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{
Resources: []*proto.Resource{{
Name: "example",
Type: "aws_instance",
Agents: []*proto.Agent{{
Id: uuid.NewString(),
Auth: &proto.Agent_Token{
Token: authToken,
},
}},
}},
},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
updates, err := client.WatchWorkspace(ctx, workspace.ID)
require.NoError(t, err)
agentClient := agentsdk.New(client.URL)
agentClient.SetSessionToken(authToken)
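// Send just over 1 MiB of output, which exceeds the startup logs limit and
// should mark the agent as overflowed.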
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
Logs: []agentsdk.StartupLog{{
CreatedAt: database.Now(),
Output: strings.Repeat("a", (1<<20)+1),
}},
})
var apiError *codersdk.Error
require.ErrorAs(t, err, &apiError)
require.Equal(t, http.StatusRequestEntityTooLarge, apiError.StatusCode())
var update codersdk.Workspace
select {
case <-ctx.Done():
t.FailNow()
case update = <-updates:
}
// Ensure that the UI gets an update when the logs overflow!
require.True(t, update.LatestBuild.Resources[0].Agents[0].StartupLogsOverflowed)
cancelFunc()
})
}
func TestWorkspaceAgentListen(t *testing.T) {
t.Parallel()

View File

@ -249,3 +249,7 @@ func (*client) PostAppHealth(_ context.Context, _ agentsdk.PostAppHealthsRequest
func (*client) PostStartup(_ context.Context, _ agentsdk.PostStartupRequest) error {
return nil
}
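// PatchStartupLogs is a no-op in this fake agent client.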
func (*client) PatchStartupLogs(_ context.Context, _ agentsdk.PatchStartupLogs) error {
return nil
}
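The fake client above only needs to satisfy the client interface the agent consumes; the real agent batches startup script output and pushes it through this call. Below is a rough sketch of such a sending loop, using only the PatchStartupLogs method and StartupLog type from this change; the queue, batch handling, and flush interval are illustrative, not the agent's actual implementation.
import (
	"context"
	"time"

	"github.com/coder/coder/codersdk/agentsdk"
)
// sendStartupLogs drains queued startup script output and pushes it to coderd
// in periodic batches until the queue is closed or the context is canceled.
func sendStartupLogs(ctx context.Context, client *agentsdk.Client, queue <-chan agentsdk.StartupLog) error {
	var batch []agentsdk.StartupLog
	flush := func() error {
		if len(batch) == 0 {
			return nil
		}
		if err := client.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: batch}); err != nil {
			return err
		}
		batch = batch[:0]
		return nil
	}
	ticker := time.NewTicker(250 * time.Millisecond)
	defer ticker.Stop()
	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case log, ok := <-queue:
			if !ok {
				return flush()
			}
			batch = append(batch, log)
		case <-ticker.C:
			if err := flush(); err != nil {
				return err
			}
		}
	}
}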