mirror of
https://github.com/coder/coder.git
synced 2025-07-03 16:13:58 +00:00
refactor: replace startup script logs EOF with starting/ready time (#8082)
This commit reverts some of the changes in #8029 and implements an alternative method of keeping track of when the startup script has ended and there will be no more logs. This is achieved by adding new agent fields for tracking when the agent enters the "starting" and "ready"/"start_error" lifecycle states. The timestamps simplify the logic, since we don't need to understand whether the current state is before or after the state we're interested in. They can also be used to show data such as how long the startup script took to execute. This also allowed us to remove the EOF field from the logs. The EOF implementation was problematic when we returned the EOF log entry in the response: requesting logs _after_ that ID would return no logs, and the API would thus lose track of EOF.
This commit is contained in:
committed by
GitHub
parent
b1d1b63113
commit
8dac0356ed
@ -301,124 +301,6 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("AllowEOFAfterOverflowAndCloseFollowWebsocket", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
client := coderdtest.New(t, &coderdtest.Options{
|
||||
IncludeProvisionerDaemon: true,
|
||||
})
|
||||
user := coderdtest.CreateFirstUser(t, client)
|
||||
authToken := uuid.NewString()
|
||||
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
|
||||
Parse: echo.ParseComplete,
|
||||
ProvisionPlan: echo.ProvisionComplete,
|
||||
ProvisionApply: []*proto.Provision_Response{{
|
||||
Type: &proto.Provision_Response_Complete{
|
||||
Complete: &proto.Provision_Complete{
|
||||
Resources: []*proto.Resource{{
|
||||
Name: "example",
|
||||
Type: "aws_instance",
|
||||
Agents: []*proto.Agent{{
|
||||
Id: uuid.NewString(),
|
||||
Auth: &proto.Agent_Token{
|
||||
Token: authToken,
|
||||
},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}},
|
||||
})
|
||||
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
||||
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
|
||||
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
|
||||
build := coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
|
||||
|
||||
updates, err := client.WatchWorkspace(ctx, workspace.ID)
|
||||
require.NoError(t, err)
|
||||
|
||||
logs, closeLogs, err := client.WorkspaceAgentStartupLogsAfter(ctx, build.Resources[0].Agents[0].ID, 0)
|
||||
require.NoError(t, err)
|
||||
defer closeLogs.Close()
|
||||
|
||||
wantLogs := []codersdk.WorkspaceAgentStartupLog{
|
||||
{
|
||||
CreatedAt: database.Now(),
|
||||
Output: "testing",
|
||||
Level: "info",
|
||||
},
|
||||
{
|
||||
CreatedAt: database.Now().Add(time.Minute),
|
||||
Level: "info",
|
||||
EOF: true,
|
||||
},
|
||||
}
|
||||
|
||||
agentClient := agentsdk.New(client.URL)
|
||||
agentClient.SetSessionToken(authToken)
|
||||
|
||||
var convertedLogs []agentsdk.StartupLog
|
||||
for _, log := range wantLogs {
|
||||
convertedLogs = append(convertedLogs, agentsdk.StartupLog{
|
||||
CreatedAt: log.CreatedAt,
|
||||
Output: log.Output,
|
||||
Level: log.Level,
|
||||
EOF: log.EOF,
|
||||
})
|
||||
}
|
||||
initialLogs := convertedLogs[:len(convertedLogs)-1]
|
||||
eofLog := convertedLogs[len(convertedLogs)-1]
|
||||
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: initialLogs})
|
||||
require.NoError(t, err)
|
||||
|
||||
overflowLogs := []agentsdk.StartupLog{
|
||||
{
|
||||
CreatedAt: database.Now(),
|
||||
Output: strings.Repeat("a", (1<<20)+1),
|
||||
},
|
||||
eofLog, // Include EOF which will be discarded due to overflow.
|
||||
}
|
||||
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: overflowLogs})
|
||||
var apiError *codersdk.Error
|
||||
require.ErrorAs(t, err, &apiError)
|
||||
require.Equal(t, http.StatusRequestEntityTooLarge, apiError.StatusCode())
|
||||
|
||||
// It's possible we have multiple updates queued, but that's alright, we just
|
||||
// wait for the one where it overflows.
|
||||
for {
|
||||
var update codersdk.Workspace
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
require.Fail(t, "timed out waiting for overflow")
|
||||
case update = <-updates:
|
||||
}
|
||||
if update.LatestBuild.Resources[0].Agents[0].StartupLogsOverflowed {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Now we should still be able to send the EOF.
|
||||
err = agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{Logs: []agentsdk.StartupLog{eofLog}})
|
||||
require.NoError(t, err)
|
||||
|
||||
var gotLogs []codersdk.WorkspaceAgentStartupLog
|
||||
logsLoop:
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
require.Fail(t, "timed out waiting for logs")
|
||||
case l, ok := <-logs:
|
||||
if !ok {
|
||||
break logsLoop
|
||||
}
|
||||
gotLogs = append(gotLogs, l...)
|
||||
}
|
||||
}
|
||||
for i := range gotLogs {
|
||||
gotLogs[i].ID = 0 // Ignore ID for comparison.
|
||||
}
|
||||
require.Equal(t, wantLogs, gotLogs)
|
||||
})
|
||||
t.Run("CloseAfterLifecycleStateIsNotRunning", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
@ -472,25 +354,26 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
err = agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
|
||||
State: codersdk.WorkspaceAgentLifecycleReady,
|
||||
State: codersdk.WorkspaceAgentLifecycleReady,
|
||||
ChangedAt: time.Now(),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
var gotLogs []codersdk.WorkspaceAgentStartupLog
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
require.Fail(t, "timed out waiting for logs EOF")
|
||||
case l := <-logs:
|
||||
for _, log := range l {
|
||||
if log.EOF {
|
||||
// Success.
|
||||
return
|
||||
}
|
||||
require.Fail(t, "timed out waiting for logs to end")
|
||||
case l, ok := <-logs:
|
||||
gotLogs = append(gotLogs, l...)
|
||||
if !ok {
|
||||
require.Len(t, gotLogs, 1, "expected one log")
|
||||
return // Success.
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
t.Run("NoLogAfterEOF", func(t *testing.T) {
|
||||
t.Run("NoLogAfterScriptEnded", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx := testutil.Context(t, testutil.WaitMedium)
|
||||
client := coderdtest.New(t, &coderdtest.Options{
|
||||
@ -526,13 +409,9 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
|
||||
agentClient := agentsdk.New(client.URL)
|
||||
agentClient.SetSessionToken(authToken)
|
||||
|
||||
err := agentClient.PatchStartupLogs(ctx, agentsdk.PatchStartupLogs{
|
||||
Logs: []agentsdk.StartupLog{
|
||||
{
|
||||
CreatedAt: database.Now(),
|
||||
EOF: true,
|
||||
},
|
||||
},
|
||||
err := agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
|
||||
State: codersdk.WorkspaceAgentLifecycleReady,
|
||||
ChangedAt: time.Now(),
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
@ -544,7 +423,7 @@ func TestWorkspaceAgentStartupLogs(t *testing.T) {
|
||||
},
|
||||
},
|
||||
})
|
||||
require.Error(t, err, "insert after EOF should not succeed")
|
||||
require.Error(t, err, "insert after script ended should not succeed")
|
||||
})
|
||||
}
|
||||
|
||||
@ -1410,7 +1289,8 @@ func TestWorkspaceAgent_LifecycleState(t *testing.T) {
|
||||
ctx := testutil.Context(t, testutil.WaitLong)
|
||||
|
||||
err := agentClient.PostLifecycle(ctx, agentsdk.PostLifecycleRequest{
|
||||
State: tt.state,
|
||||
State: tt.state,
|
||||
ChangedAt: time.Now(),
|
||||
})
|
||||
if tt.wantErr {
|
||||
require.Error(t, err)
|
||||
|
Reference in New Issue
Block a user