mirror of
https://github.com/coder/coder.git
synced 2025-07-06 15:41:45 +00:00
feat: add computed workspace and agent health fields to the api (#8280)
This commit is contained in:
committed by
GitHub
parent
eabf929676
commit
b73f9d8e86
49
coderd/apidoc/docs.go
generated
49
coderd/apidoc/docs.go
generated
@ -9592,6 +9592,14 @@ const docTemplate = `{
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"health": {
|
||||
"description": "Health shows the health of the workspace and information about\nwhat is causing an unhealthy status.",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/codersdk.WorkspaceHealth"
|
||||
}
|
||||
]
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"format": "uuid"
|
||||
@ -9689,6 +9697,14 @@ const docTemplate = `{
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"health": {
|
||||
"description": "Health reports the health of the agent.",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/codersdk.WorkspaceAgentHealth"
|
||||
}
|
||||
]
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"format": "uuid"
|
||||
@ -9783,6 +9799,21 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceAgentHealth": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"healthy": {
|
||||
"description": "Healthy is true if the agent is healthy.",
|
||||
"type": "boolean",
|
||||
"example": false
|
||||
},
|
||||
"reason": {
|
||||
"description": "Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true.",
|
||||
"type": "string",
|
||||
"example": "agent has lost connection"
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceAgentLifecycle": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@ -10149,6 +10180,24 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceHealth": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"failing_agents": {
|
||||
"description": "FailingAgents lists the IDs of the agents that are failing, if any.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"format": "uuid"
|
||||
}
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is true if the workspace is healthy.",
|
||||
"type": "boolean",
|
||||
"example": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceProxy": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
49
coderd/apidoc/swagger.json
generated
49
coderd/apidoc/swagger.json
generated
@ -8659,6 +8659,14 @@
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"health": {
|
||||
"description": "Health shows the health of the workspace and information about\nwhat is causing an unhealthy status.",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/codersdk.WorkspaceHealth"
|
||||
}
|
||||
]
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"format": "uuid"
|
||||
@ -8756,6 +8764,14 @@
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"health": {
|
||||
"description": "Health reports the health of the agent.",
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/codersdk.WorkspaceAgentHealth"
|
||||
}
|
||||
]
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"format": "uuid"
|
||||
@ -8850,6 +8866,21 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceAgentHealth": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"healthy": {
|
||||
"description": "Healthy is true if the agent is healthy.",
|
||||
"type": "boolean",
|
||||
"example": false
|
||||
},
|
||||
"reason": {
|
||||
"description": "Reason is a human-readable explanation of the agent's health. It is empty if Healthy is true.",
|
||||
"type": "string",
|
||||
"example": "agent has lost connection"
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceAgentLifecycle": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
@ -9187,6 +9218,24 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceHealth": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"failing_agents": {
|
||||
"description": "FailingAgents lists the IDs of the agents that are failing, if any.",
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"format": "uuid"
|
||||
}
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is true if the workspace is healthy.",
|
||||
"type": "boolean",
|
||||
"example": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"codersdk.WorkspaceProxy": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -1262,6 +1262,24 @@ func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordin
|
||||
workspaceAgent.ReadyAt = &dbAgent.ReadyAt.Time
|
||||
}
|
||||
|
||||
switch {
|
||||
case workspaceAgent.Status != codersdk.WorkspaceAgentConnected && workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleOff:
|
||||
workspaceAgent.Health.Reason = "agent is not running"
|
||||
case workspaceAgent.Status == codersdk.WorkspaceAgentTimeout:
|
||||
workspaceAgent.Health.Reason = "agent is taking too long to connect"
|
||||
case workspaceAgent.Status == codersdk.WorkspaceAgentDisconnected:
|
||||
workspaceAgent.Health.Reason = "agent has lost connection"
|
||||
// Note: We could also handle codersdk.WorkspaceAgentLifecycleStartTimeout
|
||||
// here, but it's more of a soft issue, so we don't want to mark the agent
|
||||
// as unhealthy.
|
||||
case workspaceAgent.LifecycleState == codersdk.WorkspaceAgentLifecycleStartError:
|
||||
workspaceAgent.Health.Reason = "agent startup script exited with an error"
|
||||
case workspaceAgent.LifecycleState.ShuttingDown():
|
||||
workspaceAgent.Health.Reason = "agent is shutting down"
|
||||
default:
|
||||
workspaceAgent.Health.Healthy = true
|
||||
}
|
||||
|
||||
return workspaceAgent, nil
|
||||
}
|
||||
|
||||
|
@ -72,6 +72,7 @@ func TestWorkspaceAgent(t *testing.T) {
|
||||
require.Equal(t, tmpDir, workspace.LatestBuild.Resources[0].Agents[0].Directory)
|
||||
_, err = client.WorkspaceAgent(ctx, workspace.LatestBuild.Resources[0].Agents[0].ID)
|
||||
require.NoError(t, err)
|
||||
require.True(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy)
|
||||
})
|
||||
t.Run("HasFallbackTroubleshootingURL", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
@ -167,6 +168,8 @@ func TestWorkspaceAgent(t *testing.T) {
|
||||
}, testutil.IntervalMedium, "agent status timeout")
|
||||
|
||||
require.Equal(t, wantTroubleshootingURL, workspace.LatestBuild.Resources[0].Agents[0].TroubleshootingURL)
|
||||
require.False(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Healthy)
|
||||
require.NotEmpty(t, workspace.LatestBuild.Resources[0].Agents[0].Health.Reason)
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -1110,6 +1110,15 @@ func convertWorkspace(
|
||||
lockedAt = &workspace.LockedAt.Time
|
||||
}
|
||||
|
||||
failingAgents := []uuid.UUID{}
|
||||
for _, resource := range workspaceBuild.Resources {
|
||||
for _, agent := range resource.Agents {
|
||||
if !agent.Health.Healthy {
|
||||
failingAgents = append(failingAgents, agent.ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
ttlMillis = convertWorkspaceTTLMillis(workspace.Ttl)
|
||||
deletingAt = calculateDeletingAt(workspace, template, workspaceBuild)
|
||||
@ -1135,6 +1144,10 @@ func convertWorkspace(
|
||||
LastUsedAt: workspace.LastUsedAt,
|
||||
DeletingAt: deletingAt,
|
||||
LockedAt: lockedAt,
|
||||
Health: codersdk.WorkspaceHealth{
|
||||
Healthy: len(failingAgents) == 0,
|
||||
FailingAgents: failingAgents,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -164,6 +164,148 @@ func TestWorkspace(t *testing.T) {
|
||||
assert.Equal(t, templateDisplayName, ws.TemplateDisplayName)
|
||||
assert.Equal(t, templateAllowUserCancelWorkspaceJobs, ws.TemplateAllowUserCancelWorkspaceJobs)
|
||||
})
|
||||
|
||||
t.Run("Health", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
t.Run("Healthy", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
|
||||
user := coderdtest.CreateFirstUser(t, client)
|
||||
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
|
||||
Parse: echo.ParseComplete,
|
||||
ProvisionApply: []*proto.Provision_Response{{
|
||||
Type: &proto.Provision_Response_Complete{
|
||||
Complete: &proto.Provision_Complete{
|
||||
Resources: []*proto.Resource{{
|
||||
Name: "some",
|
||||
Type: "example",
|
||||
Agents: []*proto.Agent{{
|
||||
Id: uuid.NewString(),
|
||||
Auth: &proto.Agent_Token{},
|
||||
}},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}},
|
||||
})
|
||||
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
|
||||
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
||||
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
|
||||
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
|
||||
defer cancel()
|
||||
|
||||
workspace, err := client.Workspace(ctx, workspace.ID)
|
||||
require.NoError(t, err)
|
||||
|
||||
agent := workspace.LatestBuild.Resources[0].Agents[0]
|
||||
|
||||
assert.True(t, workspace.Health.Healthy)
|
||||
assert.Equal(t, []uuid.UUID{}, workspace.Health.FailingAgents)
|
||||
assert.True(t, agent.Health.Healthy)
|
||||
assert.Empty(t, agent.Health.Reason)
|
||||
})
|
||||
|
||||
t.Run("Unhealthy", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
|
||||
user := coderdtest.CreateFirstUser(t, client)
|
||||
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
|
||||
Parse: echo.ParseComplete,
|
||||
ProvisionApply: []*proto.Provision_Response{{
|
||||
Type: &proto.Provision_Response_Complete{
|
||||
Complete: &proto.Provision_Complete{
|
||||
Resources: []*proto.Resource{{
|
||||
Name: "some",
|
||||
Type: "example",
|
||||
Agents: []*proto.Agent{{
|
||||
Id: uuid.NewString(),
|
||||
Auth: &proto.Agent_Token{},
|
||||
ConnectionTimeoutSeconds: 1,
|
||||
}},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}},
|
||||
})
|
||||
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
|
||||
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
||||
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
|
||||
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
|
||||
defer cancel()
|
||||
|
||||
var err error
|
||||
testutil.Eventually(ctx, t, func(ctx context.Context) bool {
|
||||
workspace, err = client.Workspace(ctx, workspace.ID)
|
||||
return assert.NoError(t, err) && !workspace.Health.Healthy
|
||||
}, testutil.IntervalMedium)
|
||||
|
||||
agent := workspace.LatestBuild.Resources[0].Agents[0]
|
||||
|
||||
assert.False(t, workspace.Health.Healthy)
|
||||
assert.Equal(t, []uuid.UUID{agent.ID}, workspace.Health.FailingAgents)
|
||||
assert.False(t, agent.Health.Healthy)
|
||||
assert.NotEmpty(t, agent.Health.Reason)
|
||||
})
|
||||
|
||||
t.Run("Mixed health", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
client := coderdtest.New(t, &coderdtest.Options{IncludeProvisionerDaemon: true})
|
||||
user := coderdtest.CreateFirstUser(t, client)
|
||||
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
|
||||
Parse: echo.ParseComplete,
|
||||
ProvisionApply: []*proto.Provision_Response{{
|
||||
Type: &proto.Provision_Response_Complete{
|
||||
Complete: &proto.Provision_Complete{
|
||||
Resources: []*proto.Resource{{
|
||||
Name: "some",
|
||||
Type: "example",
|
||||
Agents: []*proto.Agent{{
|
||||
Id: uuid.NewString(),
|
||||
Name: "a1",
|
||||
Auth: &proto.Agent_Token{},
|
||||
}, {
|
||||
Id: uuid.NewString(),
|
||||
Name: "a2",
|
||||
Auth: &proto.Agent_Token{},
|
||||
ConnectionTimeoutSeconds: 1,
|
||||
}},
|
||||
}},
|
||||
},
|
||||
},
|
||||
}},
|
||||
})
|
||||
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
|
||||
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
|
||||
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
|
||||
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitLong)
|
||||
defer cancel()
|
||||
|
||||
var err error
|
||||
testutil.Eventually(ctx, t, func(ctx context.Context) bool {
|
||||
workspace, err = client.Workspace(ctx, workspace.ID)
|
||||
return assert.NoError(t, err) && !workspace.Health.Healthy
|
||||
}, testutil.IntervalMedium)
|
||||
|
||||
assert.False(t, workspace.Health.Healthy)
|
||||
assert.Len(t, workspace.Health.FailingAgents, 1)
|
||||
|
||||
agent1 := workspace.LatestBuild.Resources[0].Agents[0]
|
||||
agent2 := workspace.LatestBuild.Resources[0].Agents[1]
|
||||
|
||||
assert.Equal(t, []uuid.UUID{agent2.ID}, workspace.Health.FailingAgents)
|
||||
assert.True(t, agent1.Health.Healthy)
|
||||
assert.Empty(t, agent1.Health.Reason)
|
||||
assert.False(t, agent2.Health.Healthy)
|
||||
assert.NotEmpty(t, agent2.Health.Reason)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
func TestAdminViewAllWorkspaces(t *testing.T) {
|
||||
|
Reference in New Issue
Block a user