mirror of
https://github.com/coder/coder.git
synced 2025-07-09 11:45:56 +00:00
feat: include health severity in reports (#10817)
This commit is contained in:
106
coderd/apidoc/docs.go
generated
106
coderd/apidoc/docs.go
generated
@ -12103,6 +12103,7 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"node": {
|
||||
@ -12117,6 +12118,18 @@ const docTemplate = `{
|
||||
"round_trip_ping_ms": {
|
||||
"type": "integer"
|
||||
},
|
||||
"severity": {
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"stun": {
|
||||
"$ref": "#/definitions/derphealth.StunReport"
|
||||
},
|
||||
@ -12138,6 +12151,7 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"node_reports": {
|
||||
@ -12149,6 +12163,18 @@ const docTemplate = `{
|
||||
"region": {
|
||||
"$ref": "#/definitions/tailcfg.DERPRegion"
|
||||
},
|
||||
"severity": {
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"warnings": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@ -12164,6 +12190,7 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"netcheck": {
|
||||
@ -12184,6 +12211,18 @@ const docTemplate = `{
|
||||
"$ref": "#/definitions/derphealth.RegionReport"
|
||||
}
|
||||
},
|
||||
"severity": {
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"warnings": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@ -12206,6 +12245,19 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"health.Severity": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"x-enum-varnames": [
|
||||
"SeverityOK",
|
||||
"SeverityWarning",
|
||||
"SeverityError"
|
||||
]
|
||||
},
|
||||
"healthcheck.AccessURLReport": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@ -12216,6 +12268,7 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"healthz_response": {
|
||||
@ -12224,6 +12277,18 @@ const docTemplate = `{
|
||||
"reachable": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"status_code": {
|
||||
"type": "integer"
|
||||
},
|
||||
@ -12242,6 +12307,7 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"latency": {
|
||||
@ -12253,6 +12319,18 @@ const docTemplate = `{
|
||||
"reachable": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"threshold_ms": {
|
||||
"type": "integer"
|
||||
},
|
||||
@ -12288,9 +12366,22 @@ const docTemplate = `{
|
||||
}
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is true if the report returns no errors.",
|
||||
"description": "Healthy is true if the report returns no errors.\nDeprecated: use ` + "`" + `Severity` + "`" + ` instead",
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"description": "Severity indicates the status of Coder health.",
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"description": "Time is the time the report was generated at.",
|
||||
"type": "string"
|
||||
@ -12313,8 +12404,21 @@ const docTemplate = `{
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"enum": [
|
||||
"ok",
|
||||
"warning",
|
||||
"error"
|
||||
],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"warnings": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
70
coderd/apidoc/swagger.json
generated
70
coderd/apidoc/swagger.json
generated
@ -11024,6 +11024,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"node": {
|
||||
@ -11038,6 +11039,14 @@
|
||||
"round_trip_ping_ms": {
|
||||
"type": "integer"
|
||||
},
|
||||
"severity": {
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"stun": {
|
||||
"$ref": "#/definitions/derphealth.StunReport"
|
||||
},
|
||||
@ -11059,6 +11068,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"node_reports": {
|
||||
@ -11070,6 +11080,14 @@
|
||||
"region": {
|
||||
"$ref": "#/definitions/tailcfg.DERPRegion"
|
||||
},
|
||||
"severity": {
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"warnings": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@ -11085,6 +11103,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"netcheck": {
|
||||
@ -11105,6 +11124,14 @@
|
||||
"$ref": "#/definitions/derphealth.RegionReport"
|
||||
}
|
||||
},
|
||||
"severity": {
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"warnings": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
@ -11127,6 +11154,11 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"health.Severity": {
|
||||
"type": "string",
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"x-enum-varnames": ["SeverityOK", "SeverityWarning", "SeverityError"]
|
||||
},
|
||||
"healthcheck.AccessURLReport": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
@ -11137,6 +11169,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"healthz_response": {
|
||||
@ -11145,6 +11178,14 @@
|
||||
"reachable": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"status_code": {
|
||||
"type": "integer"
|
||||
},
|
||||
@ -11163,6 +11204,7 @@
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"latency": {
|
||||
@ -11174,6 +11216,14 @@
|
||||
"reachable": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"threshold_ms": {
|
||||
"type": "integer"
|
||||
},
|
||||
@ -11209,9 +11259,18 @@
|
||||
}
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is true if the report returns no errors.",
|
||||
"description": "Healthy is true if the report returns no errors.\nDeprecated: use `Severity` instead",
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"description": "Severity indicates the status of Coder health.",
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"description": "Time is the time the report was generated at.",
|
||||
"type": "string"
|
||||
@ -11234,8 +11293,17 @@
|
||||
"type": "string"
|
||||
},
|
||||
"healthy": {
|
||||
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
|
||||
"type": "boolean"
|
||||
},
|
||||
"severity": {
|
||||
"enum": ["ok", "warning", "error"],
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/definitions/health.Severity"
|
||||
}
|
||||
]
|
||||
},
|
||||
"warnings": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
|
@ -9,13 +9,16 @@ import (
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/coderd/util/ptr"
|
||||
)
|
||||
|
||||
// @typescript-generate AccessURLReport
|
||||
type AccessURLReport struct {
|
||||
Healthy bool `json:"healthy"`
|
||||
Warnings []string `json:"warnings"`
|
||||
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
|
||||
Healthy bool `json:"healthy"`
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
Warnings []string `json:"warnings"`
|
||||
|
||||
AccessURL string `json:"access_url"`
|
||||
Reachable bool `json:"reachable"`
|
||||
@ -33,9 +36,11 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
r.Severity = health.SeverityOK
|
||||
r.Warnings = []string{}
|
||||
if opts.AccessURL == nil {
|
||||
r.Error = ptr.Ref("access URL is nil")
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
r.AccessURL = opts.AccessURL.String()
|
||||
@ -47,18 +52,21 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
|
||||
accessURL, err := opts.AccessURL.Parse("/healthz")
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("parse healthz endpoint: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("create healthz request: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
|
||||
res, err := opts.Client.Do(req)
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("get healthz endpoint: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
defer res.Body.Close()
|
||||
@ -66,11 +74,15 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
|
||||
body, err := io.ReadAll(res.Body)
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("read healthz response: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
|
||||
r.Reachable = true
|
||||
r.Healthy = res.StatusCode == http.StatusOK
|
||||
r.StatusCode = res.StatusCode
|
||||
if res.StatusCode != http.StatusOK {
|
||||
r.Severity = health.SeverityWarning
|
||||
}
|
||||
r.HealthzResponse = string(body)
|
||||
}
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
|
||||
"github.com/coder/coder/v2/coderd/coderdtest"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
)
|
||||
|
||||
func TestAccessURL(t *testing.T) {
|
||||
@ -34,6 +35,7 @@ func TestAccessURL(t *testing.T) {
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.True(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityOK, report.Severity)
|
||||
assert.Equal(t, http.StatusOK, report.StatusCode)
|
||||
assert.Equal(t, "OK", report.HealthzResponse)
|
||||
assert.Nil(t, report.Error)
|
||||
@ -64,6 +66,7 @@ func TestAccessURL(t *testing.T) {
|
||||
|
||||
assert.False(t, report.Healthy)
|
||||
assert.True(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
assert.Equal(t, http.StatusNotFound, report.StatusCode)
|
||||
assert.Equal(t, string(resp), report.HealthzResponse)
|
||||
assert.Nil(t, report.Error)
|
||||
@ -100,6 +103,7 @@ func TestAccessURL(t *testing.T) {
|
||||
|
||||
assert.False(t, report.Healthy)
|
||||
assert.False(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityError, report.Severity)
|
||||
assert.Equal(t, 0, report.StatusCode)
|
||||
assert.Equal(t, "", report.HealthzResponse)
|
||||
require.NotNil(t, report.Error)
|
||||
|
@ -8,6 +8,7 @@ import (
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/database"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
)
|
||||
|
||||
const (
|
||||
@ -16,8 +17,10 @@ const (
|
||||
|
||||
// @typescript-generate DatabaseReport
|
||||
type DatabaseReport struct {
|
||||
Healthy bool `json:"healthy"`
|
||||
Warnings []string `json:"warnings"`
|
||||
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
|
||||
Healthy bool `json:"healthy"`
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
Warnings []string `json:"warnings"`
|
||||
|
||||
Reachable bool `json:"reachable"`
|
||||
Latency string `json:"latency"`
|
||||
@ -33,6 +36,7 @@ type DatabaseReportOptions struct {
|
||||
|
||||
func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
|
||||
r.Warnings = []string{}
|
||||
r.Severity = health.SeverityOK
|
||||
r.ThresholdMS = opts.Threshold.Milliseconds()
|
||||
if r.ThresholdMS == 0 {
|
||||
r.ThresholdMS = DatabaseDefaultThreshold.Milliseconds()
|
||||
@ -47,6 +51,7 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
|
||||
pong, err := opts.DB.Ping(ctx)
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("ping: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
pings = append(pings, pong)
|
||||
@ -57,8 +62,9 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
|
||||
latency := pings[pingCount/2]
|
||||
r.Latency = latency.String()
|
||||
r.LatencyMS = latency.Milliseconds()
|
||||
if r.LatencyMS < r.ThresholdMS {
|
||||
r.Healthy = true
|
||||
if r.LatencyMS >= r.ThresholdMS {
|
||||
r.Severity = health.SeverityWarning
|
||||
}
|
||||
r.Healthy = true
|
||||
r.Reachable = true
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/coder/coder/v2/coderd/database/dbmock"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
)
|
||||
|
||||
@ -35,6 +36,7 @@ func TestDatabase(t *testing.T) {
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.True(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityOK, report.Severity)
|
||||
assert.Equal(t, ping.String(), report.Latency)
|
||||
assert.Equal(t, ping.Milliseconds(), report.LatencyMS)
|
||||
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
|
||||
@ -58,6 +60,7 @@ func TestDatabase(t *testing.T) {
|
||||
|
||||
assert.False(t, report.Healthy)
|
||||
assert.False(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityError, report.Severity)
|
||||
assert.Zero(t, report.Latency)
|
||||
require.NotNil(t, report.Error)
|
||||
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
|
||||
@ -84,6 +87,7 @@ func TestDatabase(t *testing.T) {
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.True(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityOK, report.Severity)
|
||||
assert.Equal(t, time.Millisecond.String(), report.Latency)
|
||||
assert.EqualValues(t, 1, report.LatencyMS)
|
||||
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
|
||||
@ -108,8 +112,9 @@ func TestDatabase(t *testing.T) {
|
||||
|
||||
report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: db, Threshold: time.Second})
|
||||
|
||||
assert.False(t, report.Healthy)
|
||||
assert.True(t, report.Healthy)
|
||||
assert.True(t, report.Reachable)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
assert.Equal(t, time.Second.String(), report.Latency)
|
||||
assert.EqualValues(t, 1000, report.LatencyMS)
|
||||
assert.Equal(t, time.Second.Milliseconds(), report.ThresholdMS)
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
"tailscale.com/types/key"
|
||||
tslogger "tailscale.com/types/logger"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/coderd/util/ptr"
|
||||
"github.com/coder/coder/v2/coderd/util/slice"
|
||||
)
|
||||
@ -29,12 +30,15 @@ import (
|
||||
const (
|
||||
warningNodeUsesWebsocket = `Node uses WebSockets because the "Upgrade: DERP" header may be blocked on the load balancer.`
|
||||
oneNodeUnhealthy = "Region is operational, but performance might be degraded as one node is unhealthy."
|
||||
missingNodeReport = "Missing node health report, probably a developer error."
|
||||
)
|
||||
|
||||
// @typescript-generate Report
|
||||
type Report struct {
|
||||
Healthy bool `json:"healthy"`
|
||||
Warnings []string `json:"warnings"`
|
||||
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
|
||||
Healthy bool `json:"healthy"`
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
Warnings []string `json:"warnings"`
|
||||
|
||||
Regions map[int]*RegionReport `json:"regions"`
|
||||
|
||||
@ -47,9 +51,12 @@ type Report struct {
|
||||
|
||||
// @typescript-generate RegionReport
|
||||
type RegionReport struct {
|
||||
mu sync.Mutex
|
||||
Healthy bool `json:"healthy"`
|
||||
Warnings []string `json:"warnings"`
|
||||
mu sync.Mutex
|
||||
|
||||
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
|
||||
Healthy bool `json:"healthy"`
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
Warnings []string `json:"warnings"`
|
||||
|
||||
Region *tailcfg.DERPRegion `json:"region"`
|
||||
NodeReports []*NodeReport `json:"node_reports"`
|
||||
@ -61,8 +68,10 @@ type NodeReport struct {
|
||||
mu sync.Mutex
|
||||
clientCounter int
|
||||
|
||||
Healthy bool `json:"healthy"`
|
||||
Warnings []string `json:"warnings"`
|
||||
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
|
||||
Healthy bool `json:"healthy"`
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
Warnings []string `json:"warnings"`
|
||||
|
||||
Node *tailcfg.DERPNode `json:"node"`
|
||||
|
||||
@ -91,6 +100,8 @@ type ReportOptions struct {
|
||||
|
||||
func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
|
||||
r.Healthy = true
|
||||
r.Severity = health.SeverityOK
|
||||
|
||||
r.Regions = map[int]*RegionReport{}
|
||||
r.Warnings = []string{}
|
||||
|
||||
@ -142,14 +153,22 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
|
||||
r.NetcheckErr = convertError(netcheckErr)
|
||||
|
||||
wg.Wait()
|
||||
|
||||
// Review region reports and select the highest severity.
|
||||
for _, regionReport := range r.Regions {
|
||||
if regionReport.Severity.Value() > r.Severity.Value() {
|
||||
r.Severity = regionReport.Severity
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (r *RegionReport) Run(ctx context.Context) {
|
||||
r.Healthy = true
|
||||
r.Severity = health.SeverityOK
|
||||
r.NodeReports = []*NodeReport{}
|
||||
|
||||
wg := &sync.WaitGroup{}
|
||||
var healthyNodes int // atomic.Int64 is not mandatory as we depend on RegionReport mutex.
|
||||
var unhealthyNodes int // atomic.Int64 is not mandatory as we depend on RegionReport mutex.
|
||||
|
||||
wg.Add(len(r.Region.Nodes))
|
||||
for _, node := range r.Region.Nodes {
|
||||
@ -166,6 +185,7 @@ func (r *RegionReport) Run(ctx context.Context) {
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
nodeReport.Error = ptr.Ref(fmt.Sprint(err))
|
||||
nodeReport.Severity = health.SeverityError
|
||||
}
|
||||
}()
|
||||
|
||||
@ -173,8 +193,8 @@ func (r *RegionReport) Run(ctx context.Context) {
|
||||
|
||||
r.mu.Lock()
|
||||
r.NodeReports = append(r.NodeReports, &nodeReport)
|
||||
if nodeReport.Healthy {
|
||||
healthyNodes++
|
||||
if nodeReport.Severity != health.SeverityOK {
|
||||
unhealthyNodes++
|
||||
}
|
||||
|
||||
for _, w := range nodeReport.Warnings {
|
||||
@ -190,11 +210,29 @@ func (r *RegionReport) Run(ctx context.Context) {
|
||||
|
||||
sortNodeReports(r.NodeReports)
|
||||
|
||||
// Coder allows for 1 unhealthy node in the region, unless there is only 1 node.
|
||||
if len(r.Region.Nodes) != len(r.NodeReports) {
|
||||
r.Healthy = false
|
||||
r.Severity = health.SeverityError
|
||||
r.Error = ptr.Ref(missingNodeReport)
|
||||
return
|
||||
}
|
||||
|
||||
if len(r.Region.Nodes) == 1 {
|
||||
r.Healthy = healthyNodes == len(r.Region.Nodes)
|
||||
} else if healthyNodes < len(r.Region.Nodes) {
|
||||
r.Healthy = r.NodeReports[0].Severity != health.SeverityError
|
||||
r.Severity = r.NodeReports[0].Severity
|
||||
} else if unhealthyNodes == 1 {
|
||||
// r.Healthy = true (by default)
|
||||
r.Severity = health.SeverityWarning
|
||||
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
|
||||
} else if unhealthyNodes > 1 {
|
||||
r.Healthy = false
|
||||
|
||||
// Review node reports and select the highest severity.
|
||||
for _, nodeReport := range r.NodeReports {
|
||||
if nodeReport.Severity.Value() > r.Severity.Value() {
|
||||
r.Severity = nodeReport.Severity
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -221,6 +259,7 @@ func (r *NodeReport) Run(ctx context.Context) {
|
||||
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
r.Severity = health.SeverityOK
|
||||
r.ClientLogs = [][]string{}
|
||||
r.ClientErrs = [][]string{}
|
||||
|
||||
@ -243,10 +282,12 @@ func (r *NodeReport) Run(ctx context.Context) {
|
||||
// The node was marked as STUN compatible but the STUN test failed.
|
||||
r.STUN.Error != nil {
|
||||
r.Healthy = false
|
||||
r.Severity = health.SeverityError
|
||||
}
|
||||
|
||||
if r.UsesWebsocket {
|
||||
r.Warnings = append(r.Warnings, warningNodeUsesWebsocket)
|
||||
r.Severity = health.SeverityWarning
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@ import (
|
||||
"tailscale.com/types/key"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
)
|
||||
@ -123,10 +124,13 @@ func TestDERP(t *testing.T) {
|
||||
report.Run(ctx, opts)
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
for _, region := range report.Regions {
|
||||
assert.True(t, region.Healthy)
|
||||
assert.True(t, region.NodeReports[0].Healthy)
|
||||
assert.Equal(t, health.SeverityOK, region.NodeReports[0].Severity)
|
||||
assert.False(t, region.NodeReports[1].Healthy)
|
||||
assert.Equal(t, health.SeverityError, region.NodeReports[1].Severity)
|
||||
assert.Len(t, region.Warnings, 1)
|
||||
}
|
||||
})
|
||||
@ -221,12 +225,15 @@ func TestDERP(t *testing.T) {
|
||||
report.Run(ctx, opts)
|
||||
|
||||
assert.True(t, report.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, report.Severity)
|
||||
assert.NotEmpty(t, report.Warnings)
|
||||
for _, region := range report.Regions {
|
||||
assert.True(t, region.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, region.Severity)
|
||||
assert.NotEmpty(t, region.Warnings)
|
||||
for _, node := range region.NodeReports {
|
||||
assert.True(t, node.Healthy)
|
||||
assert.Equal(t, health.SeverityWarning, node.Severity)
|
||||
assert.NotEmpty(t, node.Warnings)
|
||||
assert.True(t, node.CanExchangeMessages)
|
||||
assert.NotEmpty(t, node.RoundTripPing)
|
||||
|
5
coderd/healthcheck/derphealth/doc.go
Normal file
5
coderd/healthcheck/derphealth/doc.go
Normal file
@ -0,0 +1,5 @@
|
||||
package derphealth
|
||||
|
||||
// DERP healthcheck is kept in a separate package as it is used by `cli/netcheck.go`,
|
||||
// which is part of the slim binary. The slim binary can't depend on `database`,
|
||||
// which is used by the database healthcheck.
|
20
coderd/healthcheck/health/model.go
Normal file
20
coderd/healthcheck/health/model.go
Normal file
@ -0,0 +1,20 @@
|
||||
package health
|
||||
|
||||
// Known severities of a healthcheck result, listed from least to most serious.
const (
	// SeverityOK means the check found nothing to report.
	SeverityOK Severity = "ok"
	// SeverityWarning means the check found a problem that is not treated as an error.
	SeverityWarning Severity = "warning"
	// SeverityError means the check failed.
	SeverityError Severity = "error"
)

// Severity is the string label attached to a healthcheck report. It is
// serialized as-is, so the constant values are part of the API.
//
// @typescript-generate Severity
type Severity string

// severityRank assigns every known severity a numeric rank so that two
// severities can be compared; a higher rank is more serious.
var severityRank = map[Severity]int{
	SeverityOK:      0,
	SeverityWarning: 1,
	SeverityError:   2,
}

// Value returns the numeric rank of s: 0 for SeverityOK, 1 for
// SeverityWarning and 2 for SeverityError.
//
// A severity that is not in severityRank (including the empty string) also
// yields 0, because indexing a map with a missing key returns the zero value.
// Such values are therefore indistinguishable from SeverityOK when compared
// by rank.
func (s Severity) Value() int {
	return severityRank[s]
}
|
@ -8,6 +8,7 @@ import (
|
||||
|
||||
"github.com/coder/coder/v2/buildinfo"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/coderd/util/ptr"
|
||||
)
|
||||
|
||||
@ -30,7 +31,10 @@ type Report struct {
|
||||
// Time is the time the report was generated at.
|
||||
Time time.Time `json:"time"`
|
||||
// Healthy is true if the report returns no errors.
|
||||
// Deprecated: use `Severity` instead
|
||||
Healthy bool `json:"healthy"`
|
||||
// Severity indicates the status of Coder health.
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
// FailingSections is a list of sections that have failed their healthcheck.
|
||||
FailingSections []string `json:"failing_sections"`
|
||||
|
||||
@ -151,6 +155,22 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
|
||||
}
|
||||
|
||||
report.Healthy = len(report.FailingSections) == 0
|
||||
|
||||
// Review healthcheck sub-reports.
|
||||
report.Severity = health.SeverityOK
|
||||
|
||||
if report.DERP.Severity.Value() > report.Severity.Value() {
|
||||
report.Severity = report.DERP.Severity
|
||||
}
|
||||
if report.AccessURL.Severity.Value() > report.Severity.Value() {
|
||||
report.Severity = report.AccessURL.Severity
|
||||
}
|
||||
if report.Websocket.Severity.Value() > report.Severity.Value() {
|
||||
report.Severity = report.Websocket.Severity
|
||||
}
|
||||
if report.Database.Severity.Value() > report.Severity.Value() {
|
||||
report.Severity = report.Database.Severity
|
||||
}
|
||||
return &report
|
||||
}
|
||||
|
||||
|
@ -11,6 +11,8 @@ import (
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
"nhooyr.io/websocket"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
)
|
||||
|
||||
type WebsocketReportOptions struct {
|
||||
@ -21,8 +23,10 @@ type WebsocketReportOptions struct {
|
||||
|
||||
// @typescript-generate WebsocketReport
|
||||
type WebsocketReport struct {
|
||||
Healthy bool `json:"healthy"`
|
||||
Warnings []string `json:"warnings"`
|
||||
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
|
||||
Healthy bool `json:"healthy"`
|
||||
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
|
||||
Warnings []string `json:"warnings"`
|
||||
|
||||
Body string `json:"body"`
|
||||
Code int `json:"code"`
|
||||
@ -33,10 +37,12 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
defer cancel()
|
||||
|
||||
r.Severity = health.SeverityOK
|
||||
r.Warnings = []string{}
|
||||
u, err := opts.AccessURL.Parse("/api/v2/debug/ws")
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("parse access url: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
if u.Scheme == "https" {
|
||||
@ -64,6 +70,7 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
|
||||
}
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("websocket dial: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
defer c.Close(websocket.StatusGoingAway, "goodbye")
|
||||
@ -73,22 +80,26 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
|
||||
err := c.Write(ctx, websocket.MessageText, []byte(msg))
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("write message: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
|
||||
ty, got, err := c.Read(ctx)
|
||||
if err != nil {
|
||||
r.Error = convertError(xerrors.Errorf("read message: %w", err))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
|
||||
if ty != websocket.MessageText {
|
||||
r.Error = convertError(xerrors.Errorf("received incorrect message type: %v", ty))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
|
||||
if string(got) != msg {
|
||||
r.Error = convertError(xerrors.Errorf("received incorrect message: wanted %q, got %q", msg, string(got)))
|
||||
r.Severity = health.SeverityError
|
||||
return
|
||||
}
|
||||
}
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/healthcheck"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
)
|
||||
|
||||
@ -63,6 +64,7 @@ func TestWebsocket(t *testing.T) {
|
||||
})
|
||||
|
||||
require.NotNil(t, wsReport.Error)
|
||||
require.Equal(t, health.SeverityError, wsReport.Severity)
|
||||
assert.Equal(t, wsReport.Body, "test error")
|
||||
assert.Equal(t, wsReport.Code, http.StatusBadRequest)
|
||||
})
|
||||
|
Reference in New Issue
Block a user