feat: include health severity in reports (#10817)

This commit is contained in:
Marcin Tojek
2023-11-23 16:08:41 +01:00
committed by GitHub
parent e311e9ec24
commit 78df68348a
18 changed files with 542 additions and 91 deletions

106
coderd/apidoc/docs.go generated
View File

@ -12103,6 +12103,7 @@ const docTemplate = `{
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
"type": "boolean"
},
"node": {
@ -12117,6 +12118,18 @@ const docTemplate = `{
"round_trip_ping_ms": {
"type": "integer"
},
"severity": {
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"stun": {
"$ref": "#/definitions/derphealth.StunReport"
},
@ -12138,6 +12151,7 @@ const docTemplate = `{
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
"type": "boolean"
},
"node_reports": {
@ -12149,6 +12163,18 @@ const docTemplate = `{
"region": {
"$ref": "#/definitions/tailcfg.DERPRegion"
},
"severity": {
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"warnings": {
"type": "array",
"items": {
@ -12164,6 +12190,7 @@ const docTemplate = `{
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
"type": "boolean"
},
"netcheck": {
@ -12184,6 +12211,18 @@ const docTemplate = `{
"$ref": "#/definitions/derphealth.RegionReport"
}
},
"severity": {
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"warnings": {
"type": "array",
"items": {
@ -12206,6 +12245,19 @@ const docTemplate = `{
}
}
},
"health.Severity": {
"type": "string",
"enum": [
"ok",
"warning",
"error"
],
"x-enum-varnames": [
"SeverityOK",
"SeverityWarning",
"SeverityError"
]
},
"healthcheck.AccessURLReport": {
"type": "object",
"properties": {
@ -12216,6 +12268,7 @@ const docTemplate = `{
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
"type": "boolean"
},
"healthz_response": {
@ -12224,6 +12277,18 @@ const docTemplate = `{
"reachable": {
"type": "boolean"
},
"severity": {
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"status_code": {
"type": "integer"
},
@ -12242,6 +12307,7 @@ const docTemplate = `{
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
"type": "boolean"
},
"latency": {
@ -12253,6 +12319,18 @@ const docTemplate = `{
"reachable": {
"type": "boolean"
},
"severity": {
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"threshold_ms": {
"type": "integer"
},
@ -12288,9 +12366,22 @@ const docTemplate = `{
}
},
"healthy": {
"description": "Healthy is true if the report returns no errors.",
"description": "Healthy is true if the report returns no errors.\nDeprecated: use ` + "`" + `Severity` + "`" + ` instead",
"type": "boolean"
},
"severity": {
"description": "Severity indicates the status of Coder health.",
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"time": {
"description": "Time is the time the report was generated at.",
"type": "string"
@ -12313,8 +12404,21 @@ const docTemplate = `{
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use ` + "`" + `Severity` + "`" + ` instead.",
"type": "boolean"
},
"severity": {
"enum": [
"ok",
"warning",
"error"
],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"warnings": {
"type": "array",
"items": {

View File

@ -11024,6 +11024,7 @@
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
"type": "boolean"
},
"node": {
@ -11038,6 +11039,14 @@
"round_trip_ping_ms": {
"type": "integer"
},
"severity": {
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"stun": {
"$ref": "#/definitions/derphealth.StunReport"
},
@ -11059,6 +11068,7 @@
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
"type": "boolean"
},
"node_reports": {
@ -11070,6 +11080,14 @@
"region": {
"$ref": "#/definitions/tailcfg.DERPRegion"
},
"severity": {
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"warnings": {
"type": "array",
"items": {
@ -11085,6 +11103,7 @@
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
"type": "boolean"
},
"netcheck": {
@ -11105,6 +11124,14 @@
"$ref": "#/definitions/derphealth.RegionReport"
}
},
"severity": {
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"warnings": {
"type": "array",
"items": {
@ -11127,6 +11154,11 @@
}
}
},
"health.Severity": {
"type": "string",
"enum": ["ok", "warning", "error"],
"x-enum-varnames": ["SeverityOK", "SeverityWarning", "SeverityError"]
},
"healthcheck.AccessURLReport": {
"type": "object",
"properties": {
@ -11137,6 +11169,7 @@
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
"type": "boolean"
},
"healthz_response": {
@ -11145,6 +11178,14 @@
"reachable": {
"type": "boolean"
},
"severity": {
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"status_code": {
"type": "integer"
},
@ -11163,6 +11204,7 @@
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
"type": "boolean"
},
"latency": {
@ -11174,6 +11216,14 @@
"reachable": {
"type": "boolean"
},
"severity": {
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"threshold_ms": {
"type": "integer"
},
@ -11209,9 +11259,18 @@
}
},
"healthy": {
"description": "Healthy is true if the report returns no errors.",
"description": "Healthy is true if the report returns no errors.\nDeprecated: use `Severity` instead",
"type": "boolean"
},
"severity": {
"description": "Severity indicates the status of Coder health.",
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"time": {
"description": "Time is the time the report was generated at.",
"type": "string"
@ -11234,8 +11293,17 @@
"type": "string"
},
"healthy": {
"description": "Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.",
"type": "boolean"
},
"severity": {
"enum": ["ok", "warning", "error"],
"allOf": [
{
"$ref": "#/definitions/health.Severity"
}
]
},
"warnings": {
"type": "array",
"items": {

View File

@ -9,13 +9,16 @@ import (
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"
)
// @typescript-generate AccessURLReport
type AccessURLReport struct {
Healthy bool `json:"healthy"`
Warnings []string `json:"warnings"`
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Healthy bool `json:"healthy"`
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
Warnings []string `json:"warnings"`
AccessURL string `json:"access_url"`
Reachable bool `json:"reachable"`
@ -33,9 +36,11 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
r.Severity = health.SeverityOK
r.Warnings = []string{}
if opts.AccessURL == nil {
r.Error = ptr.Ref("access URL is nil")
r.Severity = health.SeverityError
return
}
r.AccessURL = opts.AccessURL.String()
@ -47,18 +52,21 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
accessURL, err := opts.AccessURL.Parse("/healthz")
if err != nil {
r.Error = convertError(xerrors.Errorf("parse healthz endpoint: %w", err))
r.Severity = health.SeverityError
return
}
req, err := http.NewRequestWithContext(ctx, "GET", accessURL.String(), nil)
if err != nil {
r.Error = convertError(xerrors.Errorf("create healthz request: %w", err))
r.Severity = health.SeverityError
return
}
res, err := opts.Client.Do(req)
if err != nil {
r.Error = convertError(xerrors.Errorf("get healthz endpoint: %w", err))
r.Severity = health.SeverityError
return
}
defer res.Body.Close()
@ -66,11 +74,15 @@ func (r *AccessURLReport) Run(ctx context.Context, opts *AccessURLReportOptions)
body, err := io.ReadAll(res.Body)
if err != nil {
r.Error = convertError(xerrors.Errorf("read healthz response: %w", err))
r.Severity = health.SeverityError
return
}
r.Reachable = true
r.Healthy = res.StatusCode == http.StatusOK
r.StatusCode = res.StatusCode
if res.StatusCode != http.StatusOK {
r.Severity = health.SeverityWarning
}
r.HealthzResponse = string(body)
}

View File

@ -13,6 +13,7 @@ import (
"github.com/coder/coder/v2/coderd/coderdtest"
"github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/healthcheck/health"
)
func TestAccessURL(t *testing.T) {
@ -34,6 +35,7 @@ func TestAccessURL(t *testing.T) {
assert.True(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityOK, report.Severity)
assert.Equal(t, http.StatusOK, report.StatusCode)
assert.Equal(t, "OK", report.HealthzResponse)
assert.Nil(t, report.Error)
@ -64,6 +66,7 @@ func TestAccessURL(t *testing.T) {
assert.False(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.Equal(t, http.StatusNotFound, report.StatusCode)
assert.Equal(t, string(resp), report.HealthzResponse)
assert.Nil(t, report.Error)
@ -100,6 +103,7 @@ func TestAccessURL(t *testing.T) {
assert.False(t, report.Healthy)
assert.False(t, report.Reachable)
assert.Equal(t, health.SeverityError, report.Severity)
assert.Equal(t, 0, report.StatusCode)
assert.Equal(t, "", report.HealthzResponse)
require.NotNil(t, report.Error)

View File

@ -8,6 +8,7 @@ import (
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/healthcheck/health"
)
const (
@ -16,8 +17,10 @@ const (
// @typescript-generate DatabaseReport
type DatabaseReport struct {
Healthy bool `json:"healthy"`
Warnings []string `json:"warnings"`
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Healthy bool `json:"healthy"`
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
Warnings []string `json:"warnings"`
Reachable bool `json:"reachable"`
Latency string `json:"latency"`
@ -33,6 +36,7 @@ type DatabaseReportOptions struct {
func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
r.Warnings = []string{}
r.Severity = health.SeverityOK
r.ThresholdMS = opts.Threshold.Milliseconds()
if r.ThresholdMS == 0 {
r.ThresholdMS = DatabaseDefaultThreshold.Milliseconds()
@ -47,6 +51,7 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
pong, err := opts.DB.Ping(ctx)
if err != nil {
r.Error = convertError(xerrors.Errorf("ping: %w", err))
r.Severity = health.SeverityError
return
}
pings = append(pings, pong)
@ -57,8 +62,9 @@ func (r *DatabaseReport) Run(ctx context.Context, opts *DatabaseReportOptions) {
latency := pings[pingCount/2]
r.Latency = latency.String()
r.LatencyMS = latency.Milliseconds()
if r.LatencyMS < r.ThresholdMS {
r.Healthy = true
if r.LatencyMS >= r.ThresholdMS {
r.Severity = health.SeverityWarning
}
r.Healthy = true
r.Reachable = true
}

View File

@ -12,6 +12,7 @@ import (
"github.com/coder/coder/v2/coderd/database/dbmock"
"github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/testutil"
)
@ -35,6 +36,7 @@ func TestDatabase(t *testing.T) {
assert.True(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityOK, report.Severity)
assert.Equal(t, ping.String(), report.Latency)
assert.Equal(t, ping.Milliseconds(), report.LatencyMS)
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
@ -58,6 +60,7 @@ func TestDatabase(t *testing.T) {
assert.False(t, report.Healthy)
assert.False(t, report.Reachable)
assert.Equal(t, health.SeverityError, report.Severity)
assert.Zero(t, report.Latency)
require.NotNil(t, report.Error)
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
@ -84,6 +87,7 @@ func TestDatabase(t *testing.T) {
assert.True(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityOK, report.Severity)
assert.Equal(t, time.Millisecond.String(), report.Latency)
assert.EqualValues(t, 1, report.LatencyMS)
assert.Equal(t, healthcheck.DatabaseDefaultThreshold.Milliseconds(), report.ThresholdMS)
@ -108,8 +112,9 @@ func TestDatabase(t *testing.T) {
report.Run(ctx, &healthcheck.DatabaseReportOptions{DB: db, Threshold: time.Second})
assert.False(t, report.Healthy)
assert.True(t, report.Healthy)
assert.True(t, report.Reachable)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.Equal(t, time.Second.String(), report.Latency)
assert.EqualValues(t, 1000, report.LatencyMS)
assert.Equal(t, time.Second.Milliseconds(), report.ThresholdMS)

View File

@ -22,6 +22,7 @@ import (
"tailscale.com/types/key"
tslogger "tailscale.com/types/logger"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/coderd/util/slice"
)
@ -29,12 +30,15 @@ import (
const (
warningNodeUsesWebsocket = `Node uses WebSockets because the "Upgrade: DERP" header may be blocked on the load balancer.`
oneNodeUnhealthy = "Region is operational, but performance might be degraded as one node is unhealthy."
missingNodeReport = "Missing node health report, probably a developer error."
)
// @typescript-generate Report
type Report struct {
Healthy bool `json:"healthy"`
Warnings []string `json:"warnings"`
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Healthy bool `json:"healthy"`
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
Warnings []string `json:"warnings"`
Regions map[int]*RegionReport `json:"regions"`
@ -47,9 +51,12 @@ type Report struct {
// @typescript-generate RegionReport
type RegionReport struct {
mu sync.Mutex
Healthy bool `json:"healthy"`
Warnings []string `json:"warnings"`
mu sync.Mutex
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Healthy bool `json:"healthy"`
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
Warnings []string `json:"warnings"`
Region *tailcfg.DERPRegion `json:"region"`
NodeReports []*NodeReport `json:"node_reports"`
@ -61,8 +68,10 @@ type NodeReport struct {
mu sync.Mutex
clientCounter int
Healthy bool `json:"healthy"`
Warnings []string `json:"warnings"`
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Healthy bool `json:"healthy"`
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
Warnings []string `json:"warnings"`
Node *tailcfg.DERPNode `json:"node"`
@ -91,6 +100,8 @@ type ReportOptions struct {
func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
r.Healthy = true
r.Severity = health.SeverityOK
r.Regions = map[int]*RegionReport{}
r.Warnings = []string{}
@ -142,14 +153,22 @@ func (r *Report) Run(ctx context.Context, opts *ReportOptions) {
r.NetcheckErr = convertError(netcheckErr)
wg.Wait()
// Review region reports and select the highest severity.
for _, regionReport := range r.Regions {
if regionReport.Severity.Value() > r.Severity.Value() {
r.Severity = regionReport.Severity
}
}
}
func (r *RegionReport) Run(ctx context.Context) {
r.Healthy = true
r.Severity = health.SeverityOK
r.NodeReports = []*NodeReport{}
wg := &sync.WaitGroup{}
var healthyNodes int // atomic.Int64 is not mandatory as we depend on RegionReport mutex.
var unhealthyNodes int // atomic.Int64 is not mandatory as we depend on RegionReport mutex.
wg.Add(len(r.Region.Nodes))
for _, node := range r.Region.Nodes {
@ -166,6 +185,7 @@ func (r *RegionReport) Run(ctx context.Context) {
defer func() {
if err := recover(); err != nil {
nodeReport.Error = ptr.Ref(fmt.Sprint(err))
nodeReport.Severity = health.SeverityError
}
}()
@ -173,8 +193,8 @@ func (r *RegionReport) Run(ctx context.Context) {
r.mu.Lock()
r.NodeReports = append(r.NodeReports, &nodeReport)
if nodeReport.Healthy {
healthyNodes++
if nodeReport.Severity != health.SeverityOK {
unhealthyNodes++
}
for _, w := range nodeReport.Warnings {
@ -190,11 +210,29 @@ func (r *RegionReport) Run(ctx context.Context) {
sortNodeReports(r.NodeReports)
// Coder allows for 1 unhealthy node in the region, unless there is only 1 node.
if len(r.Region.Nodes) != len(r.NodeReports) {
r.Healthy = false
r.Severity = health.SeverityError
r.Error = ptr.Ref(missingNodeReport)
return
}
if len(r.Region.Nodes) == 1 {
r.Healthy = healthyNodes == len(r.Region.Nodes)
} else if healthyNodes < len(r.Region.Nodes) {
r.Healthy = r.NodeReports[0].Severity != health.SeverityError
r.Severity = r.NodeReports[0].Severity
} else if unhealthyNodes == 1 {
// r.Healthy = true (by default)
r.Severity = health.SeverityWarning
r.Warnings = append(r.Warnings, oneNodeUnhealthy)
} else if unhealthyNodes > 1 {
r.Healthy = false
// Review node reports and select the highest severity.
for _, nodeReport := range r.NodeReports {
if nodeReport.Severity.Value() > r.Severity.Value() {
r.Severity = nodeReport.Severity
}
}
}
}
@ -221,6 +259,7 @@ func (r *NodeReport) Run(ctx context.Context) {
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
r.Severity = health.SeverityOK
r.ClientLogs = [][]string{}
r.ClientErrs = [][]string{}
@ -243,10 +282,12 @@ func (r *NodeReport) Run(ctx context.Context) {
// The node was marked as STUN compatible but the STUN test failed.
r.STUN.Error != nil {
r.Healthy = false
r.Severity = health.SeverityError
}
if r.UsesWebsocket {
r.Warnings = append(r.Warnings, warningNodeUsesWebsocket)
r.Severity = health.SeverityWarning
}
}

View File

@ -18,6 +18,7 @@ import (
"tailscale.com/types/key"
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/testutil"
)
@ -123,10 +124,13 @@ func TestDERP(t *testing.T) {
report.Run(ctx, opts)
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.True(t, region.NodeReports[0].Healthy)
assert.Equal(t, health.SeverityOK, region.NodeReports[0].Severity)
assert.False(t, region.NodeReports[1].Healthy)
assert.Equal(t, health.SeverityError, region.NodeReports[1].Severity)
assert.Len(t, region.Warnings, 1)
}
})
@ -221,12 +225,15 @@ func TestDERP(t *testing.T) {
report.Run(ctx, opts)
assert.True(t, report.Healthy)
assert.Equal(t, health.SeverityWarning, report.Severity)
assert.NotEmpty(t, report.Warnings)
for _, region := range report.Regions {
assert.True(t, region.Healthy)
assert.Equal(t, health.SeverityWarning, region.Severity)
assert.NotEmpty(t, region.Warnings)
for _, node := range region.NodeReports {
assert.True(t, node.Healthy)
assert.Equal(t, health.SeverityWarning, node.Severity)
assert.NotEmpty(t, node.Warnings)
assert.True(t, node.CanExchangeMessages)
assert.NotEmpty(t, node.RoundTripPing)

View File

@ -0,0 +1,5 @@
package derphealth
// The DERP healthcheck is kept in a separate package because it is used by `cli/netcheck.go`,
// which is part of the slim binary. The slim binary can't depend on `database`,
// which is used by the database healthcheck.

View File

@ -0,0 +1,20 @@
package health
// Severity is the health level attached to a healthcheck report.
//
// @typescript-generate Severity
type Severity string

// Known severity levels, listed from least to most severe.
const (
	SeverityOK      Severity = "ok"
	SeverityWarning Severity = "warning"
	SeverityError   Severity = "error"
)

// severityRank assigns every known severity a numeric rank so that two
// severities can be compared with ordinary integer operators.
var severityRank = map[Severity]int{
	SeverityOK:      0,
	SeverityWarning: 1,
	SeverityError:   2,
}

// Value returns the numeric rank of s; a larger value is more severe.
// A severity that is missing from severityRank ranks as 0, the same as SeverityOK.
func (s Severity) Value() int {
	if rank, known := severityRank[s]; known {
		return rank
	}
	return 0
}

View File

@ -8,6 +8,7 @@ import (
"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/coderd/healthcheck/derphealth"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr"
)
@ -30,7 +31,10 @@ type Report struct {
// Time is the time the report was generated at.
Time time.Time `json:"time"`
// Healthy is true if the report returns no errors.
// Deprecated: use `Severity` instead
Healthy bool `json:"healthy"`
// Severity indicates the status of Coder health.
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
// FailingSections is a list of sections that have failed their healthcheck.
FailingSections []string `json:"failing_sections"`
@ -151,6 +155,22 @@ func Run(ctx context.Context, opts *ReportOptions) *Report {
}
report.Healthy = len(report.FailingSections) == 0
// Review healthcheck sub-reports.
report.Severity = health.SeverityOK
if report.DERP.Severity.Value() > report.Severity.Value() {
report.Severity = report.DERP.Severity
}
if report.AccessURL.Severity.Value() > report.Severity.Value() {
report.Severity = report.AccessURL.Severity
}
if report.Websocket.Severity.Value() > report.Severity.Value() {
report.Severity = report.Websocket.Severity
}
if report.Database.Severity.Value() > report.Severity.Value() {
report.Severity = report.Database.Severity
}
return &report
}

View File

@ -11,6 +11,8 @@ import (
"golang.org/x/xerrors"
"nhooyr.io/websocket"
"github.com/coder/coder/v2/coderd/healthcheck/health"
)
type WebsocketReportOptions struct {
@ -21,8 +23,10 @@ type WebsocketReportOptions struct {
// @typescript-generate WebsocketReport
type WebsocketReport struct {
Healthy bool `json:"healthy"`
Warnings []string `json:"warnings"`
// Healthy is deprecated and left for backward compatibility purposes, use `Severity` instead.
Healthy bool `json:"healthy"`
Severity health.Severity `json:"severity" enums:"ok,warning,error"`
Warnings []string `json:"warnings"`
Body string `json:"body"`
Code int `json:"code"`
@ -33,10 +37,12 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
r.Severity = health.SeverityOK
r.Warnings = []string{}
u, err := opts.AccessURL.Parse("/api/v2/debug/ws")
if err != nil {
r.Error = convertError(xerrors.Errorf("parse access url: %w", err))
r.Severity = health.SeverityError
return
}
if u.Scheme == "https" {
@ -64,6 +70,7 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
}
if err != nil {
r.Error = convertError(xerrors.Errorf("websocket dial: %w", err))
r.Severity = health.SeverityError
return
}
defer c.Close(websocket.StatusGoingAway, "goodbye")
@ -73,22 +80,26 @@ func (r *WebsocketReport) Run(ctx context.Context, opts *WebsocketReportOptions)
err := c.Write(ctx, websocket.MessageText, []byte(msg))
if err != nil {
r.Error = convertError(xerrors.Errorf("write message: %w", err))
r.Severity = health.SeverityError
return
}
ty, got, err := c.Read(ctx)
if err != nil {
r.Error = convertError(xerrors.Errorf("read message: %w", err))
r.Severity = health.SeverityError
return
}
if ty != websocket.MessageText {
r.Error = convertError(xerrors.Errorf("received incorrect message type: %v", ty))
r.Severity = health.SeverityError
return
}
if string(got) != msg {
r.Error = convertError(xerrors.Errorf("received incorrect message: wanted %q, got %q", msg, string(got)))
r.Severity = health.SeverityError
return
}
}

View File

@ -12,6 +12,7 @@ import (
"golang.org/x/xerrors"
"github.com/coder/coder/v2/coderd/healthcheck"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/testutil"
)
@ -63,6 +64,7 @@ func TestWebsocket(t *testing.T) {
})
require.NotNil(t, wsReport.Error)
require.Equal(t, health.SeverityError, wsReport.Severity)
assert.Equal(t, wsReport.Body, "test error")
assert.Equal(t, wsReport.Code, http.StatusBadRequest)
})