diff --git a/coderd/healthcheck/derphealth/derp.go b/coderd/healthcheck/derphealth/derp.go index 83dfcbf4a1..8f99ca5994 100644 --- a/coderd/healthcheck/derphealth/derp.go +++ b/coderd/healthcheck/derphealth/derp.go @@ -26,6 +26,7 @@ import ( const ( warningNodeUsesWebsocket = `Node uses WebSockets because the "Upgrade: DERP" header may be blocked on the load balancer.` + oneNodeUnhealthy = "Region is operational, but performance might be degraded as one node is unhealthy." ) // @typescript-generate Report @@ -146,6 +147,7 @@ func (r *RegionReport) Run(ctx context.Context) { r.NodeReports = []*NodeReport{} wg := &sync.WaitGroup{} + var healthyNodes int // atomic.Int64 is not mandatory as we depend on RegionReport mutex. wg.Add(len(r.Region.Nodes)) for _, node := range r.Region.Nodes { @@ -169,8 +171,8 @@ func (r *RegionReport) Run(ctx context.Context) { r.mu.Lock() r.NodeReports = append(r.NodeReports, &nodeReport) - if !nodeReport.Healthy { - r.Healthy = false + if nodeReport.Healthy { + healthyNodes++ } for _, w := range nodeReport.Warnings { @@ -179,8 +181,14 @@ func (r *RegionReport) Run(ctx context.Context) { r.mu.Unlock() }() } - wg.Wait() + + // Coder allows for 1 unhealthy node in the region, unless there is only 1 node. + if len(r.Region.Nodes) == 1 { + r.Healthy = healthyNodes == len(r.Region.Nodes) + } else if healthyNodes < len(r.Region.Nodes) { + r.Warnings = append(r.Warnings, oneNodeUnhealthy) + } } func (r *NodeReport) derpURL() *url.URL { diff --git a/coderd/healthcheck/derphealth/derp_test.go b/coderd/healthcheck/derphealth/derp_test.go index c9ed3d5915..a877e03fd2 100644 --- a/coderd/healthcheck/derphealth/derp_test.go +++ b/coderd/healthcheck/derphealth/derp_test.go @@ -81,6 +81,56 @@ func TestDERP(t *testing.T) { } }) + t.Run("HealthyWithNodeDegraded", func(t *testing.T) { + t.Parallel() + + healthyDerpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) }) + defer healthyDerpSrv.Close() + healthySrv := httptest.NewServer(derphttp.Handler(healthyDerpSrv)) + defer healthySrv.Close() + + var ( + ctx = context.Background() + report = derphealth.Report{} + derpURL, _ = url.Parse(healthySrv.URL) + opts = &derphealth.ReportOptions{ + DERPMap: &tailcfg.DERPMap{Regions: map[int]*tailcfg.DERPRegion{ + 1: { + EmbeddedRelay: true, + RegionID: 999, + Nodes: []*tailcfg.DERPNode{{ + Name: "1a", + RegionID: 999, + HostName: derpURL.Host, + IPv4: derpURL.Host, + STUNPort: -1, + InsecureForTests: true, + ForceHTTP: true, + }, { + Name: "1b", + RegionID: 999, + HostName: "derp.is.dead.tld", + IPv4: "derp.is.dead.tld", + STUNPort: -1, + InsecureForTests: true, + ForceHTTP: true, + }}, + }, + }}, + } + ) + + report.Run(ctx, opts) + + assert.True(t, report.Healthy) + for _, region := range report.Regions { + assert.True(t, region.Healthy) + assert.True(t, region.NodeReports[0].Healthy) + assert.False(t, region.NodeReports[1].Healthy) + assert.Len(t, region.Warnings, 1) + } + }) + t.Run("Tailscale/Dallas/OK", func(t *testing.T) { t.Parallel()