chore(healthcheck): fix DERP test flakes (#7211)

This commit is contained in:
Colin Adler
2023-04-19 20:03:05 -05:00
committed by GitHub
parent f60b5579a7
commit 5f5edb18b0
6 changed files with 161 additions and 41 deletions

16
coderd/apidoc/docs.go generated
View File

@ -9749,6 +9749,19 @@ const docTemplate = `{
"ParameterSourceSchemeData" "ParameterSourceSchemeData"
] ]
}, },
"derp.ServerInfoMessage": {
"type": "object",
"properties": {
"tokenBucketBytesBurst": {
"description": "TokenBucketBytesBurst is how many bytes the server will\nallow to burst, temporarily violating\nTokenBucketBytesPerSecond.\n\nZero means unspecified. There might be a limit, but the\nclient need not try to respect it.",
"type": "integer"
},
"tokenBucketBytesPerSecond": {
"description": "TokenBucketBytesPerSecond is how many bytes per second the\nserver says it will accept, including all framing bytes.\n\nZero means unspecified. There might be a limit, but the\nclient need not try to respect it.",
"type": "integer"
}
}
},
"healthcheck.AccessURLReport": { "healthcheck.AccessURLReport": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -9795,6 +9808,9 @@ const docTemplate = `{
"node": { "node": {
"$ref": "#/definitions/tailcfg.DERPNode" "$ref": "#/definitions/tailcfg.DERPNode"
}, },
"node_info": {
"$ref": "#/definitions/derp.ServerInfoMessage"
},
"round_trip_ping": { "round_trip_ping": {
"type": "integer" "type": "integer"
}, },

View File

@ -8812,6 +8812,19 @@
"ParameterSourceSchemeData" "ParameterSourceSchemeData"
] ]
}, },
"derp.ServerInfoMessage": {
"type": "object",
"properties": {
"tokenBucketBytesBurst": {
"description": "TokenBucketBytesBurst is how many bytes the server will\nallow to burst, temporarily violating\nTokenBucketBytesPerSecond.\n\nZero means unspecified. There might be a limit, but the\nclient need not try to respect it.",
"type": "integer"
},
"tokenBucketBytesPerSecond": {
"description": "TokenBucketBytesPerSecond is how many bytes per second the\nserver says it will accept, including all framing bytes.\n\nZero means unspecified. There might be a limit, but the\nclient need not try to respect it.",
"type": "integer"
}
}
},
"healthcheck.AccessURLReport": { "healthcheck.AccessURLReport": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -8858,6 +8871,9 @@
"node": { "node": {
"$ref": "#/definitions/tailcfg.DERPNode" "$ref": "#/definitions/tailcfg.DERPNode"
}, },
"node_info": {
"$ref": "#/definitions/derp.ServerInfoMessage"
},
"round_trip_ping": { "round_trip_ping": {
"type": "integer" "type": "integer"
}, },

View File

@ -11,7 +11,6 @@ import (
"sync/atomic" "sync/atomic"
"time" "time"
"golang.org/x/sync/errgroup"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"tailscale.com/derp" "tailscale.com/derp"
"tailscale.com/derp/derphttp" "tailscale.com/derp/derphttp"
@ -21,6 +20,8 @@ import (
"tailscale.com/tailcfg" "tailscale.com/tailcfg"
"tailscale.com/types/key" "tailscale.com/types/key"
tslogger "tailscale.com/types/logger" tslogger "tailscale.com/types/logger"
"github.com/coder/coder/coderd/util/ptr"
) )
type DERPReport struct { type DERPReport struct {
@ -48,11 +49,12 @@ type DERPNodeReport struct {
Healthy bool `json:"healthy"` Healthy bool `json:"healthy"`
Node *tailcfg.DERPNode `json:"node"` Node *tailcfg.DERPNode `json:"node"`
CanExchangeMessages bool `json:"can_exchange_messages"` ServerInfo derp.ServerInfoMessage `json:"node_info"`
RoundTripPing time.Duration `json:"round_trip_ping"` CanExchangeMessages bool `json:"can_exchange_messages"`
UsesWebsocket bool `json:"uses_websocket"` RoundTripPing time.Duration `json:"round_trip_ping"`
ClientLogs [][]string `json:"client_logs"` UsesWebsocket bool `json:"uses_websocket"`
ClientErrs [][]error `json:"client_errs"` ClientLogs [][]string `json:"client_logs"`
ClientErrs [][]error `json:"client_errs"`
STUN DERPStunReport `json:"stun"` STUN DERPStunReport `json:"stun"`
} }
@ -161,8 +163,19 @@ func (r *DERPNodeReport) Run(ctx context.Context) {
r.ClientLogs = [][]string{} r.ClientLogs = [][]string{}
r.ClientErrs = [][]error{} r.ClientErrs = [][]error{}
r.doExchangeMessage(ctx) wg := &sync.WaitGroup{}
r.doSTUNTest(ctx)
wg.Add(2)
go func() {
defer wg.Done()
r.doExchangeMessage(ctx)
}()
go func() {
defer wg.Done()
r.doSTUNTest(ctx)
}()
wg.Wait()
// We can't exchange messages with the node, // We can't exchange messages with the node,
if (!r.CanExchangeMessages && !r.Node.STUNOnly) || if (!r.CanExchangeMessages && !r.Node.STUNOnly) ||
@ -181,8 +194,13 @@ func (r *DERPNodeReport) doExchangeMessage(ctx context.Context) {
return return
} }
var peerKey atomic.Pointer[key.NodePublic] var (
eg, ctx := errgroup.WithContext(ctx) peerKey atomic.Pointer[key.NodePublic]
lastSent atomic.Pointer[time.Time]
)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
wg := &sync.WaitGroup{}
receive, receiveID, err := r.derpClient(ctx, r.derpURL()) receive, receiveID, err := r.derpClient(ctx, r.derpURL())
if err != nil { if err != nil {
@ -190,51 +208,64 @@ func (r *DERPNodeReport) doExchangeMessage(ctx context.Context) {
} }
defer receive.Close() defer receive.Close()
eg.Go(func() error { wg.Add(2)
go func() {
defer wg.Done()
defer receive.Close() defer receive.Close()
pkt, err := r.recvData(receive) pkt, err := r.recvData(receive)
if err != nil { if err != nil {
r.writeClientErr(receiveID, xerrors.Errorf("recv derp message: %w", err)) r.writeClientErr(receiveID, xerrors.Errorf("recv derp message: %w", err))
return err return
} }
if *peerKey.Load() != pkt.Source { if *peerKey.Load() != pkt.Source {
r.writeClientErr(receiveID, xerrors.Errorf("received pkt from unknown peer: %s", pkt.Source.ShortString())) r.writeClientErr(receiveID, xerrors.Errorf("received pkt from unknown peer: %s", pkt.Source.ShortString()))
return err return
} }
t, err := time.Parse(time.RFC3339Nano, string(pkt.Data)) t := lastSent.Load()
if err != nil {
r.writeClientErr(receiveID, xerrors.Errorf("parse time from peer: %w", err))
return err
}
r.mu.Lock() r.mu.Lock()
r.CanExchangeMessages = true r.CanExchangeMessages = true
r.RoundTripPing = time.Since(t) r.RoundTripPing = time.Since(*t)
r.mu.Unlock() r.mu.Unlock()
return nil
}) cancel()
eg.Go(func() error { }()
go func() {
defer wg.Done()
send, sendID, err := r.derpClient(ctx, r.derpURL()) send, sendID, err := r.derpClient(ctx, r.derpURL())
if err != nil { if err != nil {
return err return
} }
defer send.Close() defer send.Close()
key := send.SelfPublicKey() key := send.SelfPublicKey()
peerKey.Store(&key) peerKey.Store(&key)
err = send.Send(receive.SelfPublicKey(), []byte(time.Now().Format(time.RFC3339Nano))) ticker := time.NewTicker(time.Second)
if err != nil { defer ticker.Stop()
r.writeClientErr(sendID, xerrors.Errorf("send derp message: %w", err))
return err
}
return nil
})
_ = eg.Wait() var iter uint8
for {
lastSent.Store(ptr.Ref(time.Now()))
err = send.Send(receive.SelfPublicKey(), []byte{iter})
if err != nil {
r.writeClientErr(sendID, xerrors.Errorf("send derp message: %w", err))
return
}
iter++
select {
case <-ctx.Done():
return
case <-ticker.C:
}
}
}()
wg.Wait()
} }
func (r *DERPNodeReport) doSTUNTest(ctx context.Context) { func (r *DERPNodeReport) doSTUNTest(ctx context.Context) {
@ -378,7 +409,7 @@ func (r *DERPNodeReport) derpClient(ctx context.Context, derpURL *url.URL) (*der
return client, id, nil return client, id, nil
} }
func (*DERPNodeReport) recvData(client *derphttp.Client) (derp.ReceivedPacket, error) { func (r *DERPNodeReport) recvData(client *derphttp.Client) (derp.ReceivedPacket, error) {
for { for {
msg, err := client.Recv() msg, err := client.Recv()
if err != nil { if err != nil {
@ -388,6 +419,10 @@ func (*DERPNodeReport) recvData(client *derphttp.Client) (derp.ReceivedPacket, e
switch msg := msg.(type) { switch msg := msg.(type) {
case derp.ReceivedPacket: case derp.ReceivedPacket:
return msg, nil return msg, nil
case derp.ServerInfoMessage:
r.mu.Lock()
r.ServerInfo = msg
r.mu.Unlock()
default: default:
// Drop all others! // Drop all others!
} }

View File

@ -19,6 +19,7 @@ import (
"github.com/coder/coder/coderd/healthcheck" "github.com/coder/coder/coderd/healthcheck"
"github.com/coder/coder/tailnet" "github.com/coder/coder/tailnet"
"github.com/coder/coder/testutil"
) )
//nolint:tparallel //nolint:tparallel
@ -66,8 +67,7 @@ func TestDERP(t *testing.T) {
for _, node := range region.NodeReports { for _, node := range region.NodeReports {
assert.True(t, node.Healthy) assert.True(t, node.Healthy)
assert.True(t, node.CanExchangeMessages) assert.True(t, node.CanExchangeMessages)
// TODO: test this without serializing time.Time over the wire. assert.Positive(t, node.RoundTripPing)
// assert.Positive(t, node.RoundTripPing)
assert.Len(t, node.ClientLogs, 2) assert.Len(t, node.ClientLogs, 2)
assert.Len(t, node.ClientLogs[0], 1) assert.Len(t, node.ClientLogs[0], 1)
assert.Len(t, node.ClientErrs[0], 0) assert.Len(t, node.ClientErrs[0], 0)
@ -81,9 +81,13 @@ func TestDERP(t *testing.T) {
} }
}) })
t.Run("OK/Tailscale/Dallas", func(t *testing.T) { t.Run("Tailscale/Dallas/OK", func(t *testing.T) {
t.Parallel() t.Parallel()
if testutil.InCI() {
t.Skip("This test depends on reaching out over the network to Tailscale servers, which is inherently flaky.")
}
derpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) }) derpSrv := derp.NewServer(key.NewNode(), func(format string, args ...any) { t.Logf(format, args...) })
defer derpSrv.Close() defer derpSrv.Close()
srv := httptest.NewServer(derphttp.Handler(derpSrv)) srv := httptest.NewServer(derphttp.Handler(derpSrv))
@ -107,8 +111,7 @@ func TestDERP(t *testing.T) {
for _, node := range region.NodeReports { for _, node := range region.NodeReports {
assert.True(t, node.Healthy) assert.True(t, node.Healthy)
assert.True(t, node.CanExchangeMessages) assert.True(t, node.CanExchangeMessages)
// TODO: test this without serializing time.Time over the wire. assert.Positive(t, node.RoundTripPing)
// assert.Positive(t, node.RoundTripPing)
assert.Len(t, node.ClientLogs, 2) assert.Len(t, node.ClientLogs, 2)
assert.Len(t, node.ClientLogs[0], 1) assert.Len(t, node.ClientLogs[0], 1)
assert.Len(t, node.ClientErrs[0], 0) assert.Len(t, node.ClientErrs[0], 0)
@ -171,13 +174,12 @@ func TestDERP(t *testing.T) {
for _, node := range region.NodeReports { for _, node := range region.NodeReports {
assert.False(t, node.Healthy) assert.False(t, node.Healthy)
assert.True(t, node.CanExchangeMessages) assert.True(t, node.CanExchangeMessages)
// TODO: test this without serializing time.Time over the wire. assert.Positive(t, node.RoundTripPing)
// assert.Positive(t, node.RoundTripPing)
assert.Len(t, node.ClientLogs, 2) assert.Len(t, node.ClientLogs, 2)
assert.Len(t, node.ClientLogs[0], 3) assert.Len(t, node.ClientLogs[0], 3)
assert.Len(t, node.ClientLogs[1], 3) assert.Len(t, node.ClientLogs[1], 3)
assert.Len(t, node.ClientErrs, 2) assert.Len(t, node.ClientErrs, 2)
assert.Len(t, node.ClientErrs[0], 1) assert.Len(t, node.ClientErrs[0], 1) // this
assert.Len(t, node.ClientErrs[1], 1) assert.Len(t, node.ClientErrs[1], 1)
assert.True(t, node.UsesWebsocket) assert.True(t, node.UsesWebsocket)
@ -188,7 +190,7 @@ func TestDERP(t *testing.T) {
} }
}) })
t.Run("OK/STUNOnly", func(t *testing.T) { t.Run("STUNOnly/OK", func(t *testing.T) {
t.Parallel() t.Parallel()
var ( var (

View File

@ -103,6 +103,10 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,
@ -158,6 +162,10 @@ curl -X GET http://coder-server:8080/api/v2/debug/health \
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,

View File

@ -5563,6 +5563,24 @@ Parameter represents a set value for the scope.
| `none` | | `none` |
| `data` | | `data` |
## derp.ServerInfoMessage
```json
{
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
}
```
### Properties
| Name | Type | Required | Restrictions | Description |
| ------------------------------------------------------------------------------------------ | ------- | -------- | ------------ | ------------------------------------------------------------------------------------------------------------------------ |
| `tokenBucketBytesBurst` | integer | false | | Tokenbucketbytesburst is how many bytes the server will allow to burst, temporarily violating TokenBucketBytesPerSecond. |
| Zero means unspecified. There might be a limit, but the client need not try to respect it. |
| `tokenBucketBytesPerSecond` | integer | false | | Tokenbucketbytespersecond is how many bytes per second the server says it will accept, including all framing bytes. |
| Zero means unspecified. There might be a limit, but the client need not try to respect it. |
## healthcheck.AccessURLReport ## healthcheck.AccessURLReport
```json ```json
@ -5607,6 +5625,10 @@ Parameter represents a set value for the scope.
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,
@ -5626,6 +5648,7 @@ Parameter represents a set value for the scope.
| `client_logs` | array of array | false | | | | `client_logs` | array of array | false | | |
| `healthy` | boolean | false | | | | `healthy` | boolean | false | | |
| `node` | [tailcfg.DERPNode](#tailcfgderpnode) | false | | | | `node` | [tailcfg.DERPNode](#tailcfgderpnode) | false | | |
| `node_info` | [derp.ServerInfoMessage](#derpserverinfomessage) | false | | |
| `round_trip_ping` | integer | false | | | | `round_trip_ping` | integer | false | | |
| `stun` | [healthcheck.DERPStunReport](#healthcheckderpstunreport) | false | | | | `stun` | [healthcheck.DERPStunReport](#healthcheckderpstunreport) | false | | |
| `uses_websocket` | boolean | false | | | | `uses_websocket` | boolean | false | | |
@ -5655,6 +5678,10 @@ Parameter represents a set value for the scope.
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,
@ -5758,6 +5785,10 @@ Parameter represents a set value for the scope.
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,
@ -5813,6 +5844,10 @@ Parameter represents a set value for the scope.
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,
@ -5947,6 +5982,10 @@ Parameter represents a set value for the scope.
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,
@ -6002,6 +6041,10 @@ Parameter represents a set value for the scope.
"stunport": 0, "stunport": 0,
"stuntestIP": "string" "stuntestIP": "string"
}, },
"node_info": {
"tokenBucketBytesBurst": 0,
"tokenBucketBytesPerSecond": 0
},
"round_trip_ping": 0, "round_trip_ping": 0,
"stun": { "stun": {
"canSTUN": true, "canSTUN": true,