Files
coder/coderd/healthcheck/derp.go
Kyle Carberry 22e781eced chore: add /v2 to import module path (#9072)
* chore: add /v2 to import module path

go mod requires semantic versioning with versions greater than 1.x

This was a mechanical update by running:
```
go install github.com/marwan-at-work/mod/cmd/mod@latest
mod upgrade
```

Migrate generated files to import /v2

* Fix gen
2023-08-18 18:55:43 +00:00

462 lines
10 KiB
Go

package healthcheck
import (
"context"
"fmt"
"net"
"net/netip"
"net/url"
"strings"
"sync"
"sync/atomic"
"time"
"golang.org/x/xerrors"
"tailscale.com/derp"
"tailscale.com/derp/derphttp"
"tailscale.com/net/netcheck"
"tailscale.com/net/portmapper"
"tailscale.com/prober"
"tailscale.com/tailcfg"
"tailscale.com/types/key"
tslogger "tailscale.com/types/logger"
"github.com/coder/coder/v2/coderd/util/ptr"
)
// DERPReport is the top-level result of a DERP health check. It is populated
// by Run.
//
// @typescript-generate DERPReport
type DERPReport struct {
// Healthy is set to true when Run starts and is cleared as soon as any
// region report is unhealthy.
Healthy bool `json:"healthy"`
// Regions holds one report per checked region, keyed by the region's
// RegionID.
Regions map[int]*DERPRegionReport `json:"regions"`
// Netcheck is the report returned by the netcheck client for the DERP map.
Netcheck *netcheck.Report `json:"netcheck"`
// NetcheckErr is the converted error returned alongside Netcheck.
NetcheckErr *string `json:"netcheck_err"`
// NetcheckLogs collects every line logged by the netcheck client and its
// port mapper while the report was gathered.
NetcheckLogs []string `json:"netcheck_logs"`
// NOTE(review): Error is not written anywhere in the code visible here;
// confirm where (or whether) it is populated.
Error *string `json:"error"`
}
// DERPRegionReport is the health check result for a single DERP region. It is
// populated by Run.
//
// @typescript-generate DERPRegionReport
type DERPRegionReport struct {
// mu guards NodeReports and Healthy while node checks finish concurrently.
mu sync.Mutex
// Healthy is set to true when Run starts and is cleared as soon as any
// node report is unhealthy.
Healthy bool `json:"healthy"`
// Region is the region being checked; Run checks every node in
// Region.Nodes.
Region *tailcfg.DERPRegion `json:"region"`
// NodeReports holds the per-node reports, appended in the order in which
// the node checks complete.
NodeReports []*DERPNodeReport `json:"node_reports"`
// Error carries the value of a panic recovered while the region was being
// checked.
Error *string `json:"error"`
}
// DERPNodeReport is the health check result for a single DERP node. It is
// populated by Run, which performs a DERP message exchange and a STUN probe.
//
// @typescript-generate DERPNodeReport
type DERPNodeReport struct {
// mu guards the fields that the concurrent checks and the DERP client log
// callbacks write to.
mu sync.Mutex
// clientCounter is the ID handed to the next DERP client; IDs index into
// ClientLogs and ClientErrs.
clientCounter int
// Healthy is cleared by Run when messages cannot be exchanged with a node
// that is not STUN-only, when a client fell back to WebSockets, or when
// the STUN check recorded an error.
Healthy bool `json:"healthy"`
// Node is the DERP node being checked.
Node *tailcfg.DERPNode `json:"node"`
// ServerInfo is the most recent server info message received from the node.
ServerInfo derp.ServerInfoMessage `json:"node_info"`
// CanExchangeMessages is set once a packet sent by one client has been
// received by the other.
CanExchangeMessages bool `json:"can_exchange_messages"`
// RoundTripPing is the time between the most recent send and the receipt
// of a packet, formatted with time.Duration.String.
RoundTripPing string `json:"round_trip_ping"`
// RoundTripPingMs is the same duration in whole milliseconds.
RoundTripPingMs int `json:"round_trip_ping_ms"`
// UsesWebsocket is set when a client logs that it will use WebSockets on
// its next connection attempt.
UsesWebsocket bool `json:"uses_websocket"`
// ClientLogs holds the log lines of each DERP client, indexed by client ID.
ClientLogs [][]string `json:"client_logs"`
// ClientErrs holds the errors of each DERP client, indexed by client ID.
ClientErrs [][]string `json:"client_errs"`
// Error carries the value of a panic recovered while the node was being
// checked.
Error *string `json:"error"`
// STUN is the result of the STUN probe.
STUN DERPStunReport `json:"stun"`
}
// DERPStunReport is the result of the STUN probe against a DERP node.
//
// The fields carry no json tags, so encoding/json emits them under their Go
// names ("Enabled", "CanSTUN", "Error").
//
// @typescript-generate DERPStunReport
type DERPStunReport struct {
// Enabled is set when the STUN test runs, i.e. the node's STUNPort is not -1.
Enabled bool
// CanSTUN is set once the UDP probe has succeeded.
CanSTUN bool
// Error describes why the STUN address could not be determined, the prober
// could not be created, or the probe failed.
Error *string
}
// DERPReportOptions configures DERPReport.Run.
type DERPReportOptions struct {
// DERPMap supplies the regions to check and is also handed to the netcheck
// client. Run dereferences it unconditionally, so it must be non-nil.
DERPMap *tailcfg.DERPMap
}
// Run performs the DERP health check for every region in opts.DERPMap and
// stores the results on r.
//
// Each region is checked in its own goroutine while a netcheck report is
// gathered on the calling goroutine; Run returns once all of them have
// finished. r.Healthy starts out true and is cleared as soon as any region
// reports unhealthy. A region whose check panics is still published in
// r.Regions, marked unhealthy, with the panic value in its Error field.
//
// opts and opts.DERPMap must be non-nil.
func (r *DERPReport) Run(ctx context.Context, opts *DERPReportOptions) {
	r.Healthy = true
	r.Regions = map[int]*DERPRegionReport{}

	wg := &sync.WaitGroup{}
	mu := sync.Mutex{}

	wg.Add(len(opts.DERPMap.Regions))
	for mapID, region := range opts.DERPMap.Regions {
		var (
			region = region
			// Fall back to the map key for a nil region so publishing the
			// report below can never dereference a nil pointer.
			regionID     = mapID
			regionReport = DERPRegionReport{
				Region: region,
			}
		)
		if region != nil {
			regionID = region.RegionID
		}
		go func() {
			defer wg.Done()
			// Publish the report from the deferred function so that a
			// panicking region check is surfaced as an unhealthy region
			// instead of silently disappearing from the report.
			defer func() {
				if err := recover(); err != nil {
					regionReport.Error = ptr.Ref(fmt.Sprint(err))
					regionReport.Healthy = false
				}

				mu.Lock()
				defer mu.Unlock()
				r.Regions[regionID] = &regionReport
				if !regionReport.Healthy {
					r.Healthy = false
				}
			}()

			regionReport.Run(ctx)
		}()
	}

	// The netcheck client may log from several goroutines, so appends to
	// NetcheckLogs are serialized with the same mutex.
	ncLogf := func(format string, args ...any) {
		mu.Lock()
		r.NetcheckLogs = append(r.NetcheckLogs, fmt.Sprintf(format, args...))
		mu.Unlock()
	}
	nc := &netcheck.Client{
		PortMapper: portmapper.NewClient(tslogger.WithPrefix(ncLogf, "portmap: "), nil, nil, nil),
		Logf:       tslogger.WithPrefix(ncLogf, "netcheck: "),
	}
	ncReport, netcheckErr := nc.GetReport(ctx, opts.DERPMap)
	r.Netcheck = ncReport
	r.NetcheckErr = convertError(netcheckErr)

	wg.Wait()
}
// Run checks every node of r.Region concurrently and collects the per-node
// reports in r.NodeReports, in the order in which the checks complete.
//
// r.Healthy starts out true and is cleared as soon as any node reports
// unhealthy. A node whose check panics on its goroutine is still appended to
// r.NodeReports, marked unhealthy, with the panic value in its Error field.
func (r *DERPRegionReport) Run(ctx context.Context) {
	r.Healthy = true
	r.NodeReports = []*DERPNodeReport{}

	wg := &sync.WaitGroup{}

	wg.Add(len(r.Region.Nodes))
	for _, node := range r.Region.Nodes {
		var (
			node       = node
			nodeReport = DERPNodeReport{
				Node:    node,
				Healthy: true,
			}
		)

		go func() {
			defer wg.Done()
			// Record the report from the deferred function so that a
			// panicking node check shows up as an unhealthy node instead of
			// being dropped together with its recovered error.
			defer func() {
				if err := recover(); err != nil {
					nodeReport.Error = ptr.Ref(fmt.Sprint(err))
					nodeReport.Healthy = false
				}

				r.mu.Lock()
				defer r.mu.Unlock()
				r.NodeReports = append(r.NodeReports, &nodeReport)
				if !nodeReport.Healthy {
					r.Healthy = false
				}
			}()

			nodeReport.Run(ctx)
		}()
	}

	wg.Wait()
}
// derpURL builds the URL of the node's "/derp" endpoint.
//
// The scheme is https unless the node forces HTTP. The host is the node's
// hostname, or its IPv4 address when no hostname is set. A non-zero DERP port
// is appended unless it is the default port of the chosen scheme (443 for
// https, 80 for http).
func (r *DERPNodeReport) derpURL() *url.URL {
	scheme := "https"
	if r.Node.ForceHTTP {
		scheme = "http"
	}

	host := r.Node.HostName
	if host == "" {
		host = r.Node.IPv4
	}

	port := r.Node.DERPPort
	isSchemeDefault := (port == 443 && scheme == "https") || (port == 80 && scheme == "http")
	if port != 0 && !isSchemeDefault {
		host = fmt.Sprintf("%s:%d", host, port)
	}

	return &url.URL{
		Scheme: scheme,
		Host:   host,
		Path:   "/derp",
	}
}
// Run executes the DERP message-exchange check and the STUN check for this
// node concurrently, bounded by a 10 second timeout, and then derives
// r.Healthy from their results.
func (r *DERPNodeReport) Run(ctx context.Context) {
	timeoutCtx, stop := context.WithTimeout(ctx, 10*time.Second)
	defer stop()

	r.ClientLogs = [][]string{}
	r.ClientErrs = [][]string{}

	checks := [...]func(context.Context){
		r.doExchangeMessage,
		r.doSTUNTest,
	}

	var pending sync.WaitGroup
	pending.Add(len(checks))
	for _, check := range checks {
		go func(run func(context.Context)) {
			defer pending.Done()
			run(timeoutCtx)
		}(check)
	}
	pending.Wait()

	switch {
	case !r.CanExchangeMessages && !r.Node.STUNOnly:
		// Messages could not be exchanged with a node that is expected to
		// relay DERP traffic.
		r.Healthy = false
	case r.UsesWebsocket:
		// A node may use websockets because `Upgrade: DERP` may be blocked
		// on the load balancer. This is unhealthy because websockets are
		// slower than the regular DERP protocol.
		r.Healthy = false
	case r.STUN.Error != nil:
		// The STUN test ran for this node but recorded an error.
		r.Healthy = false
	}
}
// doExchangeMessage verifies that two DERP clients connected to the node can
// exchange a message through it.
//
// A receiving client waits for a packet while a sending client sends one
// packet per second to the receiver's public key. When the receiver gets a
// packet from the sender, CanExchangeMessages is set, the time since the most
// recent send is recorded as the round trip, and both clients are shut down
// by cancelling the shared context. Failures are recorded in ClientErrs under
// the ID of the client that hit them. STUN-only nodes are skipped.
func (r *DERPNodeReport) doExchangeMessage(ctx context.Context) {
	if r.Node.STUNOnly {
		return
	}

	var (
		peerKey  atomic.Pointer[key.NodePublic]
		lastSent atomic.Pointer[time.Time]
	)

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	wg := &sync.WaitGroup{}

	receive, receiveID, err := r.derpClient(ctx, r.derpURL())
	if err != nil {
		return
	}
	defer receive.Close()

	wg.Add(2)
	go func() {
		defer wg.Done()
		defer receive.Close()

		pkt, err := r.recvData(receive)
		if err != nil {
			r.writeClientErr(receiveID, xerrors.Errorf("recv derp message: %w", err))
			return
		}

		// The sender's key is only stored once its client has connected. A
		// packet that arrives before then cannot be from the sender, so
		// treat a missing key like any other unknown peer instead of
		// dereferencing nil.
		sender := peerKey.Load()
		if sender == nil || *sender != pkt.Source {
			r.writeClientErr(receiveID, xerrors.Errorf("received pkt from unknown peer: %s", pkt.Source.ShortString()))
			return
		}

		sentAt := lastSent.Load()
		if sentAt == nil {
			r.writeClientErr(receiveID, xerrors.New("received pkt before any message was sent"))
			return
		}

		r.mu.Lock()
		r.CanExchangeMessages = true
		rtt := time.Since(*sentAt)
		r.RoundTripPing = rtt.String()
		r.RoundTripPingMs = int(rtt.Milliseconds())
		r.mu.Unlock()

		cancel()
	}()
	go func() {
		defer wg.Done()

		send, sendID, err := r.derpClient(ctx, r.derpURL())
		if err != nil {
			// Without a sender no packet will ever be sent, so stop the
			// receiver now rather than letting it block until the caller's
			// deadline expires.
			cancel()
			return
		}
		defer send.Close()

		sendKey := send.SelfPublicKey()
		peerKey.Store(&sendKey)

		ticker := time.NewTicker(time.Second)
		defer ticker.Stop()

		var iter uint8
		for {
			lastSent.Store(ptr.Ref(time.Now()))
			err = send.Send(receive.SelfPublicKey(), []byte{iter})
			if err != nil {
				r.writeClientErr(sendID, xerrors.Errorf("send derp message: %w", err))
				return
			}
			iter++

			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
			}
		}
	}()

	wg.Wait()
}
// doSTUNTest probes the node's STUN endpoint over UDP and records the outcome
// in r.STUN.
//
// A STUNPort of -1 skips the test and leaves r.STUN untouched. Otherwise
// Enabled is set, and either CanSTUN is set on success or Error describes the
// step that failed.
func (r *DERPNodeReport) doSTUNTest(ctx context.Context) {
	if r.Node.STUNPort == -1 {
		return
	}

	r.mu.Lock()
	r.STUN.Enabled = true
	r.mu.Unlock()

	recordFailure := func(cause error) {
		r.STUN.Error = convertError(cause)
	}

	stunHost, stunPort, err := r.stunAddr(ctx)
	if err != nil {
		recordFailure(xerrors.Errorf("get stun addr: %w", err))
		return
	}

	// The prober only exists so that ProbeUDP can be invoked by hand.
	derpProber, err := prober.DERP(prober.New(), "", time.Second, time.Second, time.Second)
	if err != nil {
		recordFailure(xerrors.Errorf("create prober: %w", err))
		return
	}

	probe := derpProber.ProbeUDP(stunHost, stunPort)
	if err = probe(ctx); err != nil {
		recordFailure(xerrors.Errorf("probe stun: %w", err))
		return
	}

	r.mu.Lock()
	r.STUN.CanSTUN = true
	r.mu.Unlock()
}
// stunAddr determines the IP address and port to use for the STUN probe.
//
// The port is the node's STUNPort, defaulting to 3478 when it is 0; ports
// outside 0-65535 are rejected. The address is taken from the first source
// that yields one, in this order: the node's STUNTestIP, the first address
// the node's HostName resolves to, the node's IPv4 address, and finally its
// IPv6 address. An error is returned when a configured address is malformed,
// the DNS lookup fails, or no source provides an address.
func (r *DERPNodeReport) stunAddr(ctx context.Context) (string, int, error) {
	port := r.Node.STUNPort
	if port == 0 {
		port = 3478
	}
	if port < 0 || port > 1<<16-1 {
		return "", 0, xerrors.Errorf("invalid stun port %d", port)
	}

	if r.Node.STUNTestIP != "" {
		ip, err := netip.ParseAddr(r.Node.STUNTestIP)
		if err != nil {
			return "", 0, xerrors.Errorf("invalid stun test ip %q: %w", r.Node.STUNTestIP, err)
		}

		return ip.String(), port, nil
	}

	if r.Node.HostName != "" {
		addrs, err := net.DefaultResolver.LookupIPAddr(ctx, r.Node.HostName)
		if err != nil {
			return "", 0, xerrors.Errorf("lookup ip addr: %w", err)
		}

		// Only the first resolved address is used. A lookup that yields no
		// addresses falls through to the statically configured IPs below.
		if len(addrs) > 0 {
			return addrs[0].String(), port, nil
		}
	}

	if r.Node.IPv4 != "" {
		ip, err := netip.ParseAddr(r.Node.IPv4)
		if err != nil {
			return "", 0, xerrors.Errorf("invalid ipv4 %q: %w", r.Node.IPv4, err)
		}

		// There is no underlying error to wrap here: the address parsed
		// fine, it is just of the wrong family.
		if !ip.Is4() {
			return "", 0, xerrors.Errorf("provided node ipv4 is not v4 %q", r.Node.IPv4)
		}

		return ip.String(), port, nil
	}
	if r.Node.IPv6 != "" {
		ip, err := netip.ParseAddr(r.Node.IPv6)
		if err != nil {
			return "", 0, xerrors.Errorf("invalid ipv6 %q: %w", r.Node.IPv6, err)
		}

		if !ip.Is6() {
			return "", 0, xerrors.Errorf("provided node ipv6 is not v6 %q", r.Node.IPv6)
		}

		return ip.String(), port, nil
	}

	return "", 0, xerrors.New("no stun ips provided")
}
// writeClientErr appends err's message to the error list of the DERP client
// identified by clientID. The list must already exist, i.e. clientID must
// have been handed out by derpClient.
func (r *DERPNodeReport) writeClientErr(clientID int, err error) {
	r.mu.Lock()
	clientErrs := append(r.ClientErrs[clientID], err.Error())
	r.ClientErrs[clientID] = clientErrs
	r.mu.Unlock()
}
// derpClient creates a DERP client for derpURL and connects it, retrying the
// connection up to five times.
//
// Every client gets an ID, returned as the second value even on failure, that
// indexes its entries in r.ClientLogs and r.ClientErrs. The client's log
// output is captured in ClientLogs, and a log line announcing a WebSocket
// fallback sets r.UsesWebsocket. Every failed connection attempt and the
// final failure are recorded in ClientErrs. The client is closed when ctx is
// done.
func (r *DERPNodeReport) derpClient(ctx context.Context, derpURL *url.URL) (*derphttp.Client, int, error) {
	const maxConnectAttempts = 5

	r.mu.Lock()
	id := r.clientCounter
	r.clientCounter++
	r.ClientLogs = append(r.ClientLogs, []string{})
	r.ClientErrs = append(r.ClientErrs, []string{})
	r.mu.Unlock()

	client, err := derphttp.NewClient(key.NewNode(), derpURL.String(), func(format string, args ...any) {
		r.mu.Lock()
		defer r.mu.Unlock()

		msg := fmt.Sprintf(format, args...)
		if strings.Contains(msg, "We'll use WebSockets on the next connection attempt") {
			r.UsesWebsocket = true
		}
		r.ClientLogs[id] = append(r.ClientLogs[id], msg)
	})
	if err != nil {
		err = xerrors.Errorf("create derp client: %w", err)
		r.writeClientErr(id, err)
		return nil, id, err
	}

	// Tie the client's lifetime to ctx so callers only need to cancel the
	// context to tear the connection down.
	go func() {
		<-ctx.Done()
		_ = client.Close()
	}()

	var connectErr error
	for attempt := 0; attempt < maxConnectAttempts; attempt++ {
		connectErr = client.Connect(ctx)
		if connectErr == nil {
			return client, id, nil
		}
		r.writeClientErr(id, xerrors.Errorf("connect to derp: %w", connectErr))
	}

	// Record exactly the error that is returned; wrapping it a second time
	// would repeat the "couldn't connect" prefix in the report.
	err = xerrors.Errorf("couldn't connect after %d tries, last error: %w", maxConnectAttempts, connectErr)
	r.writeClientErr(id, err)
	return nil, id, err
}
// recvData reads messages from client until a data packet arrives and returns
// that packet. Server info messages seen along the way are stored in
// r.ServerInfo; every other message type is discarded. A receive error ends
// the wait and is returned as is.
func (r *DERPNodeReport) recvData(client *derphttp.Client) (derp.ReceivedPacket, error) {
	for {
		received, err := client.Recv()
		if err != nil {
			return derp.ReceivedPacket{}, err
		}

		if pkt, isPacket := received.(derp.ReceivedPacket); isPacket {
			return pkt, nil
		}

		if info, isInfo := received.(derp.ServerInfoMessage); isInfo {
			r.mu.Lock()
			r.ServerInfo = info
			r.mu.Unlock()
		}
		// Anything else is dropped.
	}
}