feat(cli): add p2p diagnostics to ping (#14426)

First PR to address #14244.

Adds common potential reasons as to why a direct connection to the workspace agent couldn't be established to `coder ping`:
- If the Coder deployment administrator has blocked direct connections (`CODER_BLOCK_DIRECT`).
- If the client has no STUN servers within its DERP map.
- If the client or agent appears to be behind a hard NAT, as per Tailscale `netInfo.MappingVariesByDestIP`

Also adds a warning if the client or agent has a network interface below the 'safe' MTU for tailnet. This warning is always displayed at the end of a `coder ping`.
This commit is contained in:
Ethan
2024-08-28 15:39:01 +10:00
committed by GitHub
parent b36d979a60
commit 8c15192433
8 changed files with 298 additions and 5 deletions

View File

@ -37,6 +37,7 @@ func (a *agent) apiHandler() http.Handler {
} }
promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger) promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger)
r.Get("/api/v0/listening-ports", lp.handler) r.Get("/api/v0/listening-ports", lp.handler)
r.Get("/api/v0/netcheck", a.HandleNetcheck)
r.Get("/debug/logs", a.HandleHTTPDebugLogs) r.Get("/debug/logs", a.HandleHTTPDebugLogs)
r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock) r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock)
r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState) r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState)

31
agent/health.go Normal file
View File

@ -0,0 +1,31 @@
package agent
import (
"net/http"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/httpapi"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
)
// HandleNetcheck is the HTTP handler for the agent's netcheck endpoint. It
// responds with a healthsdk.AgentNetcheckReport containing the NetInfo most
// recently recorded by the agent's tailnet connection together with a report
// on the agent's local network interfaces.
//
// If the interfaces report cannot be produced, a 500 response carrying the
// error detail is written instead. The report severity is always set to
// health.SeverityOK here.
func (a *agent) HandleNetcheck(rw http.ResponseWriter, r *http.Request) {
	ctx := r.Context()
	netInfo := a.TailnetConn().GetNetInfo()

	interfaces, err := healthsdk.RunInterfacesReport()
	if err != nil {
		failure := codersdk.Response{
			Message: "Failed to run interfaces report",
			Detail:  err.Error(),
		}
		httpapi.Write(ctx, rw, http.StatusInternalServerError, failure)
		return
	}

	report := healthsdk.AgentNetcheckReport{
		BaseReport: healthsdk.BaseReport{Severity: health.SeverityOK},
		NetInfo:    netInfo,
		Interfaces: interfaces,
	}
	httpapi.Write(ctx, rw, http.StatusOK, report)
}

View File

@ -10,8 +10,11 @@ import (
"github.com/google/uuid" "github.com/google/uuid"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"tailscale.com/tailcfg"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet"
) )
@ -346,3 +349,55 @@ func PeerDiagnostics(w io.Writer, d tailnet.PeerDiagnostics) {
_, _ = fmt.Fprint(w, "✘ Wireguard is not connected\n") _, _ = fmt.Fprint(w, "✘ Wireguard is not connected\n")
} }
} }
// ConnDiags collects the inputs that ConnDiagnostics inspects when explaining
// why a connection to a workspace agent is, or is not, direct (p2p). Every
// pointer field is optional; ConnDiagnostics skips the checks that depend on
// a nil field.
type ConnDiags struct {
	// ConnInfo is the connection info obtained from the server. Its
	// DisableDirectConnections flag and DERPMap are consulted.
	ConnInfo *workspacesdk.AgentConnectionInfo
	// PingP2P reports whether a direct (p2p) connection was observed.
	PingP2P bool
	// DisableDirect reports whether direct connections were disabled locally
	// (`--disable-direct` / `CODER_DISABLE_DIRECT`).
	DisableDirect bool
	// LocalNetInfo is the client's NetInfo; MappingVariesByDestIP is used as
	// a hint that the client is behind a hard NAT.
	LocalNetInfo *tailcfg.NetInfo
	// LocalInterfaces is the client's interfaces report; its warnings are
	// printed with a "Client:" prefix.
	LocalInterfaces *healthsdk.InterfacesReport
	// AgentNetcheck is the report retrieved from the agent; its interface
	// warnings are printed with an "Agent:" prefix and its NetInfo is used
	// for the agent-side hard NAT hint.
	AgentNetcheck *healthsdk.AgentNetcheckReport
	// TODO: More diagnostics
}
// ConnDiagnostics writes a human-readable explanation of the connection state
// described by d to w.
//
// Interface warnings from the agent and then the client are always printed
// first. If the connection is direct, a confirmation line follows and nothing
// else is written. Otherwise the relay notice is printed, followed by either
// a single definitive cause (direct connections disabled locally, or blocked
// by the administrator) or any applicable hints: a DERP map without STUN, and
// a client and/or agent that appears to be behind a hard NAT.
//
// Write errors are intentionally ignored; this is best-effort CLI output.
func ConnDiagnostics(w io.Writer, d ConnDiags) {
	// Interface warnings are reported regardless of how we ended up connected.
	if agent := d.AgentNetcheck; agent != nil {
		for _, warning := range agent.Interfaces.Warnings {
			_, _ = fmt.Fprintf(w, "❗ Agent: %s\n", warning.Message)
		}
	}
	if local := d.LocalInterfaces; local != nil {
		for _, warning := range local.Warnings {
			_, _ = fmt.Fprintf(w, "❗ Client: %s\n", warning.Message)
		}
	}

	if d.PingP2P {
		_, _ = fmt.Fprint(w, "✔ You are connected directly (p2p)\n")
		return
	}
	_, _ = fmt.Fprint(w, "❗ You are connected via a DERP relay, not directly (p2p)\n")

	// A deliberate opt-out fully explains the relay; no further hints needed.
	info := d.ConnInfo
	switch {
	case d.DisableDirect:
		_, _ = fmt.Fprint(w, "❗ Direct connections are disabled locally, by `--disable-direct` or `CODER_DISABLE_DIRECT`\n")
		return
	case info != nil && info.DisableDirectConnections:
		_, _ = fmt.Fprint(w, "❗ Your Coder administrator has blocked direct connections\n")
		return
	}

	// The remaining checks are hints and are not mutually exclusive.
	if info != nil && info.DERPMap != nil {
		if !info.DERPMap.HasSTUN() {
			_, _ = fmt.Fprint(w, "✘ The DERP map is not configured to use STUN, which will prevent direct connections from starting outside of local networks\n")
		}
	}

	if local := d.LocalNetInfo; local != nil && local.MappingVariesByDestIP.EqualBool(true) {
		_, _ = fmt.Fprint(w, "❗ Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n")
	}

	if d.AgentNetcheck == nil || d.AgentNetcheck.NetInfo == nil {
		return
	}
	if d.AgentNetcheck.NetInfo.MappingVariesByDestIP.EqualBool(true) {
		_, _ = fmt.Fprint(w, "❗ Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n")
	}
}

View File

@ -20,8 +20,11 @@ import (
"github.com/coder/coder/v2/cli/clitest" "github.com/coder/coder/v2/cli/clitest"
"github.com/coder/coder/v2/cli/cliui" "github.com/coder/coder/v2/cli/cliui"
"github.com/coder/coder/v2/coderd/healthcheck/health"
"github.com/coder/coder/v2/coderd/util/ptr" "github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet"
"github.com/coder/coder/v2/testutil" "github.com/coder/coder/v2/testutil"
"github.com/coder/serpent" "github.com/coder/serpent"
@ -672,3 +675,129 @@ func TestPeerDiagnostics(t *testing.T) {
}) })
} }
} }
// TestConnDiagnostics feeds a table of ConnDiags values through
// cliui.ConnDiagnostics and checks that each expected line appears somewhere
// in the rendered output. Only containment is asserted, not line order.
func TestConnDiagnostics(t *testing.T) {
	t.Parallel()

	// Lines that show up in several expectations.
	const (
		directLine = `✔ You are connected directly (p2p)`
		relayLine  = `❗ You are connected via a DERP relay, not directly (p2p)`
	)

	cases := []struct {
		name  string
		diags cliui.ConnDiags
		want  []string
	}{
		{
			name: "Direct",
			diags: cliui.ConnDiags{
				ConnInfo:     &workspacesdk.AgentConnectionInfo{},
				PingP2P:      true,
				LocalNetInfo: &tailcfg.NetInfo{},
			},
			want: []string{directLine},
		},
		{
			name: "DirectBlocked",
			diags: cliui.ConnDiags{
				ConnInfo: &workspacesdk.AgentConnectionInfo{
					DisableDirectConnections: true,
				},
			},
			want: []string{
				relayLine,
				`❗ Your Coder administrator has blocked direct connections`,
			},
		},
		{
			name: "NoStun",
			diags: cliui.ConnDiags{
				ConnInfo: &workspacesdk.AgentConnectionInfo{
					DERPMap: &tailcfg.DERPMap{},
				},
				LocalNetInfo: &tailcfg.NetInfo{},
			},
			want: []string{
				relayLine,
				`✘ The DERP map is not configured to use STUN, which will prevent direct connections from starting outside of local networks`,
			},
		},
		{
			name: "ClientHardNat",
			diags: cliui.ConnDiags{
				LocalNetInfo: &tailcfg.NetInfo{
					MappingVariesByDestIP: "true",
				},
			},
			want: []string{
				relayLine,
				`❗ Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`,
			},
		},
		{
			name: "AgentHardNat",
			diags: cliui.ConnDiags{
				ConnInfo:     &workspacesdk.AgentConnectionInfo{},
				LocalNetInfo: &tailcfg.NetInfo{},
				AgentNetcheck: &healthsdk.AgentNetcheckReport{
					NetInfo: &tailcfg.NetInfo{MappingVariesByDestIP: "true"},
				},
			},
			want: []string{
				relayLine,
				`❗ Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`,
			},
		},
		{
			name: "AgentInterfaceWarnings",
			diags: cliui.ConnDiags{
				PingP2P: true,
				AgentNetcheck: &healthsdk.AgentNetcheckReport{
					Interfaces: healthsdk.InterfacesReport{
						BaseReport: healthsdk.BaseReport{
							Warnings: []health.Message{
								health.Messagef(health.CodeInterfaceSmallMTU, "network interface eth0 has MTU 1280, (less than 1378), which may cause problems with direct connections"),
							},
						},
					},
				},
			},
			want: []string{
				`❗ Agent: network interface eth0 has MTU 1280, (less than 1378), which may cause problems with direct connections`,
				directLine,
			},
		},
		{
			name: "LocalInterfaceWarnings",
			diags: cliui.ConnDiags{
				PingP2P: true,
				LocalInterfaces: &healthsdk.InterfacesReport{
					BaseReport: healthsdk.BaseReport{
						Warnings: []health.Message{
							health.Messagef(health.CodeInterfaceSmallMTU, "network interface eth1 has MTU 1310, (less than 1378), which may cause problems with direct connections"),
						},
					},
				},
			},
			want: []string{
				`❗ Client: network interface eth1 has MTU 1310, (less than 1378), which may cause problems with direct connections`,
				directLine,
			},
		},
	}

	for _, tt := range cases {
		tt := tt
		t.Run(tt.name, func(t *testing.T) {
			t.Parallel()

			// Render into a pipe from a goroutine and drain it here; the
			// writer is closed once rendering finishes so ReadAll returns.
			pr, pw := io.Pipe()
			go func() {
				defer pw.Close()
				cliui.ConnDiagnostics(pw, tt.diags)
			}()

			raw, err := io.ReadAll(pr)
			require.NoError(t, err)

			got := string(raw)
			for _, line := range tt.want {
				require.Contains(t, got, line)
			}
		})
	}
}

View File

@ -2,7 +2,9 @@ package cli
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"net/http"
"time" "time"
"golang.org/x/xerrors" "golang.org/x/xerrors"
@ -14,6 +16,7 @@ import (
"github.com/coder/coder/v2/cli/cliui" "github.com/coder/coder/v2/cli/cliui"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/codersdk/workspacesdk" "github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/serpent" "github.com/coder/serpent"
) )
@ -61,7 +64,8 @@ func (r *RootCmd) ping() *serpent.Command {
if !r.disableNetworkTelemetry { if !r.disableNetworkTelemetry {
opts.EnableTelemetry = true opts.EnableTelemetry = true
} }
conn, err := workspacesdk.New(client).DialAgent(ctx, workspaceAgent.ID, opts) client := workspacesdk.New(client)
conn, err := client.DialAgent(ctx, workspaceAgent.ID, opts)
if err != nil { if err != nil {
return err return err
} }
@ -138,11 +142,44 @@ func (r *RootCmd) ping() *serpent.Command {
) )
if n == int(pingNum) { if n == int(pingNum) {
diags := conn.GetPeerDiagnostics() break
cliui.PeerDiagnostics(inv.Stdout, diags)
return nil
} }
} }
ctx, cancel = context.WithTimeout(inv.Context(), 30*time.Second)
defer cancel()
diags := conn.GetPeerDiagnostics()
cliui.PeerDiagnostics(inv.Stdout, diags)
connDiags := cliui.ConnDiags{
PingP2P: didP2p,
DisableDirect: r.disableDirect,
LocalNetInfo: conn.GetNetInfo(),
}
connInfo, err := client.AgentConnectionInfoGeneric(ctx)
if err == nil {
connDiags.ConnInfo = &connInfo
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection info from server: %v\n", err)
}
ifReport, err := healthsdk.RunInterfacesReport()
if err == nil {
connDiags.LocalInterfaces = &ifReport
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve local interfaces report: %v\n", err)
}
agentNetcheck, err := conn.Netcheck(ctx)
if err == nil {
connDiags.AgentNetcheck = &agentNetcheck
} else {
var sdkErr *codersdk.Error
if errors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusNotFound {
_, _ = fmt.Fprint(inv.Stdout, "Could not generate full connection report as the workspace agent is outdated\n")
} else {
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection report from agent: %v\n", err)
}
}
cliui.ConnDiagnostics(inv.Stdout, connDiags)
return nil
}, },
} }

View File

@ -273,3 +273,10 @@ type ClientNetcheckReport struct {
DERP DERPHealthReport `json:"derp"` DERP DERPHealthReport `json:"derp"`
Interfaces InterfacesReport `json:"interfaces"` Interfaces InterfacesReport `json:"interfaces"`
} }
// @typescript-ignore AgentNetcheckReport
//
// AgentNetcheckReport is the payload served by the workspace agent's
// /api/v0/netcheck endpoint and decoded by AgentConn.Netcheck.
type AgentNetcheckReport struct {
	// BaseReport carries the common report fields (e.g. Severity, Warnings).
	BaseReport
	// NetInfo is the NetInfo reported by the agent's tailnet connection.
	// It is a pointer and consumers nil-check it before use.
	NetInfo *tailcfg.NetInfo `json:"net_info"`
	// Interfaces is the report on the agent's network interfaces.
	Interfaces InterfacesReport `json:"interfaces"`
}

View File

@ -22,6 +22,7 @@ import (
"github.com/coder/coder/v2/coderd/tracing" "github.com/coder/coder/v2/coderd/tracing"
"github.com/coder/coder/v2/codersdk" "github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/healthsdk"
"github.com/coder/coder/v2/tailnet" "github.com/coder/coder/v2/tailnet"
) )
@ -241,6 +242,23 @@ func (c *AgentConn) ListeningPorts(ctx context.Context) (codersdk.WorkspaceAgent
return resp, json.NewDecoder(res.Body).Decode(&resp) return resp, json.NewDecoder(res.Body).Decode(&resp)
} }
// Netcheck returns a network check report from the workspace agent.
//
// It performs a GET on the agent's /api/v0/netcheck endpoint. A transport
// failure is wrapped and returned; a non-200 response is converted with
// codersdk.ReadBodyAsError so callers can inspect the status code via
// errors.As. On any error the returned report is the zero value.
func (c *AgentConn) Netcheck(ctx context.Context) (healthsdk.AgentNetcheckReport, error) {
	ctx, span := tracing.StartSpan(ctx)
	defer span.End()

	res, err := c.apiRequest(ctx, http.MethodGet, "/api/v0/netcheck", nil)
	if err != nil {
		return healthsdk.AgentNetcheckReport{}, xerrors.Errorf("do request: %w", err)
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return healthsdk.AgentNetcheckReport{}, codersdk.ReadBodyAsError(res)
	}

	// Decode in its own statement: `return report, Decode(&report)` would
	// rely on the order in which the variable read and the mutating call are
	// evaluated, which the Go spec leaves unspecified.
	var report healthsdk.AgentNetcheckReport
	if err := json.NewDecoder(res.Body).Decode(&report); err != nil {
		// Do not hand back a partially populated report alongside an error.
		return healthsdk.AgentNetcheckReport{}, xerrors.Errorf("decode netcheck report: %w", err)
	}
	return report, nil
}
// DebugMagicsock makes a request to the workspace agent's magicsock debug endpoint. // DebugMagicsock makes a request to the workspace agent's magicsock debug endpoint.
func (c *AgentConn) DebugMagicsock(ctx context.Context) ([]byte, error) { func (c *AgentConn) DebugMagicsock(ctx context.Context) ([]byte, error) {
ctx, span := tracing.StartSpan(ctx) ctx, span := tracing.StartSpan(ctx)

View File

@ -294,6 +294,9 @@ func NewConn(options *Options) (conn *Conn, err error) {
}() }()
if server.telemetryStore != nil { if server.telemetryStore != nil {
server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) { server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
server.mutex.Lock()
server.lastNetInfo = ni.Clone()
server.mutex.Unlock()
server.telemetryStore.setNetInfo(ni) server.telemetryStore.setNetInfo(ni)
nodeUp.setNetInfo(ni) nodeUp.setNetInfo(ni)
server.telemetryStore.pingPeer(server) server.telemetryStore.pingPeer(server)
@ -304,7 +307,12 @@ func NewConn(options *Options) (conn *Conn, err error) {
}) })
go server.watchConnChange() go server.watchConnChange()
} else { } else {
server.wireguardEngine.SetNetInfoCallback(nodeUp.setNetInfo) server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
server.mutex.Lock()
server.lastNetInfo = ni.Clone()
server.mutex.Unlock()
nodeUp.setNetInfo(ni)
})
} }
server.wireguardEngine.SetStatusCallback(nodeUp.setStatus) server.wireguardEngine.SetStatusCallback(nodeUp.setStatus)
server.magicConn.SetDERPForcedWebsocketCallback(nodeUp.setDERPForcedWebsocket) server.magicConn.SetDERPForcedWebsocketCallback(nodeUp.setDERPForcedWebsocket)
@ -373,6 +381,13 @@ type Conn struct {
watchCancel func() watchCancel func()
trafficStats *connstats.Statistics trafficStats *connstats.Statistics
lastNetInfo *tailcfg.NetInfo
}
// GetNetInfo returns a clone of the NetInfo most recently stored in
// c.lastNetInfo by the wireguard engine's NetInfo callback. The field is read
// while holding c.mutex, and the caller receives its own copy.
// NOTE(review): presumably returns nil until the first callback has fired —
// confirm that (*tailcfg.NetInfo).Clone is nil-safe.
func (c *Conn) GetNetInfo() *tailcfg.NetInfo {
c.mutex.Lock()
defer c.mutex.Unlock()
return c.lastNetInfo.Clone()
} }
func (c *Conn) SetTunnelDestination(id uuid.UUID) { func (c *Conn) SetTunnelDestination(id uuid.UUID) {