mirror of
https://github.com/coder/coder.git
synced 2025-07-09 11:45:56 +00:00
feat(cli): add p2p diagnostics to ping (#14426)
First PR to address #14244. Adds to `coder ping` the common potential reasons why a direct connection to the workspace agent couldn't be established: - If the Coder deployment administrator has blocked direct connections (`CODER_BLOCK_DIRECT`). - If the client has no STUN servers within its DERP map. - If the client or agent appears to be behind a hard NAT, as per Tailscale `netInfo.MappingVariesByDestIP`. Also adds a warning if the client or agent has a network interface below the 'safe' MTU for tailnet. This warning is always displayed at the end of a `coder ping`.
This commit is contained in:
@ -37,6 +37,7 @@ func (a *agent) apiHandler() http.Handler {
|
||||
}
|
||||
promHandler := PrometheusMetricsHandler(a.prometheusRegistry, a.logger)
|
||||
r.Get("/api/v0/listening-ports", lp.handler)
|
||||
r.Get("/api/v0/netcheck", a.HandleNetcheck)
|
||||
r.Get("/debug/logs", a.HandleHTTPDebugLogs)
|
||||
r.Get("/debug/magicsock", a.HandleHTTPDebugMagicsock)
|
||||
r.Get("/debug/magicsock/debug-logging/{state}", a.HandleHTTPMagicsockDebugLoggingState)
|
||||
|
31
agent/health.go
Normal file
31
agent/health.go
Normal file
@ -0,0 +1,31 @@
|
||||
package agent
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/coderd/httpapi"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
)
|
||||
|
||||
func (a *agent) HandleNetcheck(rw http.ResponseWriter, r *http.Request) {
|
||||
ni := a.TailnetConn().GetNetInfo()
|
||||
|
||||
ifReport, err := healthsdk.RunInterfacesReport()
|
||||
if err != nil {
|
||||
httpapi.Write(r.Context(), rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Failed to run interfaces report",
|
||||
Detail: err.Error(),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
httpapi.Write(r.Context(), rw, http.StatusOK, healthsdk.AgentNetcheckReport{
|
||||
BaseReport: healthsdk.BaseReport{
|
||||
Severity: health.SeverityOK,
|
||||
},
|
||||
NetInfo: ni,
|
||||
Interfaces: ifReport,
|
||||
})
|
||||
}
|
@ -10,8 +10,11 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
"golang.org/x/xerrors"
|
||||
"tailscale.com/tailcfg"
|
||||
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
)
|
||||
|
||||
@ -346,3 +349,55 @@ func PeerDiagnostics(w io.Writer, d tailnet.PeerDiagnostics) {
|
||||
_, _ = fmt.Fprint(w, "✘ Wireguard is not connected\n")
|
||||
}
|
||||
}
|
||||
|
||||
type ConnDiags struct {
|
||||
ConnInfo *workspacesdk.AgentConnectionInfo
|
||||
PingP2P bool
|
||||
DisableDirect bool
|
||||
LocalNetInfo *tailcfg.NetInfo
|
||||
LocalInterfaces *healthsdk.InterfacesReport
|
||||
AgentNetcheck *healthsdk.AgentNetcheckReport
|
||||
// TODO: More diagnostics
|
||||
}
|
||||
|
||||
func ConnDiagnostics(w io.Writer, d ConnDiags) {
|
||||
if d.AgentNetcheck != nil {
|
||||
for _, msg := range d.AgentNetcheck.Interfaces.Warnings {
|
||||
_, _ = fmt.Fprintf(w, "❗ Agent: %s\n", msg.Message)
|
||||
}
|
||||
}
|
||||
|
||||
if d.LocalInterfaces != nil {
|
||||
for _, msg := range d.LocalInterfaces.Warnings {
|
||||
_, _ = fmt.Fprintf(w, "❗ Client: %s\n", msg.Message)
|
||||
}
|
||||
}
|
||||
|
||||
if d.PingP2P {
|
||||
_, _ = fmt.Fprint(w, "✔ You are connected directly (p2p)\n")
|
||||
return
|
||||
}
|
||||
_, _ = fmt.Fprint(w, "❗ You are connected via a DERP relay, not directly (p2p)\n")
|
||||
|
||||
if d.DisableDirect {
|
||||
_, _ = fmt.Fprint(w, "❗ Direct connections are disabled locally, by `--disable-direct` or `CODER_DISABLE_DIRECT`\n")
|
||||
return
|
||||
}
|
||||
|
||||
if d.ConnInfo != nil && d.ConnInfo.DisableDirectConnections {
|
||||
_, _ = fmt.Fprint(w, "❗ Your Coder administrator has blocked direct connections\n")
|
||||
return
|
||||
}
|
||||
|
||||
if d.ConnInfo != nil && d.ConnInfo.DERPMap != nil && !d.ConnInfo.DERPMap.HasSTUN() {
|
||||
_, _ = fmt.Fprint(w, "✘ The DERP map is not configured to use STUN, which will prevent direct connections from starting outside of local networks\n")
|
||||
}
|
||||
|
||||
if d.LocalNetInfo != nil && d.LocalNetInfo.MappingVariesByDestIP.EqualBool(true) {
|
||||
_, _ = fmt.Fprint(w, "❗ Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n")
|
||||
}
|
||||
|
||||
if d.AgentNetcheck != nil && d.AgentNetcheck.NetInfo != nil && d.AgentNetcheck.NetInfo.MappingVariesByDestIP.EqualBool(true) {
|
||||
_, _ = fmt.Fprint(w, "❗ Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers\n")
|
||||
}
|
||||
}
|
||||
|
@ -20,8 +20,11 @@ import (
|
||||
|
||||
"github.com/coder/coder/v2/cli/clitest"
|
||||
"github.com/coder/coder/v2/cli/cliui"
|
||||
"github.com/coder/coder/v2/coderd/healthcheck/health"
|
||||
"github.com/coder/coder/v2/coderd/util/ptr"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
"github.com/coder/serpent"
|
||||
@ -672,3 +675,129 @@ func TestPeerDiagnostics(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestConnDiagnostics(t *testing.T) {
|
||||
t.Parallel()
|
||||
testCases := []struct {
|
||||
name string
|
||||
diags cliui.ConnDiags
|
||||
want []string
|
||||
}{
|
||||
{
|
||||
name: "Direct",
|
||||
diags: cliui.ConnDiags{
|
||||
ConnInfo: &workspacesdk.AgentConnectionInfo{},
|
||||
PingP2P: true,
|
||||
LocalNetInfo: &tailcfg.NetInfo{},
|
||||
},
|
||||
want: []string{
|
||||
`✔ You are connected directly (p2p)`,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "DirectBlocked",
|
||||
diags: cliui.ConnDiags{
|
||||
ConnInfo: &workspacesdk.AgentConnectionInfo{
|
||||
DisableDirectConnections: true,
|
||||
},
|
||||
},
|
||||
want: []string{
|
||||
`❗ You are connected via a DERP relay, not directly (p2p)`,
|
||||
`❗ Your Coder administrator has blocked direct connections`,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "NoStun",
|
||||
diags: cliui.ConnDiags{
|
||||
ConnInfo: &workspacesdk.AgentConnectionInfo{
|
||||
DERPMap: &tailcfg.DERPMap{},
|
||||
},
|
||||
LocalNetInfo: &tailcfg.NetInfo{},
|
||||
},
|
||||
want: []string{
|
||||
`❗ You are connected via a DERP relay, not directly (p2p)`,
|
||||
`✘ The DERP map is not configured to use STUN, which will prevent direct connections from starting outside of local networks`,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ClientHardNat",
|
||||
diags: cliui.ConnDiags{
|
||||
LocalNetInfo: &tailcfg.NetInfo{
|
||||
MappingVariesByDestIP: "true",
|
||||
},
|
||||
},
|
||||
want: []string{
|
||||
`❗ You are connected via a DERP relay, not directly (p2p)`,
|
||||
`❗ Client is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "AgentHardNat",
|
||||
diags: cliui.ConnDiags{
|
||||
ConnInfo: &workspacesdk.AgentConnectionInfo{},
|
||||
PingP2P: false,
|
||||
LocalNetInfo: &tailcfg.NetInfo{},
|
||||
AgentNetcheck: &healthsdk.AgentNetcheckReport{
|
||||
NetInfo: &tailcfg.NetInfo{MappingVariesByDestIP: "true"},
|
||||
},
|
||||
},
|
||||
want: []string{
|
||||
`❗ You are connected via a DERP relay, not directly (p2p)`,
|
||||
`❗ Agent is potentially behind a hard NAT, as multiple endpoints were retrieved from different STUN servers`,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "AgentInterfaceWarnings",
|
||||
diags: cliui.ConnDiags{
|
||||
PingP2P: true,
|
||||
AgentNetcheck: &healthsdk.AgentNetcheckReport{
|
||||
Interfaces: healthsdk.InterfacesReport{
|
||||
BaseReport: healthsdk.BaseReport{
|
||||
Warnings: []health.Message{
|
||||
health.Messagef(health.CodeInterfaceSmallMTU, "network interface eth0 has MTU 1280, (less than 1378), which may cause problems with direct connections"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []string{
|
||||
`❗ Agent: network interface eth0 has MTU 1280, (less than 1378), which may cause problems with direct connections`,
|
||||
`✔ You are connected directly (p2p)`,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LocalInterfaceWarnings",
|
||||
diags: cliui.ConnDiags{
|
||||
PingP2P: true,
|
||||
LocalInterfaces: &healthsdk.InterfacesReport{
|
||||
BaseReport: healthsdk.BaseReport{
|
||||
Warnings: []health.Message{
|
||||
health.Messagef(health.CodeInterfaceSmallMTU, "network interface eth1 has MTU 1310, (less than 1378), which may cause problems with direct connections"),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
want: []string{
|
||||
`❗ Client: network interface eth1 has MTU 1310, (less than 1378), which may cause problems with direct connections`,
|
||||
`✔ You are connected directly (p2p)`,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
r, w := io.Pipe()
|
||||
go func() {
|
||||
defer w.Close()
|
||||
cliui.ConnDiagnostics(w, tc.diags)
|
||||
}()
|
||||
bytes, err := io.ReadAll(r)
|
||||
require.NoError(t, err)
|
||||
output := string(bytes)
|
||||
for _, want := range tc.want {
|
||||
require.Contains(t, output, want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
45
cli/ping.go
45
cli/ping.go
@ -2,7 +2,9 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
@ -14,6 +16,7 @@ import (
|
||||
|
||||
"github.com/coder/coder/v2/cli/cliui"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
"github.com/coder/coder/v2/codersdk/workspacesdk"
|
||||
"github.com/coder/serpent"
|
||||
)
|
||||
@ -61,7 +64,8 @@ func (r *RootCmd) ping() *serpent.Command {
|
||||
if !r.disableNetworkTelemetry {
|
||||
opts.EnableTelemetry = true
|
||||
}
|
||||
conn, err := workspacesdk.New(client).DialAgent(ctx, workspaceAgent.ID, opts)
|
||||
client := workspacesdk.New(client)
|
||||
conn, err := client.DialAgent(ctx, workspaceAgent.ID, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -138,11 +142,44 @@ func (r *RootCmd) ping() *serpent.Command {
|
||||
)
|
||||
|
||||
if n == int(pingNum) {
|
||||
diags := conn.GetPeerDiagnostics()
|
||||
cliui.PeerDiagnostics(inv.Stdout, diags)
|
||||
return nil
|
||||
break
|
||||
}
|
||||
}
|
||||
ctx, cancel = context.WithTimeout(inv.Context(), 30*time.Second)
|
||||
defer cancel()
|
||||
diags := conn.GetPeerDiagnostics()
|
||||
cliui.PeerDiagnostics(inv.Stdout, diags)
|
||||
|
||||
connDiags := cliui.ConnDiags{
|
||||
PingP2P: didP2p,
|
||||
DisableDirect: r.disableDirect,
|
||||
LocalNetInfo: conn.GetNetInfo(),
|
||||
}
|
||||
connInfo, err := client.AgentConnectionInfoGeneric(ctx)
|
||||
if err == nil {
|
||||
connDiags.ConnInfo = &connInfo
|
||||
} else {
|
||||
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection info from server: %v\n", err)
|
||||
}
|
||||
ifReport, err := healthsdk.RunInterfacesReport()
|
||||
if err == nil {
|
||||
connDiags.LocalInterfaces = &ifReport
|
||||
} else {
|
||||
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve local interfaces report: %v\n", err)
|
||||
}
|
||||
agentNetcheck, err := conn.Netcheck(ctx)
|
||||
if err == nil {
|
||||
connDiags.AgentNetcheck = &agentNetcheck
|
||||
} else {
|
||||
var sdkErr *codersdk.Error
|
||||
if errors.As(err, &sdkErr) && sdkErr.StatusCode() == http.StatusNotFound {
|
||||
_, _ = fmt.Fprint(inv.Stdout, "Could not generate full connection report as the workspace agent is outdated\n")
|
||||
} else {
|
||||
_, _ = fmt.Fprintf(inv.Stdout, "Failed to retrieve connection report from agent: %v\n", err)
|
||||
}
|
||||
}
|
||||
cliui.ConnDiagnostics(inv.Stdout, connDiags)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -273,3 +273,10 @@ type ClientNetcheckReport struct {
|
||||
DERP DERPHealthReport `json:"derp"`
|
||||
Interfaces InterfacesReport `json:"interfaces"`
|
||||
}
|
||||
|
||||
// @typescript-ignore AgentNetcheckReport
|
||||
type AgentNetcheckReport struct {
|
||||
BaseReport
|
||||
NetInfo *tailcfg.NetInfo `json:"net_info"`
|
||||
Interfaces InterfacesReport `json:"interfaces"`
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ import (
|
||||
|
||||
"github.com/coder/coder/v2/coderd/tracing"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/codersdk/healthsdk"
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
)
|
||||
|
||||
@ -241,6 +242,23 @@ func (c *AgentConn) ListeningPorts(ctx context.Context) (codersdk.WorkspaceAgent
|
||||
return resp, json.NewDecoder(res.Body).Decode(&resp)
|
||||
}
|
||||
|
||||
// Netcheck returns a network check report from the workspace agent.
|
||||
func (c *AgentConn) Netcheck(ctx context.Context) (healthsdk.AgentNetcheckReport, error) {
|
||||
ctx, span := tracing.StartSpan(ctx)
|
||||
defer span.End()
|
||||
res, err := c.apiRequest(ctx, http.MethodGet, "/api/v0/netcheck", nil)
|
||||
if err != nil {
|
||||
return healthsdk.AgentNetcheckReport{}, xerrors.Errorf("do request: %w", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != http.StatusOK {
|
||||
return healthsdk.AgentNetcheckReport{}, codersdk.ReadBodyAsError(res)
|
||||
}
|
||||
|
||||
var resp healthsdk.AgentNetcheckReport
|
||||
return resp, json.NewDecoder(res.Body).Decode(&resp)
|
||||
}
|
||||
|
||||
// DebugMagicsock makes a request to the workspace agent's magicsock debug endpoint.
|
||||
func (c *AgentConn) DebugMagicsock(ctx context.Context) ([]byte, error) {
|
||||
ctx, span := tracing.StartSpan(ctx)
|
||||
|
@ -294,6 +294,9 @@ func NewConn(options *Options) (conn *Conn, err error) {
|
||||
}()
|
||||
if server.telemetryStore != nil {
|
||||
server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
|
||||
server.mutex.Lock()
|
||||
server.lastNetInfo = ni.Clone()
|
||||
server.mutex.Unlock()
|
||||
server.telemetryStore.setNetInfo(ni)
|
||||
nodeUp.setNetInfo(ni)
|
||||
server.telemetryStore.pingPeer(server)
|
||||
@ -304,7 +307,12 @@ func NewConn(options *Options) (conn *Conn, err error) {
|
||||
})
|
||||
go server.watchConnChange()
|
||||
} else {
|
||||
server.wireguardEngine.SetNetInfoCallback(nodeUp.setNetInfo)
|
||||
server.wireguardEngine.SetNetInfoCallback(func(ni *tailcfg.NetInfo) {
|
||||
server.mutex.Lock()
|
||||
server.lastNetInfo = ni.Clone()
|
||||
server.mutex.Unlock()
|
||||
nodeUp.setNetInfo(ni)
|
||||
})
|
||||
}
|
||||
server.wireguardEngine.SetStatusCallback(nodeUp.setStatus)
|
||||
server.magicConn.SetDERPForcedWebsocketCallback(nodeUp.setDERPForcedWebsocket)
|
||||
@ -373,6 +381,13 @@ type Conn struct {
|
||||
watchCancel func()
|
||||
|
||||
trafficStats *connstats.Statistics
|
||||
lastNetInfo *tailcfg.NetInfo
|
||||
}
|
||||
|
||||
func (c *Conn) GetNetInfo() *tailcfg.NetInfo {
|
||||
c.mutex.Lock()
|
||||
defer c.mutex.Unlock()
|
||||
return c.lastNetInfo.Clone()
|
||||
}
|
||||
|
||||
func (c *Conn) SetTunnelDestination(id uuid.UUID) {
|
||||
|
Reference in New Issue
Block a user