chore(vpn): send ping results over tunnel (#18200)

Closes #17982.

The purpose of this PR is to expose network latency via the API used by Coder Desktop.

This PR has the tunnel ping all known agents every 5 seconds, in order to produce an instance of:
```proto
message LastPing {
	// latency is the RTT of the ping to the agent.
	google.protobuf.Duration latency = 1;
	// did_p2p indicates whether the ping was sent P2P, or over DERP.
	bool did_p2p = 2;
	// preferred_derp is the human readable name of the preferred DERP region,
	// or the region used for the last ping, if it was sent over DERP.
	string preferred_derp = 3;
	// preferred_derp_latency is the last known latency to the preferred DERP
	// region. Unset if the region does not appear in the DERP map.
	optional google.protobuf.Duration preferred_derp_latency = 4;
}
```
The contents of this message are stored and included on all subsequent upsertions of the agent. 
Note that we upsert existing agents every 5 seconds to update the `last_handshake` value.

On the desktop apps, this message will be used to produce a tooltip similar to that of the VS Code extension:
<img width="495" alt="image" src="https://github.com/user-attachments/assets/d8b65f3d-f536-4c64-9af9-35c1a42c92d2" />
(wording not final)

Unlike the VS Code extension, we omit:
- The Latency of *all* available DERP regions. It seems not ideal to send a copy of this entire map for every online agent, and it certainly doesn't make sense for it to be on the `Agent` or `LastPing` message. 
If we do want to expose this info on Coder Desktop, we should consider how best to do so; maybe we want to include it on a more generic `Netcheck` message.
- The current throughput (Bytes up/down). This is out of scope of the linked issue, and is non-trivial to implement. I'm also not sure of the value given the frequency we're doing these status updates (every 5 seconds).
If we want to expose it, it'll be in a separate PR.

<img width="343" alt="image" src="https://github.com/user-attachments/assets/8447d03b-9721-4111-8ac1-332d70a1e8f1" />
This commit is contained in:
Ethan
2025-06-06 14:18:57 +10:00
committed by GitHub
parent b4f71b70aa
commit 0076e8479f
12 changed files with 973 additions and 318 deletions

View File

@ -16,7 +16,6 @@ import (
"path/filepath"
"regexp"
"slices"
"strconv"
"strings"
"sync"
"time"
@ -31,7 +30,6 @@ import (
"golang.org/x/term"
"golang.org/x/xerrors"
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
"tailscale.com/tailcfg"
"tailscale.com/types/netlogtype"
"cdr.dev/slog"
@ -40,11 +38,13 @@ import (
"github.com/coder/coder/v2/cli/cliui"
"github.com/coder/coder/v2/cli/cliutil"
"github.com/coder/coder/v2/coderd/autobuild/notify"
"github.com/coder/coder/v2/coderd/util/maps"
"github.com/coder/coder/v2/coderd/util/ptr"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/codersdk/workspacesdk"
"github.com/coder/coder/v2/cryptorand"
"github.com/coder/coder/v2/pty"
"github.com/coder/coder/v2/tailnet"
"github.com/coder/quartz"
"github.com/coder/retry"
"github.com/coder/serpent"
@ -1456,28 +1456,6 @@ func collectNetworkStats(ctx context.Context, agentConn *workspacesdk.AgentConn,
}
node := agentConn.Node()
derpMap := agentConn.DERPMap()
derpLatency := map[string]float64{}
// Convert DERP region IDs to friendly names for display in the UI.
for rawRegion, latency := range node.DERPLatency {
regionParts := strings.SplitN(rawRegion, "-", 2)
regionID, err := strconv.Atoi(regionParts[0])
if err != nil {
continue
}
region, found := derpMap.Regions[regionID]
if !found {
// It's possible that a workspace agent is using an old DERPMap
// and reports regions that do not exist. If that's the case,
// report the region as unknown!
region = &tailcfg.DERPRegion{
RegionID: regionID,
RegionName: fmt.Sprintf("Unnamed %d", regionID),
}
}
// Convert the microseconds to milliseconds.
derpLatency[region.RegionName] = latency * 1000
}
totalRx := uint64(0)
totalTx := uint64(0)
@ -1491,27 +1469,20 @@ func collectNetworkStats(ctx context.Context, agentConn *workspacesdk.AgentConn,
uploadSecs := float64(totalTx) / dur.Seconds()
downloadSecs := float64(totalRx) / dur.Seconds()
// Sometimes the preferred DERP doesn't match the one we're actually
// connected with. Perhaps because the agent prefers a different DERP and
// we're using that server instead.
preferredDerpID := node.PreferredDERP
if pingResult.DERPRegionID != 0 {
preferredDerpID = pingResult.DERPRegionID
}
preferredDerp, ok := derpMap.Regions[preferredDerpID]
preferredDerpName := fmt.Sprintf("Unnamed %d", preferredDerpID)
if ok {
preferredDerpName = preferredDerp.RegionName
}
preferredDerpName := tailnet.ExtractPreferredDERPName(pingResult, node, derpMap)
derpLatency := tailnet.ExtractDERPLatency(node, derpMap)
if _, ok := derpLatency[preferredDerpName]; !ok {
derpLatency[preferredDerpName] = 0
}
derpLatencyMs := maps.Map(derpLatency, func(dur time.Duration) float64 {
return float64(dur) / float64(time.Millisecond)
})
return &sshNetworkStats{
P2P: p2p,
Latency: float64(latency.Microseconds()) / 1000,
PreferredDERP: preferredDerpName,
DERPLatency: derpLatency,
DERPLatency: derpLatencyMs,
UploadBytesSec: int64(uploadSecs),
DownloadBytesSec: int64(downloadSecs),
}, nil