mirror of
https://github.com/coder/coder.git
synced 2025-07-06 15:41:45 +00:00
fix: ensure wsproxy MultiAgent
is closed when websocket dies (#11414)
The `SingleTailnet` behavior only checked to see if the `MultiAgent` was closed, but the websocket error was not being propagated into the `MultiAgent`, causing it to never be swapped for a new working one. Fixes https://github.com/coder/coder/issues/11401 Before: ``` Coder Workspace Proxy v0.0.0-devel+85ff030 - Your Self-Hosted Remote Development Platform Started HTTP listener at http://0.0.0.0:3001 View the Web UI: http://127.0.0.1:3001 ==> Logs will stream in below (press ctrl+c to gracefully exit): 2024-01-04 20:11:56.376 [warn] net.workspace-proxy.servertailnet: broadcast server node to agents ... error= write message: github.com/coder/coder/v2/enterprise/wsproxy/wsproxysdk.(*remoteMultiAgentHandler).writeJSON /home/coder/coder/enterprise/wsproxy/wsproxysdk/wsproxysdk.go:524 - failed to write msg: WebSocket closed: failed to read frame header: EOF ``` After: ``` Coder Workspace Proxy v0.0.0-devel+12f1878 - Your Self-Hosted Remote Development Platform Started HTTP listener at http://0.0.0.0:3001 View the Web UI: http://127.0.0.1:3001 ==> Logs will stream in below (press ctrl+c to gracefully exit): 2024-01-04 20:26:38.545 [warn] net.workspace-proxy.servertailnet: multiagent closed, reinitializing 2024-01-04 20:26:38.546 [erro] net.workspace-proxy.servertailnet: reinit multi agent ... error= dial coordinate websocket: github.com/coder/coder/v2/enterprise/wsproxy/wsproxysdk.(*Client).DialCoordinator /home/coder/coder/enterprise/wsproxy/wsproxysdk/wsproxysdk.go:454 - failed to WebSocket dial: failed to send handshake request: Get "http://127.0.0.1:3000/api/v2/workspaceproxies/me/coordinate": dial tcp 127.0.0.1:3000: connect: connection refused 2024-01-04 20:26:38.587 [erro] net.workspace-proxy.servertailnet: reinit multi agent ... 
error= dial coordinate websocket: github.com/coder/coder/v2/enterprise/wsproxy/wsproxysdk.(*Client).DialCoordinator /home/coder/coder/enterprise/wsproxy/wsproxysdk/wsproxysdk.go:454 - failed to WebSocket dial: failed to send handshake request: Get "http://127.0.0.1:3000/api/v2/workspaceproxies/me/coordinate": dial tcp 127.0.0.1:3000: connect: connection refused 2024-01-04 20:26:40.446 [info] net.workspace-proxy.servertailnet: successfully reinitialized multiagent agents=0 took=1.900892615s ```
This commit is contained in:
@ -224,6 +224,7 @@ func (s *ServerTailnet) watchAgentUpdates() {
|
||||
nodes, ok := conn.NextUpdate(s.ctx)
|
||||
if !ok {
|
||||
if conn.IsClosed() && s.ctx.Err() == nil {
|
||||
s.logger.Warn(s.ctx, "multiagent closed, reinitializing")
|
||||
s.reinitCoordinator()
|
||||
continue
|
||||
}
|
||||
@ -247,6 +248,7 @@ func (s *ServerTailnet) getAgentConn() tailnet.MultiAgentConn {
|
||||
}
|
||||
|
||||
func (s *ServerTailnet) reinitCoordinator() {
|
||||
start := time.Now()
|
||||
for retrier := retry.New(25*time.Millisecond, 5*time.Second); retrier.Wait(s.ctx); {
|
||||
s.nodesMu.Lock()
|
||||
agentConn, err := s.getMultiAgent(s.ctx)
|
||||
@ -264,6 +266,11 @@ func (s *ServerTailnet) reinitCoordinator() {
|
||||
s.logger.Warn(s.ctx, "resubscribe to agent", slog.Error(err), slog.F("agent_id", agentID))
|
||||
}
|
||||
}
|
||||
|
||||
s.logger.Info(s.ctx, "successfully reinitialized multiagent",
|
||||
slog.F("agents", len(s.agentConnectionTimes)),
|
||||
slog.F("took", time.Since(start)),
|
||||
)
|
||||
s.nodesMu.Unlock()
|
||||
return
|
||||
}
|
||||
|
Reference in New Issue
Block a user