From 012a9e759e8b8d856bd814b0e2f77d7daa6424eb Mon Sep 17 00:00:00 2001 From: Mathias Fredriksson Date: Wed, 14 Dec 2022 18:45:46 +0200 Subject: [PATCH] fix: Avoid deadlock in AgentReportStats Close during agent Close (#5415) Since AgentReportStats takes a stats function which was doing mutex locking on agent shutdown, it was possible for there to be a deadlock depending on how the AgentReportsStats Close function is implemented. This mostly seems to happen on Windows test runners as it's pretty hard to hit this edge case. The bug currently only exists in the test implementation of AgentReportStats, however, this was refactored to be more robust in case of future changes. --- agent/agent.go | 41 +++++++++++++++++++---------------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index b25c6217e3..53def9472a 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -226,6 +226,25 @@ func (a *agent) run(ctx context.Context) error { _ = network.Close() return xerrors.New("agent is closed") } + + // Report statistics from the created network. + cl, err := a.client.AgentReportStats(ctx, a.logger, func() *codersdk.AgentStats { + stats := network.ExtractTrafficStats() + return convertAgentStats(stats) + }) + if err != nil { + a.logger.Error(ctx, "report stats", slog.Error(err)) + } else { + if err = a.trackConnGoroutine(func() { + // This is OK because the agent never re-creates the tailnet + // and the only shutdown indicator is agent.Close(). + <-a.closed + _ = cl.Close() + }); err != nil { + a.logger.Debug(ctx, "report stats goroutine", slog.Error(err)) + _ = cl.Close() + } + } } else { // Update the DERP map! network.SetDERPMap(metadata.DERPMap) @@ -561,28 +580,6 @@ func (a *agent) init(ctx context.Context) { } go a.runLoop(ctx) - cl, err := a.client.AgentReportStats(ctx, a.logger, func() *codersdk.AgentStats { - stats := map[netlogtype.Connection]netlogtype.Counts{} - a.closeMutex.Lock() - if a.network != nil { - stats = a.network.ExtractTrafficStats() - } - a.closeMutex.Unlock() - return convertAgentStats(stats) - }) - if err != nil { - a.logger.Error(ctx, "report stats", slog.Error(err)) - return - } - - if err = a.trackConnGoroutine(func() { - <-a.closed - _ = cl.Close() - }); err != nil { - a.logger.Error(ctx, "report stats goroutine", slog.Error(err)) - _ = cl.Close() - return - } } func convertAgentStats(counts map[netlogtype.Connection]netlogtype.Counts) *codersdk.AgentStats {