From b5fbfd727c9c2f1ce46726704e6b161c3e1c48ce Mon Sep 17 00:00:00 2001 From: Spike Curtis Date: Fri, 22 Nov 2024 11:44:56 +0400 Subject: [PATCH] fix: fix hang in teardown of TestConn_CoordinatorRollingRestart (#15624) Fixes a flake seen on main: https://github.com/coder/coder/actions/runs/11967210463/job/33364072261 The TCP echo server had a waitgroup to ensure that all accepted connections get torn down, but no explicit teardown of the connection. We depended on the tailnet agent closing its side of the connection, which in turn depends on closing the tunneled connection. The tunneled `FIN` could race with tearing down the tunnel itself. So, this PR adds an explicit `t.Cleanup` to close each echo connection. It also removes the waitgroup. The purpose of the waitgroup was to ensure that all goroutines created by the echo listener get shut down, but we have `goleak` for that, which fails much faster than the 20 minutes this test run took. --- enterprise/coderd/coderd_test.go | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/enterprise/coderd/coderd_test.go b/enterprise/coderd/coderd_test.go index 8c6de3fc3d..73e169ff0d 100644 --- a/enterprise/coderd/coderd_test.go +++ b/enterprise/coderd/coderd_test.go @@ -852,30 +852,26 @@ func TestConn_CoordinatorRollingRestart(t *testing.T) { } func tcpEchoServer(t *testing.T) string { - var listenerWg sync.WaitGroup tcpListener, err := net.Listen("tcp", "127.0.0.1:0") require.NoError(t, err) t.Cleanup(func() { _ = tcpListener.Close() - listenerWg.Wait() }) - listenerWg.Add(1) go func() { - defer listenerWg.Done() for { conn, err := tcpListener.Accept() if err != nil { return } - listenerWg.Add(1) + t.Cleanup(func() { + _ = conn.Close() + }) go func() { - defer listenerWg.Done() defer conn.Close() _, _ = io.Copy(conn, conn) }() } }() - return tcpListener.Addr().String() }