fix: fix hang in teardown of TestConn_CoordinatorRollingRestart (#15624)

fixes a flake seen on main: https://github.com/coder/coder/actions/runs/11967210463/job/33364072261

The TCP echo server had a waitgroup to ensure that all accepted connections get torn down, but it never explicitly closed those connections. We depended on the tailnet agent closing its side of the connection, which in turn depends on the tunneled connection being closed. The tunneled `FIN` could race with tearing down the tunnel itself.

So, this PR adds an explicit `t.Cleanup` to close each echo connection. It also removes the waitgroup. The purpose of the waitgroup was to ensure that all goroutines created by the echo listener get shut down, but we already have `goleak` for that, and it fails much faster than the 20 minutes this test run took.
This commit is contained in:
Spike Curtis
2024-11-22 11:44:56 +04:00
committed by GitHub
parent 103824f726
commit b5fbfd727c

View File

@ -852,30 +852,26 @@ func TestConn_CoordinatorRollingRestart(t *testing.T) {
}
func tcpEchoServer(t *testing.T) string {
var listenerWg sync.WaitGroup
tcpListener, err := net.Listen("tcp", "127.0.0.1:0")
require.NoError(t, err)
t.Cleanup(func() {
_ = tcpListener.Close()
listenerWg.Wait()
})
listenerWg.Add(1)
go func() {
defer listenerWg.Done()
for {
conn, err := tcpListener.Accept()
if err != nil {
return
}
listenerWg.Add(1)
t.Cleanup(func() {
_ = conn.Close()
})
go func() {
defer listenerWg.Done()
defer conn.Close()
_, _ = io.Copy(conn, conn)
}()
}
}()
return tcpListener.Addr().String()
}