From 623dcd97dc832f3dd2e3d23cae1869ea74b1548f Mon Sep 17 00:00:00 2001 From: Hugo Dutka Date: Thu, 5 Jun 2025 15:54:13 +0200 Subject: [PATCH] fix(cli): fix flakes related to context cancellation when establishing pg connections (#18246) Since https://github.com/coder/coder/pull/18195 was merged, we started running CLI tests with postgres instead of just dbmem. This surfaced errors related to context cancellation while establishing postgres connections. This PR should fix https://github.com/coder/internal/issues/672. Related to https://github.com/coder/coder/issues/15109. --- cli/clitest/clitest.go | 6 ++++++ cli/server.go | 5 ++++- coderd/database/pubsub/pubsub.go | 15 +++++++-------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cli/clitest/clitest.go b/cli/clitest/clitest.go index fbc913e7b8..8d1f5302ce 100644 --- a/cli/clitest/clitest.go +++ b/cli/clitest/clitest.go @@ -168,6 +168,12 @@ func StartWithAssert(t *testing.T, inv *serpent.Invocation, assertCallback func( switch { case errors.Is(err, context.Canceled): return + case err != nil && strings.Contains(err.Error(), "driver: bad connection"): + // When we cancel the context on a query that's being executed within + // a transaction, sometimes, instead of a context.Canceled error we get + // a "driver: bad connection" error. + // https://github.com/lib/pq/issues/1137 + return default: assert.NoError(t, err) } diff --git a/cli/server.go b/cli/server.go index 9f55b63fc7..d9badd02d9 100644 --- a/cli/server.go +++ b/cli/server.go @@ -2359,6 +2359,10 @@ func ConnectToPostgres(ctx context.Context, logger slog.Logger, driver string, d if err != nil { return nil, xerrors.Errorf("get postgres version: %w", err) } + defer version.Close() + if version.Err() != nil { + return nil, xerrors.Errorf("version select: %w", version.Err()) + } if !version.Next() { return nil, xerrors.Errorf("no rows returned for version select") } @@ -2367,7 +2371,6 @@ func ConnectToPostgres(ctx context.Context, logger slog.Logger, driver string, d if err != nil { return nil, xerrors.Errorf("scan version: %w", err) } - _ = version.Close() if versionNum < 130000 { return nil, xerrors.Errorf("PostgreSQL version must be v13.0.0 or higher! Got: %d", versionNum) diff --git a/coderd/database/pubsub/pubsub.go b/coderd/database/pubsub/pubsub.go index 8019754e15..c4b454abdf 100644 --- a/coderd/database/pubsub/pubsub.go +++ b/coderd/database/pubsub/pubsub.go @@ -552,15 +552,14 @@ func (p *PGPubsub) startListener(ctx context.Context, connectURL string) error { sentErrCh = true }), } - select { - case err = <-errCh: - if err != nil { - _ = p.pgListener.Close() - return xerrors.Errorf("create pq listener: %w", err) - } - case <-ctx.Done(): + // We don't respect context cancellation here. There's a bug in the pq library + // where if you close the listener before or while the connection is being + // established, the connection will be established anyway, and will not be + // closed. + // https://github.com/lib/pq/issues/1192 + if err := <-errCh; err != nil { _ = p.pgListener.Close() - return ctx.Err() + return xerrors.Errorf("create pq listener: %w", err) } return nil }