From bedd2c59228473efca1efdd74315ee6f67a68cd3 Mon Sep 17 00:00:00 2001 From: Dean Sheather Date: Wed, 28 Feb 2024 08:14:11 -0800 Subject: [PATCH] fix: avoid race between replicas on start (#12344) DERP mesh key setup would do a SELECT and then an INSERT on failure, without a lock. During some testing with multiple replicas, I managed to cause a replica to crash due to them initializing simultaneously. Fixes: Encountered an error running "coder server" create coder API: insert mesh key: pq: duplicate key value violates unique constraint "site_configs_key_key" Co-authored-by: Cian Johnston --- coderd/database/dbauthz/dbauthz_test.go | 1 + coderd/database/dbmem/dbmem.go | 3 +++ coderd/database/lock.go | 1 + enterprise/cli/server.go | 33 ++++++++++++++++++++----- 4 files changed, 32 insertions(+), 6 deletions(-) diff --git a/coderd/database/dbauthz/dbauthz_test.go b/coderd/database/dbauthz/dbauthz_test.go index 9d6a43ccb4..b6fef0deca 100644 --- a/coderd/database/dbauthz/dbauthz_test.go +++ b/coderd/database/dbauthz/dbauthz_test.go @@ -1874,6 +1874,7 @@ func (s *MethodTestSuite) TestSystemFunctions() { check.Args(u.ID).Asserts(rbac.ResourceSystem, rbac.ActionRead) })) s.Run("GetDERPMeshKey", s.Subtest(func(db database.Store, check *expects) { + db.InsertDERPMeshKey(context.Background(), "testing") check.Args().Asserts(rbac.ResourceSystem, rbac.ActionRead) })) s.Run("InsertDERPMeshKey", s.Subtest(func(db database.Store, check *expects) { diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go index fad8441c68..3e6b0e1d15 100644 --- a/coderd/database/dbmem/dbmem.go +++ b/coderd/database/dbmem/dbmem.go @@ -1761,6 +1761,9 @@ func (q *FakeQuerier) GetDERPMeshKey(_ context.Context) (string, error) { q.mutex.RLock() defer q.mutex.RUnlock() + if q.derpMeshKey == "" { + return "", sql.ErrNoRows + } return q.derpMeshKey, nil } diff --git a/coderd/database/lock.go b/coderd/database/lock.go index a17903e4a7..a007e5e03e 100644 --- a/coderd/database/lock.go +++ b/coderd/database/lock.go @@ -9,6 +9,7 @@ const ( // Keep the unused iota here so we don't need + 1 every time lockIDUnused = iota LockIDDeploymentSetup + LockIDEnterpriseDeploymentSetup ) // GenLockID generates a unique and consistent lock ID from a given string. diff --git a/enterprise/cli/server.go b/enterprise/cli/server.go index 7fb1526c50..a21053ea35 100644 --- a/enterprise/cli/server.go +++ b/enterprise/cli/server.go @@ -15,6 +15,7 @@ import ( "tailscale.com/types/key" "github.com/coder/coder/v2/cli/clibase" + "github.com/coder/coder/v2/coderd/database" "github.com/coder/coder/v2/cryptorand" "github.com/coder/coder/v2/enterprise/audit" "github.com/coder/coder/v2/enterprise/audit/backends" @@ -37,21 +38,41 @@ func (r *RootCmd) Server(_ func()) *clibase.Cmd { } options.DERPServer = derp.NewServer(key.NewNode(), tailnet.Logger(options.Logger.Named("derp"))) - meshKey, err := options.Database.GetDERPMeshKey(ctx) - if err != nil { + + var meshKey string + err := options.Database.InTx(func(tx database.Store) error { + // This will block until the lock is acquired, and will be + // automatically released when the transaction ends. + err := tx.AcquireLock(ctx, database.LockIDEnterpriseDeploymentSetup) + if err != nil { + return xerrors.Errorf("acquire lock: %w", err) + } + + meshKey, err = tx.GetDERPMeshKey(ctx) + if err == nil { + return nil + } if !errors.Is(err, sql.ErrNoRows) { - return nil, nil, xerrors.Errorf("get mesh key: %w", err) + return xerrors.Errorf("get DERP mesh key: %w", err) } meshKey, err = cryptorand.String(32) if err != nil { - return nil, nil, xerrors.Errorf("generate mesh key: %w", err) + return xerrors.Errorf("generate DERP mesh key: %w", err) } - err = options.Database.InsertDERPMeshKey(ctx, meshKey) + err = tx.InsertDERPMeshKey(ctx, meshKey) if err != nil { - return nil, nil, xerrors.Errorf("insert mesh key: %w", err) + return xerrors.Errorf("insert DERP mesh key: %w", err) } + return nil + }, nil) + if err != nil { + return nil, nil, err + } + if meshKey == "" { + return nil, nil, xerrors.New("mesh key is empty") } options.DERPServer.SetMeshKey(meshKey) + options.Auditor = audit.NewAuditor( options.Database, audit.DefaultFilter,