mirror of
https://github.com/coder/coder.git
synced 2025-07-03 16:13:58 +00:00
feat: Add high availability for multiple replicas (#4555)
* feat: HA tailnet coordinator * fixup! feat: HA tailnet coordinator * fixup! feat: HA tailnet coordinator * remove printlns * close all connections on coordinator * implement high availability feature * fixup! implement high availability feature * fixup! implement high availability feature * fixup! implement high availability feature * fixup! implement high availability feature * Add replicas * Add DERP meshing to arbitrary addresses * Move packages to highavailability folder * Move coordinator to high availability package * Add flags for HA * Rename to replicasync * Denest packages for replicas * Add test for multiple replicas * Fix coordination test * Add HA to the helm chart * Rename function pointer * Add warnings for HA * Add the ability to block endpoints * Add flag to disable P2P connections * Wow, I made the tests pass * Add replicas endpoint * Ensure close kills replica * Update sql * Add database latency to high availability * Pipe TLS to DERP mesh * Fix DERP mesh with TLS * Add tests for TLS * Fix replica sync TLS * Fix RootCA for replica meshing * Remove ID from replicasync * Fix getting certificates for meshing * Remove excessive locking * Fix linting * Store mesh key in the database * Fix replica key for tests * Fix types gen * Fix unlocking unlocked * Fix race in tests * Update enterprise/derpmesh/derpmesh.go Co-authored-by: Colin Adler <colin1adler@gmail.com> * Rename to syncReplicas * Reuse http client * Delete old replicas on a CRON * Fix race condition in connection tests * Fix linting * Fix nil type * Move pubsub to in-memory for twenty test * Add comment for configuration tweaking * Fix leak with transport * Fix close leak in derpmesh * Fix race when creating server * Remove handler update * Skip test on Windows * Fix DERP mesh test * Wrap HTTP handler replacement in mutex * Fix error message for relay * Fix API handler for normal tests * Fix speedtest * Fix replica resend * Fix derpmesh send * Ping async * Increase wait time of template 
version job * Fix race when closing replica sync * Add name to client * Log the derpmap being used * Don't connect if DERP is empty * Improve agent coordinator logging * Fix lock in coordinator * Fix relay addr * Fix race when updating durations * Fix client publish race * Run pubsub loop in a queue * Store agent nodes in order * Fix coordinator locking * Check for closed pipe Co-authored-by: Colin Adler <colin1adler@gmail.com>
This commit is contained in:
@ -2031,7 +2031,7 @@ func (q *sqlQuerier) ParameterValues(ctx context.Context, arg ParameterValuesPar
|
||||
|
||||
const getProvisionerDaemonByID = `-- name: GetProvisionerDaemonByID :one
|
||||
SELECT
|
||||
id, created_at, updated_at, name, provisioners
|
||||
id, created_at, updated_at, name, provisioners, replica_id
|
||||
FROM
|
||||
provisioner_daemons
|
||||
WHERE
|
||||
@ -2047,13 +2047,14 @@ func (q *sqlQuerier) GetProvisionerDaemonByID(ctx context.Context, id uuid.UUID)
|
||||
&i.UpdatedAt,
|
||||
&i.Name,
|
||||
pq.Array(&i.Provisioners),
|
||||
&i.ReplicaID,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
const getProvisionerDaemons = `-- name: GetProvisionerDaemons :many
|
||||
SELECT
|
||||
id, created_at, updated_at, name, provisioners
|
||||
id, created_at, updated_at, name, provisioners, replica_id
|
||||
FROM
|
||||
provisioner_daemons
|
||||
`
|
||||
@ -2073,6 +2074,7 @@ func (q *sqlQuerier) GetProvisionerDaemons(ctx context.Context) ([]ProvisionerDa
|
||||
&i.UpdatedAt,
|
||||
&i.Name,
|
||||
pq.Array(&i.Provisioners),
|
||||
&i.ReplicaID,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -2096,7 +2098,7 @@ INSERT INTO
|
||||
provisioners
|
||||
)
|
||||
VALUES
|
||||
($1, $2, $3, $4) RETURNING id, created_at, updated_at, name, provisioners
|
||||
($1, $2, $3, $4) RETURNING id, created_at, updated_at, name, provisioners, replica_id
|
||||
`
|
||||
|
||||
type InsertProvisionerDaemonParams struct {
|
||||
@ -2120,6 +2122,7 @@ func (q *sqlQuerier) InsertProvisionerDaemon(ctx context.Context, arg InsertProv
|
||||
&i.UpdatedAt,
|
||||
&i.Name,
|
||||
pq.Array(&i.Provisioners),
|
||||
&i.ReplicaID,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
@ -2577,6 +2580,177 @@ func (q *sqlQuerier) UpdateProvisionerJobWithCompleteByID(ctx context.Context, a
|
||||
return err
|
||||
}
|
||||
|
||||
// deleteReplicasUpdatedBefore is the SQL text executed by
// DeleteReplicasUpdatedBefore. It deletes every row of the replicas table
// whose updated_at is strictly less than the single bound parameter ($1).
const deleteReplicasUpdatedBefore = `-- name: DeleteReplicasUpdatedBefore :exec
|
||||
DELETE FROM replicas WHERE updated_at < $1
|
||||
`
|
||||
|
||||
func (q *sqlQuerier) DeleteReplicasUpdatedBefore(ctx context.Context, updatedAt time.Time) error {
|
||||
_, err := q.db.ExecContext(ctx, deleteReplicasUpdatedBefore, updatedAt)
|
||||
return err
|
||||
}
|
||||
|
||||
// getReplicasUpdatedAfter is the SQL text executed by GetReplicasUpdatedAfter.
// It selects eleven columns from replicas for rows whose updated_at is
// strictly greater than $1 and whose stopped_at is NULL.
const getReplicasUpdatedAfter = `-- name: GetReplicasUpdatedAfter :many
|
||||
SELECT id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error FROM replicas WHERE updated_at > $1 AND stopped_at IS NULL
|
||||
`
|
||||
|
||||
func (q *sqlQuerier) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error) {
|
||||
rows, err := q.db.QueryContext(ctx, getReplicasUpdatedAfter, updatedAt)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
var items []Replica
|
||||
for rows.Next() {
|
||||
var i Replica
|
||||
if err := rows.Scan(
|
||||
&i.ID,
|
||||
&i.CreatedAt,
|
||||
&i.StartedAt,
|
||||
&i.StoppedAt,
|
||||
&i.UpdatedAt,
|
||||
&i.Hostname,
|
||||
&i.RegionID,
|
||||
&i.RelayAddress,
|
||||
&i.DatabaseLatency,
|
||||
&i.Version,
|
||||
&i.Error,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
items = append(items, i)
|
||||
}
|
||||
if err := rows.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return items, nil
|
||||
}
|
||||
|
||||
// insertReplica is the SQL text executed by InsertReplica. It inserts one row
// into replicas using nine bound parameters ($1 through $9) and returns
// eleven columns of the new row via RETURNING.
const insertReplica = `-- name: InsertReplica :one
|
||||
INSERT INTO replicas (
|
||||
id,
|
||||
created_at,
|
||||
started_at,
|
||||
updated_at,
|
||||
hostname,
|
||||
region_id,
|
||||
relay_address,
|
||||
version,
|
||||
database_latency
|
||||
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error
|
||||
`
|
||||
|
||||
// InsertReplicaParams carries the arguments for InsertReplica. InsertReplica
// passes the fields, in declaration order, as parameters $1 through $9 of the
// insertReplica statement.
type InsertReplicaParams struct {
|
||||
ID uuid.UUID `db:"id" json:"id"`
|
||||
CreatedAt time.Time `db:"created_at" json:"created_at"`
|
||||
StartedAt time.Time `db:"started_at" json:"started_at"`
|
||||
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
|
||||
Hostname string `db:"hostname" json:"hostname"`
|
||||
RegionID int32 `db:"region_id" json:"region_id"`
|
||||
RelayAddress string `db:"relay_address" json:"relay_address"`
|
||||
Version string `db:"version" json:"version"`
|
||||
DatabaseLatency int32 `db:"database_latency" json:"database_latency"`
|
||||
}
|
||||
|
||||
func (q *sqlQuerier) InsertReplica(ctx context.Context, arg InsertReplicaParams) (Replica, error) {
|
||||
row := q.db.QueryRowContext(ctx, insertReplica,
|
||||
arg.ID,
|
||||
arg.CreatedAt,
|
||||
arg.StartedAt,
|
||||
arg.UpdatedAt,
|
||||
arg.Hostname,
|
||||
arg.RegionID,
|
||||
arg.RelayAddress,
|
||||
arg.Version,
|
||||
arg.DatabaseLatency,
|
||||
)
|
||||
var i Replica
|
||||
err := row.Scan(
|
||||
&i.ID,
|
||||
&i.CreatedAt,
|
||||
&i.StartedAt,
|
||||
&i.StoppedAt,
|
||||
&i.UpdatedAt,
|
||||
&i.Hostname,
|
||||
&i.RegionID,
|
||||
&i.RelayAddress,
|
||||
&i.DatabaseLatency,
|
||||
&i.Version,
|
||||
&i.Error,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
// updateReplica is the SQL text executed by UpdateReplica. It updates nine
// columns ($2 through $10) of the replicas row whose id equals $1 and returns
// eleven columns of that row via RETURNING.
const updateReplica = `-- name: UpdateReplica :one
|
||||
UPDATE replicas SET
|
||||
updated_at = $2,
|
||||
started_at = $3,
|
||||
stopped_at = $4,
|
||||
relay_address = $5,
|
||||
region_id = $6,
|
||||
hostname = $7,
|
||||
version = $8,
|
||||
error = $9,
|
||||
database_latency = $10
|
||||
WHERE id = $1 RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error
|
||||
`
|
||||
|
||||
// UpdateReplicaParams carries the arguments for UpdateReplica. UpdateReplica
// passes the fields, in declaration order, as parameters $1 through $10 of the
// updateReplica statement; ID ($1) selects the row to update.
type UpdateReplicaParams struct {
|
||||
ID uuid.UUID `db:"id" json:"id"`
|
||||
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
|
||||
StartedAt time.Time `db:"started_at" json:"started_at"`
|
||||
StoppedAt sql.NullTime `db:"stopped_at" json:"stopped_at"`
|
||||
RelayAddress string `db:"relay_address" json:"relay_address"`
|
||||
RegionID int32 `db:"region_id" json:"region_id"`
|
||||
Hostname string `db:"hostname" json:"hostname"`
|
||||
Version string `db:"version" json:"version"`
|
||||
Error string `db:"error" json:"error"`
|
||||
DatabaseLatency int32 `db:"database_latency" json:"database_latency"`
|
||||
}
|
||||
|
||||
func (q *sqlQuerier) UpdateReplica(ctx context.Context, arg UpdateReplicaParams) (Replica, error) {
|
||||
row := q.db.QueryRowContext(ctx, updateReplica,
|
||||
arg.ID,
|
||||
arg.UpdatedAt,
|
||||
arg.StartedAt,
|
||||
arg.StoppedAt,
|
||||
arg.RelayAddress,
|
||||
arg.RegionID,
|
||||
arg.Hostname,
|
||||
arg.Version,
|
||||
arg.Error,
|
||||
arg.DatabaseLatency,
|
||||
)
|
||||
var i Replica
|
||||
err := row.Scan(
|
||||
&i.ID,
|
||||
&i.CreatedAt,
|
||||
&i.StartedAt,
|
||||
&i.StoppedAt,
|
||||
&i.UpdatedAt,
|
||||
&i.Hostname,
|
||||
&i.RegionID,
|
||||
&i.RelayAddress,
|
||||
&i.DatabaseLatency,
|
||||
&i.Version,
|
||||
&i.Error,
|
||||
)
|
||||
return i, err
|
||||
}
|
||||
|
||||
// getDERPMeshKey is the SQL text executed by GetDERPMeshKey. It selects the
// value column from site_configs for the row whose key is 'derp_mesh_key'.
const getDERPMeshKey = `-- name: GetDERPMeshKey :one
|
||||
SELECT value FROM site_configs WHERE key = 'derp_mesh_key'
|
||||
`
|
||||
|
||||
func (q *sqlQuerier) GetDERPMeshKey(ctx context.Context) (string, error) {
|
||||
row := q.db.QueryRowContext(ctx, getDERPMeshKey)
|
||||
var value string
|
||||
err := row.Scan(&value)
|
||||
return value, err
|
||||
}
|
||||
|
||||
// getDeploymentID is the SQL text for the GetDeploymentID query. It selects
// the value column from site_configs for the row whose key is 'deployment_id'.
const getDeploymentID = `-- name: GetDeploymentID :one
|
||||
SELECT value FROM site_configs WHERE key = 'deployment_id'
|
||||
`
|
||||
@ -2588,6 +2762,15 @@ func (q *sqlQuerier) GetDeploymentID(ctx context.Context) (string, error) {
|
||||
return value, err
|
||||
}
|
||||
|
||||
// insertDERPMeshKey is the SQL text executed by InsertDERPMeshKey. It inserts
// a site_configs row with key 'derp_mesh_key' and the value bound as $1.
const insertDERPMeshKey = `-- name: InsertDERPMeshKey :exec
|
||||
INSERT INTO site_configs (key, value) VALUES ('derp_mesh_key', $1)
|
||||
`
|
||||
|
||||
func (q *sqlQuerier) InsertDERPMeshKey(ctx context.Context, value string) error {
|
||||
_, err := q.db.ExecContext(ctx, insertDERPMeshKey, value)
|
||||
return err
|
||||
}
|
||||
|
||||
// insertDeploymentID is the SQL text for the InsertDeploymentID statement. It
// inserts a site_configs row with key 'deployment_id' and the value bound as
// $1.
const insertDeploymentID = `-- name: InsertDeploymentID :exec
|
||||
INSERT INTO site_configs (key, value) VALUES ('deployment_id', $1)
|
||||
`
|
||||
|
Reference in New Issue
Block a user