feat: Add high availability for multiple replicas (#4555)

* feat: HA tailnet coordinator

* fixup! feat: HA tailnet coordinator

* fixup! feat: HA tailnet coordinator

* remove printlns

* close all connections on coordinator

* impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* Add replicas

* Add DERP meshing to arbitrary addresses

* Move packages to highavailability folder

* Move coordinator to high availability package

* Add flags for HA

* Rename to replicasync

* Denest packages for replicas

* Add test for multiple replicas

* Fix coordination test

* Add HA to the helm chart

* Rename function pointer

* Add warnings for HA

* Add the ability to block endpoints

* Add flag to disable P2P connections

* Wow, I made the tests pass

* Add replicas endpoint

* Ensure close kills replica

* Update sql

* Add database latency to high availability

* Pipe TLS to DERP mesh

* Fix DERP mesh with TLS

* Add tests for TLS

* Fix replica sync TLS

* Fix RootCA for replica meshing

* Remove ID from replicasync

* Fix getting certificates for meshing

* Remove excessive locking

* Fix linting

* Store mesh key in the database

* Fix replica key for tests

* Fix types gen

* Fix unlocking unlocked

* Fix race in tests

* Update enterprise/derpmesh/derpmesh.go

Co-authored-by: Colin Adler <colin1adler@gmail.com>

* Rename to syncReplicas

* Reuse http client

* Delete old replicas on a CRON

* Fix race condition in connection tests

* Fix linting

* Fix nil type

* Move pubsub to in-memory for twenty test

* Add comment for configuration tweaking

* Fix leak with transport

* Fix close leak in derpmesh

* Fix race when creating server

* Remove handler update

* Skip test on Windows

* Fix DERP mesh test

* Wrap HTTP handler replacement in mutex

* Fix error message for relay

* Fix API handler for normal tests

* Fix speedtest

* Fix replica resend

* Fix derpmesh send

* Ping async

* Increase wait time of template version jobd

* Fix race when closing replica sync

* Add name to client

* Log the derpmap being used

* Don't connect if DERP is empty

* Improve agent coordinator logging

* Fix lock in coordinator

* Fix relay addr

* Fix race when updating durations

* Fix client publish race

* Run pubsub loop in a queue

* Store agent nodes in order

* Fix coordinator locking

* Check for closed pipe

Co-authored-by: Colin Adler <colin1adler@gmail.com>
This commit is contained in:
Kyle Carberry
2022-10-17 08:43:30 -05:00
committed by GitHub
parent dc3519e973
commit 2ba4a62a0d
76 changed files with 3437 additions and 404 deletions

View File

@ -2031,7 +2031,7 @@ func (q *sqlQuerier) ParameterValues(ctx context.Context, arg ParameterValuesPar
const getProvisionerDaemonByID = `-- name: GetProvisionerDaemonByID :one
SELECT
id, created_at, updated_at, name, provisioners
id, created_at, updated_at, name, provisioners, replica_id
FROM
provisioner_daemons
WHERE
@ -2047,13 +2047,14 @@ func (q *sqlQuerier) GetProvisionerDaemonByID(ctx context.Context, id uuid.UUID)
&i.UpdatedAt,
&i.Name,
pq.Array(&i.Provisioners),
&i.ReplicaID,
)
return i, err
}
const getProvisionerDaemons = `-- name: GetProvisionerDaemons :many
SELECT
id, created_at, updated_at, name, provisioners
id, created_at, updated_at, name, provisioners, replica_id
FROM
provisioner_daemons
`
@ -2073,6 +2074,7 @@ func (q *sqlQuerier) GetProvisionerDaemons(ctx context.Context) ([]ProvisionerDa
&i.UpdatedAt,
&i.Name,
pq.Array(&i.Provisioners),
&i.ReplicaID,
); err != nil {
return nil, err
}
@ -2096,7 +2098,7 @@ INSERT INTO
provisioners
)
VALUES
($1, $2, $3, $4) RETURNING id, created_at, updated_at, name, provisioners
($1, $2, $3, $4) RETURNING id, created_at, updated_at, name, provisioners, replica_id
`
type InsertProvisionerDaemonParams struct {
@ -2120,6 +2122,7 @@ func (q *sqlQuerier) InsertProvisionerDaemon(ctx context.Context, arg InsertProv
&i.UpdatedAt,
&i.Name,
pq.Array(&i.Provisioners),
&i.ReplicaID,
)
return i, err
}
@ -2577,6 +2580,177 @@ func (q *sqlQuerier) UpdateProvisionerJobWithCompleteByID(ctx context.Context, a
return err
}
const deleteReplicasUpdatedBefore = `-- name: DeleteReplicasUpdatedBefore :exec
DELETE FROM replicas WHERE updated_at < $1
`
func (q *sqlQuerier) DeleteReplicasUpdatedBefore(ctx context.Context, updatedAt time.Time) error {
_, err := q.db.ExecContext(ctx, deleteReplicasUpdatedBefore, updatedAt)
return err
}
const getReplicasUpdatedAfter = `-- name: GetReplicasUpdatedAfter :many
SELECT id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error FROM replicas WHERE updated_at > $1 AND stopped_at IS NULL
`
func (q *sqlQuerier) GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error) {
rows, err := q.db.QueryContext(ctx, getReplicasUpdatedAfter, updatedAt)
if err != nil {
return nil, err
}
defer rows.Close()
var items []Replica
for rows.Next() {
var i Replica
if err := rows.Scan(
&i.ID,
&i.CreatedAt,
&i.StartedAt,
&i.StoppedAt,
&i.UpdatedAt,
&i.Hostname,
&i.RegionID,
&i.RelayAddress,
&i.DatabaseLatency,
&i.Version,
&i.Error,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Close(); err != nil {
return nil, err
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const insertReplica = `-- name: InsertReplica :one
INSERT INTO replicas (
id,
created_at,
started_at,
updated_at,
hostname,
region_id,
relay_address,
version,
database_latency
) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9) RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error
`
type InsertReplicaParams struct {
ID uuid.UUID `db:"id" json:"id"`
CreatedAt time.Time `db:"created_at" json:"created_at"`
StartedAt time.Time `db:"started_at" json:"started_at"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
Hostname string `db:"hostname" json:"hostname"`
RegionID int32 `db:"region_id" json:"region_id"`
RelayAddress string `db:"relay_address" json:"relay_address"`
Version string `db:"version" json:"version"`
DatabaseLatency int32 `db:"database_latency" json:"database_latency"`
}
func (q *sqlQuerier) InsertReplica(ctx context.Context, arg InsertReplicaParams) (Replica, error) {
row := q.db.QueryRowContext(ctx, insertReplica,
arg.ID,
arg.CreatedAt,
arg.StartedAt,
arg.UpdatedAt,
arg.Hostname,
arg.RegionID,
arg.RelayAddress,
arg.Version,
arg.DatabaseLatency,
)
var i Replica
err := row.Scan(
&i.ID,
&i.CreatedAt,
&i.StartedAt,
&i.StoppedAt,
&i.UpdatedAt,
&i.Hostname,
&i.RegionID,
&i.RelayAddress,
&i.DatabaseLatency,
&i.Version,
&i.Error,
)
return i, err
}
const updateReplica = `-- name: UpdateReplica :one
UPDATE replicas SET
updated_at = $2,
started_at = $3,
stopped_at = $4,
relay_address = $5,
region_id = $6,
hostname = $7,
version = $8,
error = $9,
database_latency = $10
WHERE id = $1 RETURNING id, created_at, started_at, stopped_at, updated_at, hostname, region_id, relay_address, database_latency, version, error
`
type UpdateReplicaParams struct {
ID uuid.UUID `db:"id" json:"id"`
UpdatedAt time.Time `db:"updated_at" json:"updated_at"`
StartedAt time.Time `db:"started_at" json:"started_at"`
StoppedAt sql.NullTime `db:"stopped_at" json:"stopped_at"`
RelayAddress string `db:"relay_address" json:"relay_address"`
RegionID int32 `db:"region_id" json:"region_id"`
Hostname string `db:"hostname" json:"hostname"`
Version string `db:"version" json:"version"`
Error string `db:"error" json:"error"`
DatabaseLatency int32 `db:"database_latency" json:"database_latency"`
}
func (q *sqlQuerier) UpdateReplica(ctx context.Context, arg UpdateReplicaParams) (Replica, error) {
row := q.db.QueryRowContext(ctx, updateReplica,
arg.ID,
arg.UpdatedAt,
arg.StartedAt,
arg.StoppedAt,
arg.RelayAddress,
arg.RegionID,
arg.Hostname,
arg.Version,
arg.Error,
arg.DatabaseLatency,
)
var i Replica
err := row.Scan(
&i.ID,
&i.CreatedAt,
&i.StartedAt,
&i.StoppedAt,
&i.UpdatedAt,
&i.Hostname,
&i.RegionID,
&i.RelayAddress,
&i.DatabaseLatency,
&i.Version,
&i.Error,
)
return i, err
}
const getDERPMeshKey = `-- name: GetDERPMeshKey :one
SELECT value FROM site_configs WHERE key = 'derp_mesh_key'
`
func (q *sqlQuerier) GetDERPMeshKey(ctx context.Context) (string, error) {
row := q.db.QueryRowContext(ctx, getDERPMeshKey)
var value string
err := row.Scan(&value)
return value, err
}
const getDeploymentID = `-- name: GetDeploymentID :one
SELECT value FROM site_configs WHERE key = 'deployment_id'
`
@ -2588,6 +2762,15 @@ func (q *sqlQuerier) GetDeploymentID(ctx context.Context) (string, error) {
return value, err
}
const insertDERPMeshKey = `-- name: InsertDERPMeshKey :exec
INSERT INTO site_configs (key, value) VALUES ('derp_mesh_key', $1)
`
func (q *sqlQuerier) InsertDERPMeshKey(ctx context.Context, value string) error {
_, err := q.db.ExecContext(ctx, insertDERPMeshKey, value)
return err
}
const insertDeploymentID = `-- name: InsertDeploymentID :exec
INSERT INTO site_configs (key, value) VALUES ('deployment_id', $1)
`