feat: Add high availability for multiple replicas (#4555)

* feat: HA tailnet coordinator

* fixup! feat: HA tailnet coordinator

* fixup! feat: HA tailnet coordinator

* remove printlns

* close all connections on coordinator

* impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* Add replicas

* Add DERP meshing to arbitrary addresses

* Move packages to highavailability folder

* Move coordinator to high availability package

* Add flags for HA

* Rename to replicasync

* Denest packages for replicas

* Add test for multiple replicas

* Fix coordination test

* Add HA to the helm chart

* Rename function pointer

* Add warnings for HA

* Add the ability to block endpoints

* Add flag to disable P2P connections

* Wow, I made the tests pass

* Add replicas endpoint

* Ensure close kills replica

* Update sql

* Add database latency to high availability

* Pipe TLS to DERP mesh

* Fix DERP mesh with TLS

* Add tests for TLS

* Fix replica sync TLS

* Fix RootCA for replica meshing

* Remove ID from replicasync

* Fix getting certificates for meshing

* Remove excessive locking

* Fix linting

* Store mesh key in the database

* Fix replica key for tests

* Fix types gen

* Fix unlocking unlocked

* Fix race in tests

* Update enterprise/derpmesh/derpmesh.go

Co-authored-by: Colin Adler <colin1adler@gmail.com>

* Rename to syncReplicas

* Reuse http client

* Delete old replicas on a CRON

* Fix race condition in connection tests

* Fix linting

* Fix nil type

* Move pubsub to in-memory for twenty test

* Add comment for configuration tweaking

* Fix leak with transport

* Fix close leak in derpmesh

* Fix race when creating server

* Remove handler update

* Skip test on Windows

* Fix DERP mesh test

* Wrap HTTP handler replacement in mutex

* Fix error message for relay

* Fix API handler for normal tests

* Fix speedtest

* Fix replica resend

* Fix derpmesh send

* Ping async

* Increase wait time of template version jobd

* Fix race when closing replica sync

* Add name to client

* Log the derpmap being used

* Don't connect if DERP is empty

* Improve agent coordinator logging

* Fix lock in coordinator

* Fix relay addr

* Fix race when updating durations

* Fix client publish race

* Run pubsub loop in a queue

* Store agent nodes in order

* Fix coordinator locking

* Check for closed pipe

Co-authored-by: Colin Adler <colin1adler@gmail.com>
This commit is contained in:
Kyle Carberry
2022-10-17 08:43:30 -05:00
committed by GitHub
parent dc3519e973
commit 2ba4a62a0d
76 changed files with 3437 additions and 404 deletions

View File

@ -26,6 +26,7 @@ type sqlcQuerier interface {
DeleteLicense(ctx context.Context, id int32) (int32, error)
DeleteOldAgentStats(ctx context.Context) error
DeleteParameterValueByID(ctx context.Context, id uuid.UUID) error
DeleteReplicasUpdatedBefore(ctx context.Context, updatedAt time.Time) error
GetAPIKeyByID(ctx context.Context, id string) (APIKey, error)
GetAPIKeysByLoginType(ctx context.Context, loginType LoginType) ([]APIKey, error)
GetAPIKeysLastUsedAfter(ctx context.Context, lastUsed time.Time) ([]APIKey, error)
@ -38,6 +39,7 @@ type sqlcQuerier interface {
// This function returns roles for authorization purposes. Implied member roles
// are included.
GetAuthorizationUserRoles(ctx context.Context, userID uuid.UUID) (GetAuthorizationUserRolesRow, error)
GetDERPMeshKey(ctx context.Context) (string, error)
GetDeploymentID(ctx context.Context) (string, error)
GetFileByHashAndCreator(ctx context.Context, arg GetFileByHashAndCreatorParams) (File, error)
GetFileByID(ctx context.Context, id uuid.UUID) (File, error)
@ -67,6 +69,7 @@ type sqlcQuerier interface {
GetProvisionerJobsByIDs(ctx context.Context, ids []uuid.UUID) ([]ProvisionerJob, error)
GetProvisionerJobsCreatedAfter(ctx context.Context, createdAt time.Time) ([]ProvisionerJob, error)
GetProvisionerLogsByIDBetween(ctx context.Context, arg GetProvisionerLogsByIDBetweenParams) ([]ProvisionerJobLog, error)
GetReplicasUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]Replica, error)
GetTemplateAverageBuildTime(ctx context.Context, arg GetTemplateAverageBuildTimeParams) (GetTemplateAverageBuildTimeRow, error)
GetTemplateByID(ctx context.Context, id uuid.UUID) (Template, error)
GetTemplateByOrganizationAndName(ctx context.Context, arg GetTemplateByOrganizationAndNameParams) (Template, error)
@ -123,6 +126,7 @@ type sqlcQuerier interface {
// every member of the org.
InsertAllUsersGroup(ctx context.Context, organizationID uuid.UUID) (Group, error)
InsertAuditLog(ctx context.Context, arg InsertAuditLogParams) (AuditLog, error)
InsertDERPMeshKey(ctx context.Context, value string) error
InsertDeploymentID(ctx context.Context, value string) error
InsertFile(ctx context.Context, arg InsertFileParams) (File, error)
InsertGitSSHKey(ctx context.Context, arg InsertGitSSHKeyParams) (GitSSHKey, error)
@ -136,6 +140,7 @@ type sqlcQuerier interface {
InsertProvisionerDaemon(ctx context.Context, arg InsertProvisionerDaemonParams) (ProvisionerDaemon, error)
InsertProvisionerJob(ctx context.Context, arg InsertProvisionerJobParams) (ProvisionerJob, error)
InsertProvisionerJobLogs(ctx context.Context, arg InsertProvisionerJobLogsParams) ([]ProvisionerJobLog, error)
InsertReplica(ctx context.Context, arg InsertReplicaParams) (Replica, error)
InsertTemplate(ctx context.Context, arg InsertTemplateParams) (Template, error)
InsertTemplateVersion(ctx context.Context, arg InsertTemplateVersionParams) (TemplateVersion, error)
InsertUser(ctx context.Context, arg InsertUserParams) (User, error)
@ -156,6 +161,7 @@ type sqlcQuerier interface {
UpdateProvisionerJobByID(ctx context.Context, arg UpdateProvisionerJobByIDParams) error
UpdateProvisionerJobWithCancelByID(ctx context.Context, arg UpdateProvisionerJobWithCancelByIDParams) error
UpdateProvisionerJobWithCompleteByID(ctx context.Context, arg UpdateProvisionerJobWithCompleteByIDParams) error
UpdateReplica(ctx context.Context, arg UpdateReplicaParams) (Replica, error)
UpdateTemplateActiveVersionByID(ctx context.Context, arg UpdateTemplateActiveVersionByIDParams) error
UpdateTemplateDeletedByID(ctx context.Context, arg UpdateTemplateDeletedByIDParams) error
UpdateTemplateMetaByID(ctx context.Context, arg UpdateTemplateMetaByIDParams) (Template, error)