mirror of
https://github.com/coder/coder.git
synced 2025-07-15 22:20:27 +00:00
feat: Add high availability for multiple replicas (#4555)
* feat: HA tailnet coordinator * fixup! feat: HA tailnet coordinator * fixup! feat: HA tailnet coordinator * remove printlns * close all connections on coordinator * impelement high availability feature * fixup! impelement high availability feature * fixup! impelement high availability feature * fixup! impelement high availability feature * fixup! impelement high availability feature * Add replicas * Add DERP meshing to arbitrary addresses * Move packages to highavailability folder * Move coordinator to high availability package * Add flags for HA * Rename to replicasync * Denest packages for replicas * Add test for multiple replicas * Fix coordination test * Add HA to the helm chart * Rename function pointer * Add warnings for HA * Add the ability to block endpoints * Add flag to disable P2P connections * Wow, I made the tests pass * Add replicas endpoint * Ensure close kills replica * Update sql * Add database latency to high availability * Pipe TLS to DERP mesh * Fix DERP mesh with TLS * Add tests for TLS * Fix replica sync TLS * Fix RootCA for replica meshing * Remove ID from replicasync * Fix getting certificates for meshing * Remove excessive locking * Fix linting * Store mesh key in the database * Fix replica key for tests * Fix types gen * Fix unlocking unlocked * Fix race in tests * Update enterprise/derpmesh/derpmesh.go Co-authored-by: Colin Adler <colin1adler@gmail.com> * Rename to syncReplicas * Reuse http client * Delete old replicas on a CRON * Fix race condition in connection tests * Fix linting * Fix nil type * Move pubsub to in-memory for twenty test * Add comment for configuration tweaking * Fix leak with transport * Fix close leak in derpmesh * Fix race when creating server * Remove handler update * Skip test on Windows * Fix DERP mesh test * Wrap HTTP handler replacement in mutex * Fix error message for relay * Fix API handler for normal tests * Fix speedtest * Fix replica resend * Fix derpmesh send * Ping async * Increase wait time of template version jobd * Fix race when closing replica sync * Add name to client * Log the derpmap being used * Don't connect if DERP is empty * Improve agent coordinator logging * Fix lock in coordinator * Fix relay addr * Fix race when updating durations * Fix client publish race * Run pubsub loop in a queue * Store agent nodes in order * Fix coordinator locking * Check for closed pipe Co-authored-by: Colin Adler <colin1adler@gmail.com>
This commit is contained in:
@ -49,7 +49,7 @@ func (api *API) workspaceAgent(rw http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
return
|
||||
}
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, api.TailnetCoordinator, workspaceAgent, convertApps(dbApps), api.AgentInactiveDisconnectTimeout)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, convertApps(dbApps), api.AgentInactiveDisconnectTimeout)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Internal error reading workspace agent.",
|
||||
@ -78,7 +78,7 @@ func (api *API) workspaceAgentApps(rw http.ResponseWriter, r *http.Request) {
|
||||
func (api *API) workspaceAgentMetadata(rw http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
workspaceAgent := httpmw.WorkspaceAgent(r)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, api.TailnetCoordinator, workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Internal error reading workspace agent.",
|
||||
@ -98,7 +98,7 @@ func (api *API) workspaceAgentMetadata(rw http.ResponseWriter, r *http.Request)
|
||||
func (api *API) postWorkspaceAgentVersion(rw http.ResponseWriter, r *http.Request) {
|
||||
ctx := r.Context()
|
||||
workspaceAgent := httpmw.WorkspaceAgent(r)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, api.TailnetCoordinator, workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Internal error reading workspace agent.",
|
||||
@ -152,7 +152,7 @@ func (api *API) workspaceAgentPTY(rw http.ResponseWriter, r *http.Request) {
|
||||
httpapi.ResourceNotFound(rw)
|
||||
return
|
||||
}
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, api.TailnetCoordinator, workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Internal error reading workspace agent.",
|
||||
@ -229,7 +229,7 @@ func (api *API) workspaceAgentListeningPorts(rw http.ResponseWriter, r *http.Req
|
||||
return
|
||||
}
|
||||
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, api.TailnetCoordinator, workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
apiAgent, err := convertWorkspaceAgent(api.DERPMap, *api.TailnetCoordinator.Load(), workspaceAgent, nil, api.AgentInactiveDisconnectTimeout)
|
||||
if err != nil {
|
||||
httpapi.Write(ctx, rw, http.StatusInternalServerError, codersdk.Response{
|
||||
Message: "Internal error reading workspace agent.",
|
||||
@ -376,8 +376,9 @@ func (api *API) dialWorkspaceAgentTailnet(r *http.Request, agentID uuid.UUID) (*
|
||||
})
|
||||
conn.SetNodeCallback(sendNodes)
|
||||
go func() {
|
||||
err := api.TailnetCoordinator.ServeClient(serverConn, uuid.New(), agentID)
|
||||
err := (*api.TailnetCoordinator.Load()).ServeClient(serverConn, uuid.New(), agentID)
|
||||
if err != nil {
|
||||
api.Logger.Warn(r.Context(), "tailnet coordinator client error", slog.Error(err))
|
||||
_ = conn.Close()
|
||||
}
|
||||
}()
|
||||
@ -514,8 +515,9 @@ func (api *API) workspaceAgentCoordinate(rw http.ResponseWriter, r *http.Request
|
||||
closeChan := make(chan struct{})
|
||||
go func() {
|
||||
defer close(closeChan)
|
||||
err := api.TailnetCoordinator.ServeAgent(wsNetConn, workspaceAgent.ID)
|
||||
err := (*api.TailnetCoordinator.Load()).ServeAgent(wsNetConn, workspaceAgent.ID)
|
||||
if err != nil {
|
||||
api.Logger.Warn(ctx, "tailnet coordinator agent error", slog.Error(err))
|
||||
_ = conn.Close(websocket.StatusInternalError, err.Error())
|
||||
return
|
||||
}
|
||||
@ -583,7 +585,7 @@ func (api *API) workspaceAgentClientCoordinate(rw http.ResponseWriter, r *http.R
|
||||
go httpapi.Heartbeat(ctx, conn)
|
||||
|
||||
defer conn.Close(websocket.StatusNormalClosure, "")
|
||||
err = api.TailnetCoordinator.ServeClient(websocket.NetConn(ctx, conn, websocket.MessageBinary), uuid.New(), workspaceAgent.ID)
|
||||
err = (*api.TailnetCoordinator.Load()).ServeClient(websocket.NetConn(ctx, conn, websocket.MessageBinary), uuid.New(), workspaceAgent.ID)
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusInternalError, err.Error())
|
||||
return
|
||||
@ -611,7 +613,7 @@ func convertApps(dbApps []database.WorkspaceApp) []codersdk.WorkspaceApp {
|
||||
return apps
|
||||
}
|
||||
|
||||
func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator *tailnet.Coordinator, dbAgent database.WorkspaceAgent, apps []codersdk.WorkspaceApp, agentInactiveDisconnectTimeout time.Duration) (codersdk.WorkspaceAgent, error) {
|
||||
func convertWorkspaceAgent(derpMap *tailcfg.DERPMap, coordinator tailnet.Coordinator, dbAgent database.WorkspaceAgent, apps []codersdk.WorkspaceApp, agentInactiveDisconnectTimeout time.Duration) (codersdk.WorkspaceAgent, error) {
|
||||
var envs map[string]string
|
||||
if dbAgent.EnvironmentVariables.Valid {
|
||||
err := json.Unmarshal(dbAgent.EnvironmentVariables.RawMessage, &envs)
|
||||
|
Reference in New Issue
Block a user