Files
coder/cli/agent_test.go
Kyle Carberry 2ba4a62a0d feat: Add high availability for multiple replicas (#4555)
* feat: HA tailnet coordinator

* fixup! feat: HA tailnet coordinator

* fixup! feat: HA tailnet coordinator

* remove printlns

* close all connections on coordinator

* impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* fixup! impelement high availability feature

* Add replicas

* Add DERP meshing to arbitrary addresses

* Move packages to highavailability folder

* Move coordinator to high availability package

* Add flags for HA

* Rename to replicasync

* Denest packages for replicas

* Add test for multiple replicas

* Fix coordination test

* Add HA to the helm chart

* Rename function pointer

* Add warnings for HA

* Add the ability to block endpoints

* Add flag to disable P2P connections

* Wow, I made the tests pass

* Add replicas endpoint

* Ensure close kills replica

* Update sql

* Add database latency to high availability

* Pipe TLS to DERP mesh

* Fix DERP mesh with TLS

* Add tests for TLS

* Fix replica sync TLS

* Fix RootCA for replica meshing

* Remove ID from replicasync

* Fix getting certificates for meshing

* Remove excessive locking

* Fix linting

* Store mesh key in the database

* Fix replica key for tests

* Fix types gen

* Fix unlocking unlocked

* Fix race in tests

* Update enterprise/derpmesh/derpmesh.go

Co-authored-by: Colin Adler <colin1adler@gmail.com>

* Rename to syncReplicas

* Reuse http client

* Delete old replicas on a CRON

* Fix race condition in connection tests

* Fix linting

* Fix nil type

* Move pubsub to in-memory for twenty test

* Add comment for configuration tweaking

* Fix leak with transport

* Fix close leak in derpmesh

* Fix race when creating server

* Remove handler update

* Skip test on Windows

* Fix DERP mesh test

* Wrap HTTP handler replacement in mutex

* Fix error message for relay

* Fix API handler for normal tests

* Fix speedtest

* Fix replica resend

* Fix derpmesh send

* Ping async

* Increase wait time of template version jobd

* Fix race when closing replica sync

* Add name to client

* Log the derpmap being used

* Don't connect if DERP is empty

* Improve agent coordinator logging

* Fix lock in coordinator

* Fix relay addr

* Fix race when updating durations

* Fix client publish race

* Run pubsub loop in a queue

* Store agent nodes in order

* Fix coordinator locking

* Check for closed pipe

Co-authored-by: Colin Adler <colin1adler@gmail.com>
2022-10-17 13:43:30 +00:00

202 lines
7.1 KiB
Go

package cli_test
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/coder/coder/cli/clitest"
"github.com/coder/coder/coderd/coderdtest"
"github.com/coder/coder/provisioner/echo"
"github.com/coder/coder/provisionersdk/proto"
"github.com/coder/coder/testutil"
)
func TestWorkspaceAgent(t *testing.T) {
t.Parallel()
t.Run("Azure", func(t *testing.T) {
t.Parallel()
instanceID := "instanceidentifier"
certificates, metadataClient := coderdtest.NewAzureInstanceIdentity(t, instanceID)
client := coderdtest.New(t, &coderdtest.Options{
AzureCertificates: certificates,
IncludeProvisionerDaemon: true,
})
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
Provision: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{
Resources: []*proto.Resource{{
Name: "somename",
Type: "someinstance",
Agents: []*proto.Agent{{
Auth: &proto.Agent_InstanceId{
InstanceId: instanceID,
},
}},
}},
},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
cmd, _ := clitest.New(t, "agent", "--auth", "azure-instance-identity", "--agent-url", client.URL.String())
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
errC := make(chan error)
go func() {
// A linting error occurs for weakly typing the context value here.
//nolint // The above seems reasonable for a one-off test.
ctx := context.WithValue(ctx, "azure-client", metadataClient)
errC <- cmd.ExecuteContext(ctx)
}()
coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
workspace, err := client.Workspace(ctx, workspace.ID)
require.NoError(t, err)
resources := workspace.LatestBuild.Resources
if assert.NotEmpty(t, workspace.LatestBuild.Resources) && assert.NotEmpty(t, resources[0].Agents) {
assert.NotEmpty(t, resources[0].Agents[0].Version)
}
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
require.NoError(t, err)
defer dialer.Close()
require.Eventually(t, func() bool {
_, err := dialer.Ping(ctx)
return err == nil
}, testutil.WaitMedium, testutil.IntervalFast)
cancelFunc()
err = <-errC
require.NoError(t, err)
})
t.Run("AWS", func(t *testing.T) {
t.Parallel()
instanceID := "instanceidentifier"
certificates, metadataClient := coderdtest.NewAWSInstanceIdentity(t, instanceID)
client := coderdtest.New(t, &coderdtest.Options{
AWSCertificates: certificates,
IncludeProvisionerDaemon: true,
})
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
Provision: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{
Resources: []*proto.Resource{{
Name: "somename",
Type: "someinstance",
Agents: []*proto.Agent{{
Auth: &proto.Agent_InstanceId{
InstanceId: instanceID,
},
}},
}},
},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
cmd, _ := clitest.New(t, "agent", "--auth", "aws-instance-identity", "--agent-url", client.URL.String())
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
errC := make(chan error)
go func() {
// A linting error occurs for weakly typing the context value here.
//nolint // The above seems reasonable for a one-off test.
ctx := context.WithValue(ctx, "aws-client", metadataClient)
errC <- cmd.ExecuteContext(ctx)
}()
coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
workspace, err := client.Workspace(ctx, workspace.ID)
require.NoError(t, err)
resources := workspace.LatestBuild.Resources
if assert.NotEmpty(t, resources) && assert.NotEmpty(t, resources[0].Agents) {
assert.NotEmpty(t, resources[0].Agents[0].Version)
}
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
require.NoError(t, err)
defer dialer.Close()
require.Eventually(t, func() bool {
_, err := dialer.Ping(ctx)
return err == nil
}, testutil.WaitMedium, testutil.IntervalFast)
cancelFunc()
err = <-errC
require.NoError(t, err)
})
t.Run("GoogleCloud", func(t *testing.T) {
t.Parallel()
instanceID := "instanceidentifier"
validator, metadata := coderdtest.NewGoogleInstanceIdentity(t, instanceID, false)
client := coderdtest.New(t, &coderdtest.Options{
GoogleTokenValidator: validator,
IncludeProvisionerDaemon: true,
})
user := coderdtest.CreateFirstUser(t, client)
version := coderdtest.CreateTemplateVersion(t, client, user.OrganizationID, &echo.Responses{
Parse: echo.ParseComplete,
Provision: []*proto.Provision_Response{{
Type: &proto.Provision_Response_Complete{
Complete: &proto.Provision_Complete{
Resources: []*proto.Resource{{
Name: "somename",
Type: "someinstance",
Agents: []*proto.Agent{{
Auth: &proto.Agent_InstanceId{
InstanceId: instanceID,
},
}},
}},
},
},
}},
})
template := coderdtest.CreateTemplate(t, client, user.OrganizationID, version.ID)
coderdtest.AwaitTemplateVersionJob(t, client, version.ID)
workspace := coderdtest.CreateWorkspace(t, client, user.OrganizationID, template.ID)
coderdtest.AwaitWorkspaceBuildJob(t, client, workspace.LatestBuild.ID)
cmd, _ := clitest.New(t, "agent", "--auth", "google-instance-identity", "--agent-url", client.URL.String())
ctx, cancelFunc := context.WithCancel(context.Background())
defer cancelFunc()
errC := make(chan error)
go func() {
// A linting error occurs for weakly typing the context value here.
//nolint // The above seems reasonable for a one-off test.
ctx := context.WithValue(ctx, "gcp-client", metadata)
errC <- cmd.ExecuteContext(ctx)
}()
coderdtest.AwaitWorkspaceAgents(t, client, workspace.ID)
workspace, err := client.Workspace(ctx, workspace.ID)
require.NoError(t, err)
resources := workspace.LatestBuild.Resources
if assert.NotEmpty(t, resources) && assert.NotEmpty(t, resources[0].Agents) {
assert.NotEmpty(t, resources[0].Agents[0].Version)
}
dialer, err := client.DialWorkspaceAgent(ctx, resources[0].Agents[0].ID, nil)
require.NoError(t, err)
defer dialer.Close()
require.Eventually(t, func() bool {
_, err := dialer.Ping(ctx)
return err == nil
}, testutil.WaitMedium, testutil.IntervalFast)
cancelFunc()
err = <-errC
require.NoError(t, err)
})
}