feat: use tailnet v2 API for coordination (#11638)
This one is huge, and I'm sorry. The problem is that once I change `tailnet.Conn` to start doing v2 behavior, I kind of have to change it everywhere, including in CoderSDK (CLI), the agent, wsproxy, and ServerTailnet. There is still a bit more cleanup to do, and I need to add code so that when we lose connection to the Coordinator, we mark all peers as LOST, but that will be in a separate PR since this is big enough!
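For the wsconncache tests below, the visible core of the change is the coordination wiring: v1 pumped `[]*tailnet.Node` updates over a raw `net.Conn` via `ServeCoordinator` plus a node callback, while v2 hands the whole exchange to a coordination object bound to both the Coordinator and the Conn. A sketch assembled purely from calls that appear in this diff (`clientConn`, `coordinator`, `conn`, and the IDs stand for values the caller already holds):

```go
// v1 (removed below): hand-rolled node channel over a raw pipe.
sendNode, _ := tailnet.ServeCoordinator(clientConn, func(nodes []*tailnet.Node) error {
	return conn.UpdateNodes(nodes, false)
})
conn.SetNodeCallback(sendNode)

// v2 (added below): one object owns the stream and its binding to the Conn.
coordination := tailnet.NewInMemoryCoordination(
	ctx, logger,
	clientID, agentID,
	coordinator, conn,
)
defer coordination.Close()
```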
@@ -12,14 +12,19 @@ import (
 	"net/url"
 	"strings"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
 	"github.com/google/uuid"
+	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
-	"go.uber.org/atomic"
 	"go.uber.org/goleak"
 	"golang.org/x/xerrors"
+	"storj.io/drpc"
+	"storj.io/drpcmux"
+	"storj.io/drpcserver"
+	"tailscale.com/tailcfg"
 
 	"cdr.dev/slog"
 	"cdr.dev/slog/sloggers/slogtest"
@@ -27,7 +32,9 @@ import (
 	"github.com/coder/coder/v2/coderd/wsconncache"
 	"github.com/coder/coder/v2/codersdk"
 	"github.com/coder/coder/v2/codersdk/agentsdk"
+	drpcsdk "github.com/coder/coder/v2/codersdk/drpc"
 	"github.com/coder/coder/v2/tailnet"
+	"github.com/coder/coder/v2/tailnet/proto"
 	"github.com/coder/coder/v2/tailnet/tailnettest"
 	"github.com/coder/coder/v2/testutil"
 )
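Read together, the import changes preview the mechanics: storj.io/drpc, drpcmux, drpcserver, and drpcsdk replace the raw net.Pipe plumbing with a dRPC transport for the generated tailnet proto service, tailcfg appears for the DERP map callback, and the test's counter moves from go.uber.org/atomic to the standard sync/atomic (see the Expire hunks below).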
@@ -41,7 +48,7 @@ func TestCache(t *testing.T) {
 	t.Run("Same", func(t *testing.T) {
 		t.Parallel()
 		cache := wsconncache.New(func(id uuid.UUID) (*codersdk.WorkspaceAgentConn, error) {
-			return setupAgent(t, agentsdk.Manifest{}, 0), nil
+			return setupAgent(t, agentsdk.Manifest{}, 0)
 		}, 0)
 		defer func() {
 			_ = cache.Close()
@@ -54,10 +61,10 @@ func TestCache(t *testing.T) {
 	})
 	t.Run("Expire", func(t *testing.T) {
 		t.Parallel()
-		called := atomic.NewInt32(0)
+		called := int32(0)
 		cache := wsconncache.New(func(id uuid.UUID) (*codersdk.WorkspaceAgentConn, error) {
-			called.Add(1)
-			return setupAgent(t, agentsdk.Manifest{}, 0), nil
+			atomic.AddInt32(&called, 1)
+			return setupAgent(t, agentsdk.Manifest{}, 0)
 		}, time.Microsecond)
 		defer func() {
 			_ = cache.Close()
@@ -70,12 +77,12 @@ func TestCache(t *testing.T) {
 		require.NoError(t, err)
 		release()
 		<-conn.Closed()
-		require.Equal(t, int32(2), called.Load())
+		require.Equal(t, int32(2), called)
 	})
 	t.Run("NoExpireWhenLocked", func(t *testing.T) {
 		t.Parallel()
 		cache := wsconncache.New(func(id uuid.UUID) (*codersdk.WorkspaceAgentConn, error) {
-			return setupAgent(t, agentsdk.Manifest{}, 0), nil
+			return setupAgent(t, agentsdk.Manifest{}, 0)
 		}, time.Microsecond)
 		defer func() {
 			_ = cache.Close()
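The counter rewrite is a consequence of the import swap: sync/atomic is needed for the generic atomic.Pointer used in Listen below, and it collides with go.uber.org/atomic on the identifier `atomic`, so (presumably to avoid aliasing) the counter moves to the standard library too. A runnable sketch of the replacement style:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	// The new counter style: a plain int32 driven by package functions.
	var called int32
	atomic.AddInt32(&called, 1)
	fmt.Println(atomic.LoadInt32(&called)) // 1

	// sync/atomic also provides the generic Pointer the dRPC wiring below
	// relies on (atomic.Pointer[tailnet.Coordinator] in Listen).
	var ptr atomic.Pointer[int]
	v := 42
	ptr.Store(&v)
	fmt.Println(*ptr.Load()) // 42
}
```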
@@ -108,7 +115,7 @@ func TestCache(t *testing.T) {
 		go server.Serve(random)
 
 		cache := wsconncache.New(func(id uuid.UUID) (*codersdk.WorkspaceAgentConn, error) {
-			return setupAgent(t, agentsdk.Manifest{}, 0), nil
+			return setupAgent(t, agentsdk.Manifest{}, 0)
 		}, time.Microsecond)
 		defer func() {
 			_ = cache.Close()
@@ -154,7 +161,7 @@ func TestCache(t *testing.T) {
 	})
 }
 
-func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Duration) *codersdk.WorkspaceAgentConn {
+func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Duration) (*codersdk.WorkspaceAgentConn, error) {
 	t.Helper()
 	logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
 	manifest.DERPMap, _ = tailnettest.RunDERPAndSTUN(t)
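With the error return in place, setupAgent's signature is exactly the dialer shape wsconncache.New takes, which is why every call site above loses its trailing `, nil`. A fragment of the fit, as it appears in this diff:

```go
cache := wsconncache.New(func(id uuid.UUID) (*codersdk.WorkspaceAgentConn, error) {
	// setupAgent's (conn, error) now threads straight through, so a
	// failed setup surfaces as an ordinary dial error in the cache.
	return setupAgent(t, agentsdk.Manifest{}, 0)
}, time.Microsecond)
```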
@@ -184,18 +191,25 @@ func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Durati
 		DERPForceWebSockets: manifest.DERPForceWebSockets,
 		Logger:              slogtest.Make(t, nil).Named("tailnet").Leveled(slog.LevelDebug),
 	})
-	require.NoError(t, err)
-	clientConn, serverConn := net.Pipe()
+	// setupAgent is called by wsconncache Dialer, so we can't use require here as it will end the
+	// test, which in turn closes the wsconncache, which in turn waits for the Dialer and deadlocks.
+	if !assert.NoError(t, err) {
+		return nil, err
+	}
 	t.Cleanup(func() {
-		_ = clientConn.Close()
-		_ = serverConn.Close()
 		_ = conn.Close()
 	})
-	go coordinator.ServeClient(serverConn, uuid.New(), manifest.AgentID)
-	sendNode, _ := tailnet.ServeCoordinator(clientConn, func(nodes []*tailnet.Node) error {
-		return conn.UpdateNodes(nodes, false)
+	clientID := uuid.New()
+	testCtx, testCtxCancel := context.WithCancel(context.Background())
+	t.Cleanup(testCtxCancel)
+	coordination := tailnet.NewInMemoryCoordination(
+		testCtx, logger,
+		clientID, manifest.AgentID,
+		coordinator, conn,
+	)
+	t.Cleanup(func() {
+		_ = coordination.Close()
 	})
-	conn.SetNodeCallback(sendNode)
 	agentConn := codersdk.NewWorkspaceAgentConn(conn, codersdk.WorkspaceAgentConnOptions{
 		AgentID: manifest.AgentID,
 		AgentIP: codersdk.WorkspaceAgentIP,
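Both new comment blocks guard the same invariant: setupAgent runs inside the cache's Dialer, and closing the cache waits for in-flight dials. require.NoError and t.Fatal stop the goroutine via t.FailNow (runtime.Goexit), so the dial never returns a result and Close waits forever. The pattern used instead, as a fragment of the hunks above:

```go
// Fail softly inside the Dialer: record the failure, return an error.
if !assert.NoError(t, err) { // assert marks the test failed but keeps going
	return nil, err // surfaces through the cache's dial path, not t.Fatal
}
```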
@@ -206,16 +220,20 @@ func setupAgent(t *testing.T, manifest agentsdk.Manifest, ptyTimeout time.Durati
 	ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitMedium)
 	defer cancel()
 	if !agentConn.AwaitReachable(ctx) {
-		t.Fatal("agent not reachable")
+		// setupAgent is called by wsconncache Dialer, so we can't use t.Fatal here as it will end
+		// the test, which in turn closes the wsconncache, which in turn waits for the Dialer and
+		// deadlocks.
+		t.Error("agent not reachable")
+		return nil, xerrors.New("agent not reachable")
 	}
-	return agentConn
+	return agentConn, nil
 }
 
 type client struct {
 	t           *testing.T
 	agentID     uuid.UUID
 	manifest    agentsdk.Manifest
-	coordinator tailnet.CoordinatorV1
+	coordinator tailnet.Coordinator
 }
 
 func (c *client) Manifest(_ context.Context) (agentsdk.Manifest, error) {
@@ -240,19 +258,53 @@ func (*client) DERPMapUpdates(_ context.Context) (<-chan agentsdk.DERPMapUpdate,
 	}, nil
 }
 
-func (c *client) Listen(_ context.Context) (net.Conn, error) {
-	clientConn, serverConn := net.Pipe()
+func (c *client) Listen(_ context.Context) (drpc.Conn, error) {
+	logger := slogtest.Make(c.t, nil).Leveled(slog.LevelDebug).Named("drpc")
+	conn, lis := drpcsdk.MemTransportPipe()
 	closed := make(chan struct{})
 	c.t.Cleanup(func() {
-		_ = serverConn.Close()
-		_ = clientConn.Close()
+		_ = conn.Close()
+		_ = lis.Close()
 		<-closed
 	})
+	coordPtr := atomic.Pointer[tailnet.Coordinator]{}
+	coordPtr.Store(&c.coordinator)
+	mux := drpcmux.New()
+	drpcService := &tailnet.DRPCService{
+		CoordPtr: &coordPtr,
+		Logger:   logger,
+		// TODO: handle DERPMap too!
+		DerpMapUpdateFrequency: time.Hour,
+		DerpMapFn:              func() *tailcfg.DERPMap { panic("not implemented") },
+	}
+	err := proto.DRPCRegisterTailnet(mux, drpcService)
+	if err != nil {
+		return nil, xerrors.Errorf("register DRPC service: %w", err)
+	}
+	server := drpcserver.NewWithOptions(mux, drpcserver.Options{
+		Log: func(err error) {
+			if xerrors.Is(err, io.EOF) ||
+				xerrors.Is(err, context.Canceled) ||
+				xerrors.Is(err, context.DeadlineExceeded) {
+				return
+			}
+			logger.Debug(context.Background(), "drpc server error", slog.Error(err))
+		},
+	})
+	serveCtx, cancel := context.WithCancel(context.Background())
+	c.t.Cleanup(cancel)
+	auth := tailnet.AgentTunnelAuth{}
+	streamID := tailnet.StreamID{
+		Name: "wsconncache_test-agent",
+		ID:   c.agentID,
+		Auth: auth,
+	}
+	serveCtx = tailnet.WithStreamID(serveCtx, streamID)
 	go func() {
-		_ = c.coordinator.ServeAgent(serverConn, c.agentID, "")
+		server.Serve(serveCtx, lis)
 		close(closed)
 	}()
-	return clientConn, nil
+	return conn, nil
 }
 
 func (*client) ReportStats(_ context.Context, _ slog.Logger, _ <-chan *agentsdk.Stats, _ func(time.Duration)) (io.Closer, error) {
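Listen now returns a drpc.Conn wired to an in-process dRPC server hosting tailnet.DRPCService, with the coordinator injected through an atomic.Pointer and the stream identified via tailnet.WithStreamID. What the other end does with that conn is not shown in this hunk; a hedged sketch, assuming the standard drpc-generated client for the proto service registered above (NewDRPCTailnetClient and Coordinate are those generated names, not code from this diff):

```go
// Hedged sketch: driving the service over the conn that Listen returns.
func coordinate(ctx context.Context, conn drpc.Conn) error {
	client := proto.NewDRPCTailnetClient(conn)
	stream, err := client.Coordinate(ctx) // bidirectional v2 coordination stream
	if err != nil {
		return xerrors.Errorf("open coordinate stream: %w", err)
	}
	defer func() { _ = stream.Close() }()
	// Coordination requests go out via stream.Send; peer updates arrive
	// via stream.Recv until the stream or its context ends.
	return nil
}
```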