Closes https://github.com/coder/vscode-coder/issues/447
Closes https://github.com/coder/jetbrains-coder/issues/543
Closes https://github.com/coder/coder-jetbrains-toolbox/issues/21

This PR adds Coder Connect support to `coder ssh --stdio`.

When connecting to a workspace, if `--force-new-tunnel` is not passed, the CLI will first do a DNS lookup for `<agent>.<workspace>.<owner>.<hostname-suffix>`. If an IP address is returned, and it's within the Coder service prefix, the CLI will not create a new tailnet connection to the workspace, and will instead dial the SSH server running on port 22 on the workspace directly over TCP. This allows IDE extensions to use the Coder Connect tunnel without requiring any modifications to the extensions themselves.

Additionally, `using_coder_connect` is added to the `sshNetworkStats` file, which the VS Code extension (and maybe JetBrains?) will be able to read and use to indicate to the user that they are using Coder Connect.

One advantage of this approach is that running `coder ssh --stdio` on an offline workspace with Coder Connect enabled will have the CLI wait for the workspace to build and the agent to connect (and optionally, for the startup scripts to finish) before finally connecting over the Coder Connect tunnel.

As a result, `coder ssh --stdio` has the overhead of looking up the workspace and agent, and checking whether they are running. On my device, this meant `coder ssh --stdio <workspace>` was approximately a second slower than connecting to the workspace directly using `ssh <workspace>.coder` (I would assume anyone serious about their Coder Connect usage would know to just do the latter anyway). To ensure this doesn't come at a significant performance cost, I've also benchmarked this PR.

<details>
<summary>Benchmark</summary>

## Methodology

All tests were completed on `dev.coder.com`, where a Linux workspace running in AWS `us-west1` was created. The machine running Coder Desktop (the 'client') was a Windows VM running in the same AWS region and VPC as the workspace.

To test the performance of the SSH connection specifically, a port was forwarded between the client and the workspace using:

```
ssh -p 22 -L7001:localhost:7001 <host>
```

where `host` was either an alias for an SSH ProxyCommand that called `coder ssh`, or a Coder Connect hostname.

For latency, [`tcping`](https://www.elifulkerson.com/projects/tcping.php) was used against the forwarded port:

```
tcping -n 100 localhost 7001
```

For throughput, [`iperf3`](https://iperf.fr/iperf-download.php) was used:

```
iperf3 -c localhost -p 7001
```

where an `iperf3` server was running on the workspace on port 7001.

## Test Cases

### Testcase 1: `coder ssh` `ProxyCommand` that bicopies from Coder Connect

This case tests the implementation in this PR, such that we can write a config like:

```
Host codercliconnect
  ProxyCommand /path/to/coder ssh --stdio workspace
```

With Coder Connect enabled, `ssh -p 22 -L7001:localhost:7001 codercliconnect` will use the Coder Connect tunnel.
The results were as follows:

**Throughput, 10 tests, back to back:**

- Average throughput across all tests: 788.20 Mbits/sec
- Minimum average throughput: 731 Mbits/sec
- Maximum average throughput: 871 Mbits/sec
- Standard Deviation: 38.88 Mbits/sec

**Latency, 100 RTTs:**

- Average: 0.369ms
- Minimum: 0.290ms
- Maximum: 0.473ms

### Testcase 2: `ssh` dialing Coder Connect directly without a `ProxyCommand`

This is what we assume to be the 'best' way to use Coder Connect.

**Throughput, 10 tests, back to back:**

- Average throughput across all tests: 789.50 Mbits/sec
- Minimum average throughput: 708 Mbits/sec
- Maximum average throughput: 839 Mbits/sec
- Standard Deviation: 39.98 Mbits/sec

**Latency, 100 RTTs:**

- Average: 0.369ms
- Minimum: 0.267ms
- Maximum: 0.440ms

### Testcase 3: `coder ssh` `ProxyCommand` that creates its own Tailnet connection in-process

This is what normally happens when you run `coder ssh`:

**Throughput, 10 tests, back to back:**

- Average throughput across all tests: 610.20 Mbits/sec
- Minimum average throughput: 569 Mbits/sec
- Maximum average throughput: 664 Mbits/sec
- Standard Deviation: 27.29 Mbits/sec

**Latency, 100 RTTs:**

- Average: 0.335ms
- Minimum: 0.262ms
- Maximum: 0.452ms

## Analysis

Performing a two-tailed, unpaired t-test on the throughput of testcases 1 and 2 gives a p-value of `0.9450`, far above any conventional significance threshold. In other words, the data give us no reason to reject the hypothesis that the two setups have the same mean throughput; the observed difference is entirely consistent with run-to-run variation.

## Conclusion

From the t-test, and by comparison to the status quo (regular `coder ssh`, which uses gvisor and is noticeably slower), I think it's safe to say any impact on throughput or latency from the `ProxyCommand` performing a bicopy against Coder Connect is negligible. Users are very unlikely to run into performance issues as a result of using Coder Connect via `coder ssh`, as implemented in this PR.

Less scientifically, I ran these same tests on my home network against my Sydney workspace, and both throughput and latency were consistent across testcases 1 and 2.

</details>
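For readers skimming the diff, here is a minimal Go sketch of the mechanism described above: resolve the Coder Connect hostname, check whether the returned address falls inside the Coder service prefix, and if so dial the workspace's SSH server on port 22 over plain TCP and bicopy it to stdio. This is not the PR's implementation: the hostname, the prefix value, and the `coderConnectAddr`/`bicopy` helpers are illustrative assumptions. The real CLI also honors `--force-new-tunnel`, waits for the workspace and agent to be ready, and does the bridging in `runCoderConnectStdio`.

```
// Sketch only: detect Coder Connect via DNS, then bridge stdio to the
// workspace's SSH port over plain TCP. Names and values are illustrative,
// not the CLI's real constants.
package main

import (
	"context"
	"fmt"
	"io"
	"net"
	"net/netip"
	"os"
	"time"
)

// coderConnectAddr resolves hostname and returns the first address inside
// servicePrefix, if any. A lookup failure or an address outside the prefix
// means Coder Connect is not handling this workspace.
func coderConnectAddr(ctx context.Context, hostname string, servicePrefix netip.Prefix) (netip.Addr, bool) {
	addrs, err := net.DefaultResolver.LookupNetIP(ctx, "ip6", hostname)
	if err != nil {
		return netip.Addr{}, false
	}
	for _, addr := range addrs {
		if servicePrefix.Contains(addr) {
			return addr, true
		}
	}
	return netip.Addr{}, false
}

// bicopy bridges stdio to the TCP connection in both directions, which is
// all a ProxyCommand needs to do once Coder Connect owns the tunnel.
func bicopy(conn net.Conn) {
	done := make(chan struct{}, 2)
	go func() { _, _ = io.Copy(conn, os.Stdin); done <- struct{}{} }()
	go func() { _, _ = io.Copy(os.Stdout, conn); done <- struct{}{} }()
	<-done
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Hypothetical hostname and prefix, for illustration only.
	servicePrefix := netip.MustParsePrefix("fd60:627a:a42b::/48")
	addr, ok := coderConnectAddr(ctx, "main.my-workspace.alice.coder", servicePrefix)
	if !ok {
		fmt.Fprintln(os.Stderr, "Coder Connect not detected; the real CLI would build its own tailnet connection here")
		return
	}

	// Dial the workspace's SSH server directly over TCP, port 22.
	conn, err := net.Dial("tcp", netip.AddrPortFrom(addr, 22).String())
	if err != nil {
		fmt.Fprintln(os.Stderr, "dial failed:", err)
		return
	}
	defer conn.Close()
	bicopy(conn)
}
```

In an SSH config this sits behind a `ProxyCommand`, as in the `codercliconnect` example in the benchmark above.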
349 lines · 8.3 KiB · Go
package cli

import (
	"context"
	"fmt"
	"io"
	"net"
	"net/url"
	"sync"
	"testing"
	"time"

	gliderssh "github.com/gliderlabs/ssh"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"golang.org/x/crypto/ssh"
	"golang.org/x/xerrors"

	"cdr.dev/slog"
	"cdr.dev/slog/sloggers/slogtest"
	"github.com/coder/quartz"

	"github.com/coder/coder/v2/codersdk"
	"github.com/coder/coder/v2/testutil"
)

const (
	fakeOwnerName     = "fake-owner-name"
	fakeServerURL     = "https://fake-foo-url"
	fakeWorkspaceName = "fake-workspace-name"
)

func TestVerifyWorkspaceOutdated(t *testing.T) {
	t.Parallel()

	serverURL, err := url.Parse(fakeServerURL)
	require.NoError(t, err)

	client := codersdk.Client{URL: serverURL}

	t.Run("Up-to-date", func(t *testing.T) {
		t.Parallel()

		workspace := codersdk.Workspace{Name: fakeWorkspaceName, OwnerName: fakeOwnerName}

		_, outdated := verifyWorkspaceOutdated(&client, workspace)

		assert.False(t, outdated, "workspace should be up-to-date")
	})
	t.Run("Outdated", func(t *testing.T) {
		t.Parallel()

		workspace := codersdk.Workspace{Name: fakeWorkspaceName, OwnerName: fakeOwnerName, Outdated: true}

		updateWorkspaceBanner, outdated := verifyWorkspaceOutdated(&client, workspace)

		assert.True(t, outdated, "workspace should be outdated")
		assert.NotEmpty(t, updateWorkspaceBanner, "workspace banner should be present")
	})
}

func TestBuildWorkspaceLink(t *testing.T) {
	t.Parallel()

	serverURL, err := url.Parse(fakeServerURL)
	require.NoError(t, err)

	workspace := codersdk.Workspace{Name: fakeWorkspaceName, OwnerName: fakeOwnerName}
	workspaceLink := buildWorkspaceLink(serverURL, workspace)

	assert.Equal(t, workspaceLink.String(), fakeServerURL+"/@"+fakeOwnerName+"/"+fakeWorkspaceName)
}

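// The tests below exercise closerStack, which closes pushed closers in LIFO
// order (last pushed, first closed), rejects pushes after it has been closed
// while still closing the rejected closer, and uses the injected quartz clock
// to give each closer gracefulShutdownTimeout before moving on to the next.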
func TestCloserStack_Mainline(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := testutil.Logger(t)
	uut := newCloserStack(ctx, logger, quartz.NewMock(t))
	closes := new([]*fakeCloser)
	fc0 := &fakeCloser{closes: closes}
	fc1 := &fakeCloser{closes: closes}

	func() {
		defer uut.close(nil)
		err := uut.push("fc0", fc0)
		require.NoError(t, err)
		err = uut.push("fc1", fc1)
		require.NoError(t, err)
	}()
	// order reversed
	require.Equal(t, []*fakeCloser{fc1, fc0}, *closes)
}

func TestCloserStack_Empty(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := testutil.Logger(t)
	uut := newCloserStack(ctx, logger, quartz.NewMock(t))

	closed := make(chan struct{})
	go func() {
		defer close(closed)
		uut.close(nil)
	}()
	testutil.TryReceive(ctx, t, closed)
}

func TestCloserStack_Context(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()
	logger := testutil.Logger(t)
	uut := newCloserStack(ctx, logger, quartz.NewMock(t))
	closes := new([]*fakeCloser)
	fc0 := &fakeCloser{closes: closes}
	fc1 := &fakeCloser{closes: closes}

	err := uut.push("fc0", fc0)
	require.NoError(t, err)
	err = uut.push("fc1", fc1)
	require.NoError(t, err)
	cancel()
	require.Eventually(t, func() bool {
		uut.Lock()
		defer uut.Unlock()
		return uut.closed
	}, testutil.WaitShort, testutil.IntervalFast)
}

func TestCloserStack_PushAfterClose(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	uut := newCloserStack(ctx, logger, quartz.NewMock(t))
	closes := new([]*fakeCloser)
	fc0 := &fakeCloser{closes: closes}
	fc1 := &fakeCloser{closes: closes}

	err := uut.push("fc0", fc0)
	require.NoError(t, err)

	exErr := xerrors.New("test")
	uut.close(exErr)
	require.Equal(t, []*fakeCloser{fc0}, *closes)

	err = uut.push("fc1", fc1)
	require.ErrorIs(t, err, exErr)
	require.Equal(t, []*fakeCloser{fc1, fc0}, *closes, "should close fc1")
}

func TestCloserStack_CloseAfterContext(t *testing.T) {
	t.Parallel()
	testCtx := testutil.Context(t, testutil.WaitShort)
	ctx, cancel := context.WithCancel(testCtx)
	defer cancel()
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	uut := newCloserStack(ctx, logger, quartz.NewMock(t))
	ac := newAsyncCloser(testCtx, t)
	defer ac.complete()
	err := uut.push("async", ac)
	require.NoError(t, err)
	cancel()
	testutil.TryReceive(testCtx, t, ac.started)

	closed := make(chan struct{})
	go func() {
		defer close(closed)
		uut.close(nil)
	}()

	// since the asyncCloser is still waiting, we shouldn't complete uut.close()
	select {
	case <-time.After(testutil.IntervalFast):
		// OK!
	case <-closed:
		t.Fatal("closed before stack was finished")
	}

	ac.complete()
	testutil.TryReceive(testCtx, t, closed)
}

func TestCloserStack_Timeout(t *testing.T) {
	t.Parallel()
	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true}).Leveled(slog.LevelDebug)
	mClock := quartz.NewMock(t)
	trap := mClock.Trap().TickerFunc("closerStack")
	defer trap.Close()
	uut := newCloserStack(ctx, logger, mClock)
	var ac [3]*asyncCloser
	for i := range ac {
		ac[i] = newAsyncCloser(ctx, t)
		err := uut.push(fmt.Sprintf("async %d", i), ac[i])
		require.NoError(t, err)
	}
	defer func() {
		for _, a := range ac {
			a.complete()
		}
	}()

	closed := make(chan struct{})
	go func() {
		defer close(closed)
		uut.close(nil)
	}()
	trap.MustWait(ctx).Release()
	// top starts right away, but it hangs
	testutil.TryReceive(ctx, t, ac[2].started)
	// timer pops and we start the middle one
	mClock.Advance(gracefulShutdownTimeout).MustWait(ctx)
	testutil.TryReceive(ctx, t, ac[1].started)

	// middle one finishes
	ac[1].complete()
	// bottom starts, but also hangs
	testutil.TryReceive(ctx, t, ac[0].started)

	// timer has to pop twice to time out.
	mClock.Advance(gracefulShutdownTimeout).MustWait(ctx)
	mClock.Advance(gracefulShutdownTimeout).MustWait(ctx)
	testutil.TryReceive(ctx, t, closed)
}

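// TestCoderConnectStdio wires an in-process SSH client to a local gliderssh
// server through runCoderConnectStdio, which bicopies between the stdio pipes
// and the TCP listener. This is the path `coder ssh --stdio` takes when it
// detects an existing Coder Connect tunnel instead of building its own
// tailnet connection.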
func TestCoderConnectStdio(t *testing.T) {
	t.Parallel()

	ctx := testutil.Context(t, testutil.WaitShort)
	logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
	stack := newCloserStack(ctx, logger, quartz.NewMock(t))

	clientOutput, clientInput := io.Pipe()
	serverOutput, serverInput := io.Pipe()
	defer func() {
		for _, c := range []io.Closer{clientOutput, clientInput, serverOutput, serverInput} {
			_ = c.Close()
		}
	}()

	server := newSSHServer("127.0.0.1:0")
	ln, err := net.Listen("tcp", server.server.Addr)
	require.NoError(t, err)

	go func() {
		_ = server.Serve(ln)
	}()
	t.Cleanup(func() {
		_ = server.Close()
	})

	stdioDone := make(chan struct{})
	go func() {
		err = runCoderConnectStdio(ctx, ln.Addr().String(), clientOutput, serverInput, stack)
		assert.NoError(t, err)
		close(stdioDone)
	}()

	conn, channels, requests, err := ssh.NewClientConn(&testutil.ReaderWriterConn{
		Reader: serverOutput,
		Writer: clientInput,
	}, "", &ssh.ClientConfig{
		// #nosec
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	})
	require.NoError(t, err)
	defer conn.Close()

	sshClient := ssh.NewClient(conn, channels, requests)
	session, err := sshClient.NewSession()
	require.NoError(t, err)
	defer session.Close()

	// We're not connected to a real shell
	err = session.Run("")
	require.NoError(t, err)
	err = sshClient.Close()
	require.NoError(t, err)
	_ = clientOutput.Close()

	<-stdioDone
}

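// Test doubles: sshServer wraps a gliderssh.Server whose handler just writes
// "Connected!" to stderr; fakeCloser records the order in which closers are
// closed; asyncCloser blocks in Close until complete() is called or the test
// context expires.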
type sshServer struct {
	server *gliderssh.Server
}

func newSSHServer(addr string) *sshServer {
	return &sshServer{
		server: &gliderssh.Server{
			Addr: addr,
			Handler: func(s gliderssh.Session) {
				_, _ = io.WriteString(s.Stderr(), "Connected!")
			},
		},
	}
}

func (s *sshServer) Serve(ln net.Listener) error {
	return s.server.Serve(ln)
}

func (s *sshServer) Close() error {
	return s.server.Close()
}

type fakeCloser struct {
	closes *[]*fakeCloser
	err    error
}

func (c *fakeCloser) Close() error {
	*c.closes = append(*c.closes, c)
	return c.err
}

type asyncCloser struct {
	t            *testing.T
	ctx          context.Context
	started      chan struct{}
	isComplete   chan struct{}
	completeOnce sync.Once
}

func (c *asyncCloser) Close() error {
	close(c.started)
	select {
	case <-c.ctx.Done():
		c.t.Error("timed out")
		return c.ctx.Err()
	case <-c.isComplete:
		return nil
	}
}

func (c *asyncCloser) complete() {
	c.completeOnce.Do(func() { close(c.isComplete) })
}

func newAsyncCloser(ctx context.Context, t *testing.T) *asyncCloser {
	return &asyncCloser{
		t:          t,
		ctx:        ctx,
		isComplete: make(chan struct{}),
		started:    make(chan struct{}),
	}
}