coder/coderd/wsconncache/wsconncache.go
Kyle Carberry 9bd83e5ec7 feat: Add Tailscale networking (#3505)
* fix: Add coder user to docker group on installation

This makes for a simpler setup, and reduces the likelihood
a user runs into a strange issue.

* Add wgnet

* Add ping

* Add listening

* Finish refactor to make this work

* Add interface for swapping

* Fix conncache with interface

* chore: update gvisor

* fix tailscale types

* linting

* more linting

* Add coordinator

* Add coordinator tests

* Fix coordination

* It compiles!

* Move all connection negotiation in-memory

* Migrate coordinator to use net.Conn

* Add closed func

* Fix close listener func

* Make reconnecting PTY work

* Fix reconnecting PTY

* Update CI to Go 1.19

* Add CLI flags for DERP mapping

* Fix Tailnet test

* Rename ConnCoordinator to TailnetCoordinator

* Remove print statement from workspace agent test

* Refactor wsconncache to use tailnet

* Remove STUN from unit tests

* Add migrate back to dump

* chore: Upgrade to Go 1.19

This is required as part of #3505.

* Fix reconnecting PTY tests

* fix: update wireguard-go to fix devtunnel

* fix migration numbers

* linting

* Return early for status if endpoints are empty

* Update cli/server.go

Co-authored-by: Colin Adler <colin1adler@gmail.com>

* Update cli/server.go

Co-authored-by: Colin Adler <colin1adler@gmail.com>

* Fix frontend entities

* Fix agent bicopy

* Fix race condition for the last node

* Fix down migration

* Fix connection RBAC

* Fix migration numbers

* Fix forwarding TCP to a local port

* Implement ping for tailnet

* Rename to ForceHTTP

* Add external derpmapping

* Expose DERP region names to the API

* Add global option to enable Tailscale networking for web

* Mark DERP flags hidden while testing

* Update DERP map on reconnect

* Add close func to workspace agents

* Fix race condition in upstream dependency

* Fix feature columns race condition

Co-authored-by: Colin Adler <colin1adler@gmail.com>
2022-08-31 20:09:44 -05:00

163 lines
4.1 KiB
Go

// Package wsconncache caches workspace agent connections by UUID.
package wsconncache

import (
	"context"
	"net/http"
	"sync"
	"time"

	"github.com/google/uuid"
	"go.uber.org/atomic"
	"golang.org/x/sync/singleflight"
	"golang.org/x/xerrors"

	"github.com/coder/coder/agent"
)

// New creates a new workspace connection cache that closes
// connections after the inactive timeout provided.
//
// Agent connections are cached due to WebRTC negotiation
// taking a few hundred milliseconds.
func New(dialer Dialer, inactiveTimeout time.Duration) *Cache {
	if inactiveTimeout == 0 {
		inactiveTimeout = 5 * time.Minute
	}
	return &Cache{
		closed:          make(chan struct{}),
		dialer:          dialer,
		inactiveTimeout: inactiveTimeout,
	}
}

// Dialer creates a new agent connection by ID.
type Dialer func(r *http.Request, id uuid.UUID) (agent.Conn, error)
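
// A minimal construction sketch (not part of the original file): the dialer
// literal below is illustrative, and dialWorkspaceAgent is a hypothetical
// helper standing in for however the caller reaches the agent. Only New and
// Dialer above are assumed.
//
//	cache := wsconncache.New(func(r *http.Request, id uuid.UUID) (agent.Conn, error) {
//		return dialWorkspaceAgent(r.Context(), id) // hypothetical dial helper
//	}, 10*time.Minute)
//	defer cache.Close()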

// Conn wraps an agent connection with a reusable HTTP transport.
type Conn struct {
	agent.Conn

	// locks counts in-flight acquisitions; the inactivity timer only
	// starts once it drops to zero.
	locks         atomic.Uint64
	timeoutMutex  sync.Mutex
	timeout       *time.Timer
	timeoutCancel context.CancelFunc
	transport     *http.Transport
}

// HTTPTransport returns a transport that dials directly through the
// cached agent connection.
func (c *Conn) HTTPTransport() *http.Transport {
	return c.transport
}

// CloseWithError closes idle connections on the HTTP transport if one exists,
// stops the inactivity timer, and closes the agent connection.
func (c *Conn) CloseWithError(err error) error {
	if c.transport != nil {
		c.transport.CloseIdleConnections()
	}
	c.timeoutMutex.Lock()
	defer c.timeoutMutex.Unlock()
	if c.timeout != nil {
		c.timeout.Stop()
	}
	return c.Conn.CloseWithError(err)
}

// Cache keeps at most one connection per agent ID and evicts entries
// once they have been inactive for inactiveTimeout.
type Cache struct {
	closed          chan struct{}
	closeMutex      sync.Mutex
	closeGroup      sync.WaitGroup
	connGroup       singleflight.Group
	connMap         sync.Map
	dialer          Dialer
	inactiveTimeout time.Duration
}

// Acquire gets or establishes a connection with the dialer using the ID provided.
// If a connection is in-progress, that connection or error will be returned.
//
// The returned function is used to release a lock on the connection. Once zero
// locks exist on a connection, the inactive timeout will begin to tick down.
// After the time expires, the connection will be cleared from the cache.
func (c *Cache) Acquire(r *http.Request, id uuid.UUID) (*Conn, func(), error) {
	rawConn, found := c.connMap.Load(id.String())
	// If the connection isn't found, establish a new one!
	if !found {
		var err error
		// A singleflight group is used so that concurrent requests for the
		// same identifier share a single dial.
		rawConn, err, _ = c.connGroup.Do(id.String(), func() (interface{}, error) {
			agentConn, err := c.dialer(r, id)
			if err != nil {
				return nil, xerrors.Errorf("dial: %w", err)
			}
			timeoutCtx, timeoutCancelFunc := context.WithCancel(context.Background())
			defaultTransport, valid := http.DefaultTransport.(*http.Transport)
			if !valid {
				panic("dev error: default transport is the wrong type")
			}
			transport := defaultTransport.Clone()
			transport.DialContext = agentConn.DialContext
			conn := &Conn{
				Conn:          agentConn,
				timeoutCancel: timeoutCancelFunc,
				transport:     transport,
			}
			c.closeMutex.Lock()
			c.closeGroup.Add(1)
			c.closeMutex.Unlock()
			// Evict the connection from the cache when it times out from
			// inactivity, the cache closes, or the connection itself closes.
			go func() {
				defer c.closeGroup.Done()
				var err error
				select {
				case <-timeoutCtx.Done():
					err = xerrors.New("cache timeout")
				case <-c.closed:
					err = xerrors.New("cache closed")
				case <-conn.Closed():
				}
				c.connMap.Delete(id.String())
				c.connGroup.Forget(id.String())
				_ = conn.CloseWithError(err)
			}()
			return conn, nil
		})
		if err != nil {
			return nil, nil, err
		}
		c.connMap.Store(id.String(), rawConn)
	}
	conn, _ := rawConn.(*Conn)
	conn.timeoutMutex.Lock()
	defer conn.timeoutMutex.Unlock()
	if conn.timeout != nil {
		conn.timeout.Stop()
	}
	conn.locks.Inc()
	return conn, func() {
		conn.timeoutMutex.Lock()
		defer conn.timeoutMutex.Unlock()
		if conn.timeout != nil {
			conn.timeout.Stop()
		}
		conn.locks.Dec()
		if conn.locks.Load() == 0 {
			conn.timeout = time.AfterFunc(c.inactiveTimeout, conn.timeoutCancel)
		}
	}, nil
}
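
// A minimal acquisition sketch (not part of the original file): handleAgentHTTP
// is a hypothetical HTTP handler, and only Acquire's contract above is assumed.
// The release func is held for the lifetime of the request so the inactivity
// timer cannot fire mid-request.
//
//	func handleAgentHTTP(w http.ResponseWriter, r *http.Request, cache *wsconncache.Cache, agentID uuid.UUID) {
//		conn, release, err := cache.Acquire(r, agentID)
//		if err != nil {
//			http.Error(w, err.Error(), http.StatusBadGateway)
//			return
//		}
//		defer release()
//
//		// The cached transport dials through the agent connection.
//		client := &http.Client{Transport: conn.HTTPTransport()}
//		resp, err := client.Get("http://localhost:8080/") // hypothetical in-workspace service
//		if err != nil {
//			http.Error(w, err.Error(), http.StatusBadGateway)
//			return
//		}
//		defer resp.Body.Close()
//		_, _ = io.Copy(w, resp.Body)
//	}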

// Close shuts the cache down, closing all cached connections. It is
// idempotent, and waits for each connection's eviction goroutine to
// exit before returning.
func (c *Cache) Close() error {
	c.closeMutex.Lock()
	defer c.closeMutex.Unlock()
	select {
	case <-c.closed:
		// Already closed.
		return nil
	default:
	}
	close(c.closed)
	c.closeGroup.Wait()
	return nil
}