mirror of
https://github.com/coder/coder.git
synced 2025-07-15 22:20:27 +00:00
feat: Allow workspace resources to attach multiple agents (#942)
This enables a "kubernetes_pod" to attach multiple agents that could be for multiple services. Each agent is required to have a unique name, so SSH syntax is: `coder ssh <workspace>.<agent>` A resource can have zero agents too, they aren't required.
This commit is contained in:
@ -2,26 +2,16 @@ package coderd
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/render"
|
||||
"github.com/hashicorp/yamux"
|
||||
"golang.org/x/xerrors"
|
||||
"nhooyr.io/websocket"
|
||||
"github.com/google/uuid"
|
||||
|
||||
"cdr.dev/slog"
|
||||
|
||||
"github.com/coder/coder/coderd/database"
|
||||
"github.com/coder/coder/coderd/httpapi"
|
||||
"github.com/coder/coder/coderd/httpmw"
|
||||
"github.com/coder/coder/codersdk"
|
||||
"github.com/coder/coder/peerbroker"
|
||||
"github.com/coder/coder/peerbroker/proto"
|
||||
"github.com/coder/coder/provisionersdk"
|
||||
)
|
||||
|
||||
func (api *api) workspaceResource(rw http.ResponseWriter, r *http.Request) {
|
||||
@ -40,15 +30,18 @@ func (api *api) workspaceResource(rw http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
return
|
||||
}
|
||||
var apiAgent *codersdk.WorkspaceAgent
|
||||
if workspaceResource.AgentID.Valid {
|
||||
agent, err := api.Database.GetWorkspaceAgentByResourceID(r.Context(), workspaceResource.ID)
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusInternalServerError, httpapi.Response{
|
||||
Message: fmt.Sprintf("get provisioner job agent: %s", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
agents, err := api.Database.GetWorkspaceAgentsByResourceIDs(r.Context(), []uuid.UUID{workspaceResource.ID})
|
||||
if errors.Is(err, sql.ErrNoRows) {
|
||||
err = nil
|
||||
}
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusInternalServerError, httpapi.Response{
|
||||
Message: fmt.Sprintf("get provisioner job agents: %s", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
apiAgents := make([]codersdk.WorkspaceAgent, 0)
|
||||
for _, agent := range agents {
|
||||
convertedAgent, err := convertWorkspaceAgent(agent, api.AgentConnectionUpdateFrequency)
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusInternalServerError, httpapi.Response{
|
||||
@ -56,239 +49,9 @@ func (api *api) workspaceResource(rw http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
return
|
||||
}
|
||||
apiAgent = &convertedAgent
|
||||
apiAgents = append(apiAgents, convertedAgent)
|
||||
}
|
||||
|
||||
render.Status(r, http.StatusOK)
|
||||
render.JSON(rw, r, convertWorkspaceResource(workspaceResource, apiAgent))
|
||||
}
|
||||
|
||||
func (api *api) workspaceResourceDial(rw http.ResponseWriter, r *http.Request) {
|
||||
api.websocketWaitMutex.Lock()
|
||||
api.websocketWaitGroup.Add(1)
|
||||
api.websocketWaitMutex.Unlock()
|
||||
defer api.websocketWaitGroup.Done()
|
||||
|
||||
resource := httpmw.WorkspaceResourceParam(r)
|
||||
if !resource.AgentID.Valid {
|
||||
httpapi.Write(rw, http.StatusBadRequest, httpapi.Response{
|
||||
Message: "resource doesn't have an agent",
|
||||
})
|
||||
return
|
||||
}
|
||||
agent, err := api.Database.GetWorkspaceAgentByResourceID(r.Context(), resource.ID)
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusBadRequest, httpapi.Response{
|
||||
Message: fmt.Sprintf("get provisioner job agent: %s", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
conn, err := websocket.Accept(rw, r, &websocket.AcceptOptions{
|
||||
CompressionMode: websocket.CompressionDisabled,
|
||||
})
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusBadRequest, httpapi.Response{
|
||||
Message: fmt.Sprintf("accept websocket: %s", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
defer func() {
|
||||
_ = conn.Close(websocket.StatusNormalClosure, "")
|
||||
}()
|
||||
config := yamux.DefaultConfig()
|
||||
config.LogOutput = io.Discard
|
||||
session, err := yamux.Server(websocket.NetConn(r.Context(), conn, websocket.MessageBinary), config)
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusAbnormalClosure, err.Error())
|
||||
return
|
||||
}
|
||||
err = peerbroker.ProxyListen(r.Context(), session, peerbroker.ProxyOptions{
|
||||
ChannelID: agent.ID.String(),
|
||||
Logger: api.Logger.Named("peerbroker-proxy-dial"),
|
||||
Pubsub: api.Pubsub,
|
||||
})
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusInternalError, httpapi.WebsocketCloseSprintf("serve: %s", err))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (api *api) workspaceAgentListen(rw http.ResponseWriter, r *http.Request) {
|
||||
api.websocketWaitMutex.Lock()
|
||||
api.websocketWaitGroup.Add(1)
|
||||
api.websocketWaitMutex.Unlock()
|
||||
defer api.websocketWaitGroup.Done()
|
||||
|
||||
agent := httpmw.WorkspaceAgent(r)
|
||||
conn, err := websocket.Accept(rw, r, &websocket.AcceptOptions{
|
||||
CompressionMode: websocket.CompressionDisabled,
|
||||
})
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusBadRequest, httpapi.Response{
|
||||
Message: fmt.Sprintf("accept websocket: %s", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
resource, err := api.Database.GetWorkspaceResourceByID(r.Context(), agent.ResourceID)
|
||||
if err != nil {
|
||||
httpapi.Write(rw, http.StatusBadRequest, httpapi.Response{
|
||||
Message: fmt.Sprintf("accept websocket: %s", err),
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
_ = conn.Close(websocket.StatusNormalClosure, "")
|
||||
}()
|
||||
config := yamux.DefaultConfig()
|
||||
config.LogOutput = io.Discard
|
||||
session, err := yamux.Server(websocket.NetConn(r.Context(), conn, websocket.MessageBinary), config)
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusAbnormalClosure, err.Error())
|
||||
return
|
||||
}
|
||||
closer, err := peerbroker.ProxyDial(proto.NewDRPCPeerBrokerClient(provisionersdk.Conn(session)), peerbroker.ProxyOptions{
|
||||
ChannelID: agent.ID.String(),
|
||||
Pubsub: api.Pubsub,
|
||||
Logger: api.Logger.Named("peerbroker-proxy-listen"),
|
||||
})
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusAbnormalClosure, err.Error())
|
||||
return
|
||||
}
|
||||
defer closer.Close()
|
||||
firstConnectedAt := agent.FirstConnectedAt
|
||||
if !firstConnectedAt.Valid {
|
||||
firstConnectedAt = sql.NullTime{
|
||||
Time: database.Now(),
|
||||
Valid: true,
|
||||
}
|
||||
}
|
||||
lastConnectedAt := sql.NullTime{
|
||||
Time: database.Now(),
|
||||
Valid: true,
|
||||
}
|
||||
disconnectedAt := agent.DisconnectedAt
|
||||
updateConnectionTimes := func() error {
|
||||
err = api.Database.UpdateWorkspaceAgentConnectionByID(r.Context(), database.UpdateWorkspaceAgentConnectionByIDParams{
|
||||
ID: agent.ID,
|
||||
FirstConnectedAt: firstConnectedAt,
|
||||
LastConnectedAt: lastConnectedAt,
|
||||
DisconnectedAt: disconnectedAt,
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
build, err := api.Database.GetWorkspaceBuildByJobID(r.Context(), resource.JobID)
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusAbnormalClosure, err.Error())
|
||||
return
|
||||
}
|
||||
// Ensure the resource is still valid!
|
||||
// We only accept agents for resources on the latest build.
|
||||
ensureLatestBuild := func() error {
|
||||
latestBuild, err := api.Database.GetWorkspaceBuildByWorkspaceIDWithoutAfter(r.Context(), build.WorkspaceID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if build.ID.String() != latestBuild.ID.String() {
|
||||
return xerrors.New("build is outdated")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
defer func() {
|
||||
disconnectedAt = sql.NullTime{
|
||||
Time: database.Now(),
|
||||
Valid: true,
|
||||
}
|
||||
_ = updateConnectionTimes()
|
||||
}()
|
||||
|
||||
err = updateConnectionTimes()
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusAbnormalClosure, err.Error())
|
||||
return
|
||||
}
|
||||
err = ensureLatestBuild()
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusGoingAway, "")
|
||||
return
|
||||
}
|
||||
|
||||
api.Logger.Info(r.Context(), "accepting agent", slog.F("resource", resource), slog.F("agent", agent))
|
||||
|
||||
ticker := time.NewTicker(api.AgentConnectionUpdateFrequency)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-session.CloseChan():
|
||||
return
|
||||
case <-ticker.C:
|
||||
lastConnectedAt = sql.NullTime{
|
||||
Time: database.Now(),
|
||||
Valid: true,
|
||||
}
|
||||
err = updateConnectionTimes()
|
||||
if err != nil {
|
||||
_ = conn.Close(websocket.StatusAbnormalClosure, err.Error())
|
||||
return
|
||||
}
|
||||
err = ensureLatestBuild()
|
||||
if err != nil {
|
||||
// Disconnect agents that are no longer valid.
|
||||
_ = conn.Close(websocket.StatusGoingAway, "")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func convertWorkspaceAgent(dbAgent database.WorkspaceAgent, agentUpdateFrequency time.Duration) (codersdk.WorkspaceAgent, error) {
|
||||
var envs map[string]string
|
||||
if dbAgent.EnvironmentVariables.Valid {
|
||||
err := json.Unmarshal(dbAgent.EnvironmentVariables.RawMessage, &envs)
|
||||
if err != nil {
|
||||
return codersdk.WorkspaceAgent{}, xerrors.Errorf("unmarshal: %w", err)
|
||||
}
|
||||
}
|
||||
agent := codersdk.WorkspaceAgent{
|
||||
ID: dbAgent.ID,
|
||||
CreatedAt: dbAgent.CreatedAt,
|
||||
UpdatedAt: dbAgent.UpdatedAt,
|
||||
ResourceID: dbAgent.ResourceID,
|
||||
InstanceID: dbAgent.AuthInstanceID.String,
|
||||
StartupScript: dbAgent.StartupScript.String,
|
||||
EnvironmentVariables: envs,
|
||||
}
|
||||
if dbAgent.FirstConnectedAt.Valid {
|
||||
agent.FirstConnectedAt = &dbAgent.FirstConnectedAt.Time
|
||||
}
|
||||
if dbAgent.LastConnectedAt.Valid {
|
||||
agent.LastConnectedAt = &dbAgent.LastConnectedAt.Time
|
||||
}
|
||||
if dbAgent.DisconnectedAt.Valid {
|
||||
agent.DisconnectedAt = &dbAgent.DisconnectedAt.Time
|
||||
}
|
||||
switch {
|
||||
case !dbAgent.FirstConnectedAt.Valid:
|
||||
// If the agent never connected, it's waiting for the compute
|
||||
// to start up.
|
||||
agent.Status = codersdk.WorkspaceAgentWaiting
|
||||
case dbAgent.DisconnectedAt.Time.After(dbAgent.LastConnectedAt.Time):
|
||||
// If we've disconnected after our last connection, we know the
|
||||
// agent is no longer connected.
|
||||
agent.Status = codersdk.WorkspaceAgentDisconnected
|
||||
case agentUpdateFrequency*2 >= database.Now().Sub(dbAgent.LastConnectedAt.Time):
|
||||
// The connection updated it's timestamp within the update frequency.
|
||||
// We multiply by two to allow for some lag.
|
||||
agent.Status = codersdk.WorkspaceAgentConnected
|
||||
case database.Now().Sub(dbAgent.LastConnectedAt.Time) > agentUpdateFrequency*2:
|
||||
// The connection died without updating the last connected.
|
||||
agent.Status = codersdk.WorkspaceAgentDisconnected
|
||||
}
|
||||
|
||||
return agent, nil
|
||||
render.JSON(rw, r, convertWorkspaceResource(workspaceResource, apiAgents))
|
||||
}
|
||||
|
Reference in New Issue
Block a user