Files
coder/agent/apphealth.go
Kyle Carberry 7ad87505c8 chore: move agent functions from codersdk into agentsdk (#5903)
* chore: rename `AgentConn` to `WorkspaceAgentConn`

The codersdk was becoming bloated with consts for the workspace
agent that made no sense to a reader. `Tailnet*` is an example
of these consts.

* chore: remove `Get` prefix from *Client functions

* chore: remove `BypassRatelimits` option in `codersdk.Client`

It feels wrong to have this as a direct option because it's so infrequently
needed by API callers. It's better to directly modify headers in the two
places that we actually use it.

* Merge `appearance.go` and `buildinfo.go` into `deployment.go`

* Merge `experiments.go` and `features.go` into `deployment.go`

* Fix `make gen` referencing old type names

* Merge `error.go` into `client.go`

`codersdk.Response` lived in `error.go`, which is wrong.

* chore: refactor workspace agent functions into agentsdk

It was odd conflating the codersdk that clients should use
with functions that only the agent should use. This separates
them into two SDKs that are closely coupled, but separate.

* Merge `insights.go` into `deployment.go`

* Merge `organizationmember.go` into `organizations.go`

* Merge `quota.go` into `workspaces.go`

* Rename `sse.go` to `serversentevents.go`

* Rename `codersdk.WorkspaceAppHostResponse` to `codersdk.AppHostResponse`

* Format `.vscode/settings.json`

* Fix outdated naming in `api.ts`

* Fix app host response

* Fix unsupported type

* Fix imported type
2023-01-29 15:47:24 -06:00

183 lines
5.1 KiB
Go

package agent
import (
"context"
"net/http"
"sync"
"time"
"github.com/google/uuid"
"golang.org/x/xerrors"
"cdr.dev/slog"
"github.com/coder/coder/codersdk"
"github.com/coder/coder/codersdk/agentsdk"
"github.com/coder/retry"
)
// WorkspaceAgentApps fetches the workspace apps.
type WorkspaceAgentApps func(context.Context) ([]codersdk.WorkspaceApp, error)
// PostWorkspaceAgentAppHealth updates the workspace app health.
type PostWorkspaceAgentAppHealth func(context.Context, agentsdk.PostAppHealthsRequest) error
// WorkspaceAppHealthReporter is a function that checks and reports the health of the workspace apps until the passed context is canceled.
type WorkspaceAppHealthReporter func(ctx context.Context)
// NewWorkspaceAppHealthReporter creates a WorkspaceAppHealthReporter that reports app health to coderd.
func NewWorkspaceAppHealthReporter(logger slog.Logger, apps []codersdk.WorkspaceApp, postWorkspaceAgentAppHealth PostWorkspaceAgentAppHealth) WorkspaceAppHealthReporter {
runHealthcheckLoop := func(ctx context.Context) error {
// no need to run this loop if no apps for this workspace.
if len(apps) == 0 {
return nil
}
hasHealthchecksEnabled := false
health := make(map[uuid.UUID]codersdk.WorkspaceAppHealth, 0)
for _, app := range apps {
if app.Health == codersdk.WorkspaceAppHealthDisabled {
continue
}
health[app.ID] = app.Health
hasHealthchecksEnabled = true
}
// no need to run this loop if no health checks are configured.
if !hasHealthchecksEnabled {
return nil
}
// run a ticker for each app health check.
var mu sync.RWMutex
failures := make(map[uuid.UUID]int, 0)
for _, nextApp := range apps {
if !shouldStartTicker(nextApp) {
continue
}
app := nextApp
go func() {
t := time.NewTicker(time.Duration(app.Healthcheck.Interval) * time.Second)
defer t.Stop()
for {
select {
case <-ctx.Done():
return
case <-t.C:
}
// we set the http timeout to the healthcheck interval to prevent getting too backed up.
client := &http.Client{
Timeout: time.Duration(app.Healthcheck.Interval) * time.Second,
}
err := func() error {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, app.Healthcheck.URL, nil)
if err != nil {
return err
}
res, err := client.Do(req)
if err != nil {
return err
}
// successful healthcheck is a non-5XX status code
_ = res.Body.Close()
if res.StatusCode >= http.StatusInternalServerError {
return xerrors.Errorf("error status code: %d", res.StatusCode)
}
return nil
}()
if err != nil {
mu.Lock()
if failures[app.ID] < int(app.Healthcheck.Threshold) {
// increment the failure count and keep status the same.
// we will change it when we hit the threshold.
failures[app.ID]++
} else {
// set to unhealthy if we hit the failure threshold.
// we stop incrementing at the threshold to prevent the failure value from increasing forever.
health[app.ID] = codersdk.WorkspaceAppHealthUnhealthy
}
mu.Unlock()
} else {
mu.Lock()
// we only need one successful health check to be considered healthy.
health[app.ID] = codersdk.WorkspaceAppHealthHealthy
failures[app.ID] = 0
mu.Unlock()
}
t.Reset(time.Duration(app.Healthcheck.Interval) * time.Second)
}
}()
}
mu.Lock()
lastHealth := copyHealth(health)
mu.Unlock()
reportTicker := time.NewTicker(time.Second)
defer reportTicker.Stop()
// every second we check if the health values of the apps have changed
// and if there is a change we will report the new values.
for {
select {
case <-ctx.Done():
return nil
case <-reportTicker.C:
mu.RLock()
changed := healthChanged(lastHealth, health)
mu.RUnlock()
if !changed {
continue
}
mu.Lock()
lastHealth = copyHealth(health)
mu.Unlock()
err := postWorkspaceAgentAppHealth(ctx, agentsdk.PostAppHealthsRequest{
Healths: lastHealth,
})
if err != nil {
logger.Error(ctx, "failed to report workspace app stat", slog.Error(err))
}
}
}
}
return func(ctx context.Context) {
for r := retry.New(time.Second, 30*time.Second); r.Wait(ctx); {
err := runHealthcheckLoop(ctx)
if err == nil || xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
return
}
logger.Error(ctx, "failed running workspace app reporter", slog.Error(err))
}
}
}
func shouldStartTicker(app codersdk.WorkspaceApp) bool {
return app.Healthcheck.URL != "" && app.Healthcheck.Interval > 0 && app.Healthcheck.Threshold > 0
}
func healthChanged(old map[uuid.UUID]codersdk.WorkspaceAppHealth, new map[uuid.UUID]codersdk.WorkspaceAppHealth) bool {
for name, newValue := range new {
oldValue, found := old[name]
if !found {
return true
}
if newValue != oldValue {
return true
}
}
return false
}
func copyHealth(h1 map[uuid.UUID]codersdk.WorkspaceAppHealth) map[uuid.UUID]codersdk.WorkspaceAppHealth {
h2 := make(map[uuid.UUID]codersdk.WorkspaceAppHealth, 0)
for k, v := range h1 {
h2[k] = v
}
return h2
}