Files
coder/coderd/agentapi/logs.go
Spike Curtis 92b2e26a48 feat: send log limit exceeded in response, not error (#12078)
When we exceed the db-imposed limit of logs, we need to communicate that back to the agent.  In v1 we did it with a 4xx-level HTTP status, but with dRPC, the errors are delivered as strings, which feels fragile to me for something we want to gracefully handle.

So, this PR adds the log limit exceeded as a field on the response message, and fixes the API handler to set it as appropriate instead of an error.
2024-02-09 16:17:20 +04:00

154 lines
4.9 KiB
Go

package agentapi
import (
"context"
"time"
"github.com/google/uuid"
"golang.org/x/xerrors"
"cdr.dev/slog"
agentproto "github.com/coder/coder/v2/agent/proto"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/codersdk/agentsdk"
)
type LogsAPI struct {
AgentFn func(context.Context) (database.WorkspaceAgent, error)
Database database.Store
Log slog.Logger
PublishWorkspaceUpdateFn func(context.Context, *database.WorkspaceAgent) error
PublishWorkspaceAgentLogsUpdateFn func(ctx context.Context, workspaceAgentID uuid.UUID, msg agentsdk.LogsNotifyMessage)
TimeNowFn func() time.Time // defaults to dbtime.Now()
}
func (a *LogsAPI) now() time.Time {
if a.TimeNowFn != nil {
return a.TimeNowFn()
}
return dbtime.Now()
}
func (a *LogsAPI) BatchCreateLogs(ctx context.Context, req *agentproto.BatchCreateLogsRequest) (*agentproto.BatchCreateLogsResponse, error) {
workspaceAgent, err := a.AgentFn(ctx)
if err != nil {
return nil, err
}
if workspaceAgent.LogsOverflowed {
return &agentproto.BatchCreateLogsResponse{LogLimitExceeded: true}, nil
}
if len(req.Logs) == 0 {
return &agentproto.BatchCreateLogsResponse{}, nil
}
logSourceID, err := uuid.FromBytes(req.LogSourceId)
if err != nil {
return nil, xerrors.Errorf("parse log source ID %q: %w", req.LogSourceId, err)
}
// This is to support the legacy API where the log source ID was
// not provided in the request body. We default to the external
// log source in this case.
if logSourceID == uuid.Nil {
// Use the external log source
externalSources, err := a.Database.InsertWorkspaceAgentLogSources(ctx, database.InsertWorkspaceAgentLogSourcesParams{
WorkspaceAgentID: workspaceAgent.ID,
CreatedAt: a.now(),
ID: []uuid.UUID{agentsdk.ExternalLogSourceID},
DisplayName: []string{"External"},
Icon: []string{"/emojis/1f310.png"},
})
if database.IsUniqueViolation(err, database.UniqueWorkspaceAgentLogSourcesPkey) {
err = nil
logSourceID = agentsdk.ExternalLogSourceID
}
if err != nil {
return nil, xerrors.Errorf("insert external workspace agent log source: %w", err)
}
if len(externalSources) == 1 {
logSourceID = externalSources[0].ID
}
}
output := make([]string, 0)
level := make([]database.LogLevel, 0)
outputLength := 0
for _, logEntry := range req.Logs {
output = append(output, logEntry.Output)
outputLength += len(logEntry.Output)
var dbLevel database.LogLevel
switch logEntry.Level {
case agentproto.Log_TRACE:
dbLevel = database.LogLevelTrace
case agentproto.Log_DEBUG:
dbLevel = database.LogLevelDebug
case agentproto.Log_INFO:
dbLevel = database.LogLevelInfo
case agentproto.Log_WARN:
dbLevel = database.LogLevelWarn
case agentproto.Log_ERROR:
dbLevel = database.LogLevelError
default:
// Default to "info" to support older clients that didn't have the
// level field.
dbLevel = database.LogLevelInfo
}
level = append(level, dbLevel)
}
logs, err := a.Database.InsertWorkspaceAgentLogs(ctx, database.InsertWorkspaceAgentLogsParams{
AgentID: workspaceAgent.ID,
CreatedAt: a.now(),
Output: output,
Level: level,
LogSourceID: logSourceID,
OutputLength: int32(outputLength),
})
if err != nil {
if !database.IsWorkspaceAgentLogsLimitError(err) {
return nil, xerrors.Errorf("insert workspace agent logs: %w", err)
}
err := a.Database.UpdateWorkspaceAgentLogOverflowByID(ctx, database.UpdateWorkspaceAgentLogOverflowByIDParams{
ID: workspaceAgent.ID,
LogsOverflowed: true,
})
if err != nil {
// We don't want to return here, because the agent will retry on
// failure and this isn't a huge deal. The overflow state is just a
// hint to the user that the logs are incomplete.
a.Log.Warn(ctx, "failed to update workspace agent log overflow", slog.Error(err))
}
if a.PublishWorkspaceUpdateFn != nil {
err = a.PublishWorkspaceUpdateFn(ctx, &workspaceAgent)
if err != nil {
return nil, xerrors.Errorf("publish workspace update: %w", err)
}
}
return &agentproto.BatchCreateLogsResponse{LogLimitExceeded: true}, nil
}
// Publish by the lowest log ID inserted so the log stream will fetch
// everything from that point.
if a.PublishWorkspaceAgentLogsUpdateFn != nil {
lowestLogID := logs[0].ID
a.PublishWorkspaceAgentLogsUpdateFn(ctx, workspaceAgent.ID, agentsdk.LogsNotifyMessage{
CreatedAfter: lowestLogID - 1,
})
}
if workspaceAgent.LogsLength == 0 && a.PublishWorkspaceUpdateFn != nil {
// If these are the first logs being appended, we publish a UI update
// to notify the UI that logs are now available.
err = a.PublishWorkspaceUpdateFn(ctx, &workspaceAgent)
if err != nil {
return nil, xerrors.Errorf("publish workspace update: %w", err)
}
}
return &agentproto.BatchCreateLogsResponse{}, nil
}