mirror of
https://github.com/coder/coder.git
synced 2025-07-13 21:36:50 +00:00
feat(codersdk/agentsdk): add StartupLogsSender
and StartupLogsWriter
(#8129)
This commit adds two new `agentsdk` functions, `StartupLogsSender` and `StartupLogsWriter` that can be used by any client looking to send startup logs to coderd. We also refactor the `agent` to use these new functions. As a bonus, agent startup logs are separated into "info" and "error" levels to separate stdout and stderr. --------- Co-authored-by: Marcin Tojek <mtojek@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
f56db1b41b
commit
3b9b06fe5a
227
codersdk/agentsdk/logs.go
Normal file
227
codersdk/agentsdk/logs.go
Normal file
@ -0,0 +1,227 @@
|
||||
package agentsdk
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"github.com/coder/coder/codersdk"
|
||||
"github.com/coder/retry"
|
||||
)
|
||||
|
||||
type startupLogsWriter struct {
|
||||
buf bytes.Buffer // Buffer to track partial lines.
|
||||
ctx context.Context
|
||||
send func(ctx context.Context, log ...StartupLog) error
|
||||
level codersdk.LogLevel
|
||||
}
|
||||
|
||||
func (w *startupLogsWriter) Write(p []byte) (int, error) {
|
||||
n := len(p)
|
||||
for len(p) > 0 {
|
||||
nl := bytes.IndexByte(p, '\n')
|
||||
if nl == -1 {
|
||||
break
|
||||
}
|
||||
cr := 0
|
||||
if nl > 0 && p[nl-1] == '\r' {
|
||||
cr = 1
|
||||
}
|
||||
|
||||
var partial []byte
|
||||
if w.buf.Len() > 0 {
|
||||
partial = w.buf.Bytes()
|
||||
w.buf.Reset()
|
||||
}
|
||||
err := w.send(w.ctx, StartupLog{
|
||||
CreatedAt: time.Now().UTC(), // UTC, like database.Now().
|
||||
Level: w.level,
|
||||
Output: string(partial) + string(p[:nl-cr]),
|
||||
})
|
||||
if err != nil {
|
||||
return n - len(p), err
|
||||
}
|
||||
p = p[nl+1:]
|
||||
}
|
||||
if len(p) > 0 {
|
||||
_, err := w.buf.Write(p)
|
||||
if err != nil {
|
||||
return n - len(p), err
|
||||
}
|
||||
}
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (w *startupLogsWriter) Close() error {
|
||||
if w.buf.Len() > 0 {
|
||||
defer w.buf.Reset()
|
||||
return w.send(w.ctx, StartupLog{
|
||||
CreatedAt: time.Now().UTC(), // UTC, like database.Now().
|
||||
Level: w.level,
|
||||
Output: w.buf.String(),
|
||||
})
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartupLogsWriter returns an io.WriteCloser that sends logs via the
|
||||
// provided sender. The sender is expected to be non-blocking. Calling
|
||||
// Close flushes any remaining partially written log lines but is
|
||||
// otherwise no-op. If the context passed to StartupLogsWriter is
|
||||
// canceled, any remaining logs will be discarded.
|
||||
//
|
||||
// Neither Write nor Close is safe for concurrent use and must be used
|
||||
// by a single goroutine.
|
||||
func StartupLogsWriter(ctx context.Context, sender func(ctx context.Context, log ...StartupLog) error, level codersdk.LogLevel) io.WriteCloser {
|
||||
return &startupLogsWriter{
|
||||
ctx: ctx,
|
||||
send: sender,
|
||||
level: level,
|
||||
}
|
||||
}
|
||||
|
||||
// SendStartupLogs will send agent startup logs to the server. Calls to
|
||||
// sendLog are non-blocking and will return an error if flushAndClose
|
||||
// has been called. Calling sendLog concurrently is not supported. If
|
||||
// the context passed to flushAndClose is canceled, any remaining logs
|
||||
// will be discarded.
|
||||
func StartupLogsSender(patchStartupLogs func(ctx context.Context, req PatchStartupLogs) error, logger slog.Logger) (sendLog func(ctx context.Context, log ...StartupLog) error, flushAndClose func(context.Context) error) {
|
||||
// The main context is used to close the sender goroutine and cancel
|
||||
// any outbound requests to the API. The shudown context is used to
|
||||
// signal the sender goroutine to flush logs and then exit.
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
shutdownCtx, shutdown := context.WithCancel(ctx)
|
||||
|
||||
// Synchronous sender, there can only be one outbound send at a time.
|
||||
sendDone := make(chan struct{})
|
||||
send := make(chan []StartupLog, 1)
|
||||
go func() {
|
||||
// Set flushTimeout and backlogLimit so that logs are uploaded
|
||||
// once every 250ms or when 100 logs have been added to the
|
||||
// backlog, whichever comes first.
|
||||
flushTimeout := 250 * time.Millisecond
|
||||
backlogLimit := 100
|
||||
|
||||
flush := time.NewTicker(flushTimeout)
|
||||
|
||||
var backlog []StartupLog
|
||||
defer func() {
|
||||
flush.Stop()
|
||||
if len(backlog) > 0 {
|
||||
logger.Warn(ctx, "startup logs sender exiting early, discarding logs", slog.F("discarded_logs_count", len(backlog)))
|
||||
}
|
||||
logger.Debug(ctx, "startup logs sender exited")
|
||||
close(sendDone)
|
||||
}()
|
||||
|
||||
done := false
|
||||
for {
|
||||
flushed := false
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-shutdownCtx.Done():
|
||||
done = true
|
||||
|
||||
// Check queued logs before flushing.
|
||||
select {
|
||||
case logs := <-send:
|
||||
backlog = append(backlog, logs...)
|
||||
default:
|
||||
}
|
||||
case <-flush.C:
|
||||
flushed = true
|
||||
case logs := <-send:
|
||||
backlog = append(backlog, logs...)
|
||||
flushed = len(backlog) >= backlogLimit
|
||||
}
|
||||
|
||||
if (done || flushed) && len(backlog) > 0 {
|
||||
flush.Stop() // Lower the chance of a double flush.
|
||||
|
||||
// Retry uploading logs until successful or a specific
|
||||
// error occurs. Note that we use the main context here,
|
||||
// meaning these requests won't be interrupted by
|
||||
// shutdown.
|
||||
for r := retry.New(time.Second, 5*time.Second); r.Wait(ctx); {
|
||||
err := patchStartupLogs(ctx, PatchStartupLogs{
|
||||
Logs: backlog,
|
||||
})
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
// This error is expected to be codersdk.Error, but it has
|
||||
// private fields so we can't fake it in tests.
|
||||
var statusErr interface{ StatusCode() int }
|
||||
if errors.As(err, &statusErr) {
|
||||
if statusErr.StatusCode() == http.StatusRequestEntityTooLarge {
|
||||
logger.Warn(ctx, "startup logs too large, discarding logs", slog.F("discarded_logs_count", len(backlog)), slog.Error(err))
|
||||
break
|
||||
}
|
||||
}
|
||||
logger.Error(ctx, "startup logs sender failed to upload logs, retrying later", slog.F("logs_count", len(backlog)), slog.Error(err))
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
backlog = nil
|
||||
|
||||
// Anchor flush to the last log upload.
|
||||
flush.Reset(flushTimeout)
|
||||
}
|
||||
if done {
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
var queue []StartupLog
|
||||
sendLog = func(callCtx context.Context, log ...StartupLog) error {
|
||||
select {
|
||||
case <-shutdownCtx.Done():
|
||||
return xerrors.Errorf("closed: %w", shutdownCtx.Err())
|
||||
case <-callCtx.Done():
|
||||
return callCtx.Err()
|
||||
case queue = <-send:
|
||||
// Recheck to give priority to context cancellation.
|
||||
select {
|
||||
case <-shutdownCtx.Done():
|
||||
return xerrors.Errorf("closed: %w", shutdownCtx.Err())
|
||||
case <-callCtx.Done():
|
||||
return callCtx.Err()
|
||||
default:
|
||||
}
|
||||
// Queue has not been captured by sender yet, re-use.
|
||||
default:
|
||||
}
|
||||
|
||||
queue = append(queue, log...)
|
||||
send <- queue // Non-blocking.
|
||||
queue = nil
|
||||
|
||||
return nil
|
||||
}
|
||||
flushAndClose = func(callCtx context.Context) error {
|
||||
defer cancel()
|
||||
shutdown()
|
||||
select {
|
||||
case <-sendDone:
|
||||
return nil
|
||||
case <-callCtx.Done():
|
||||
cancel()
|
||||
<-sendDone
|
||||
return callCtx.Err()
|
||||
}
|
||||
}
|
||||
return sendLog, flushAndClose
|
||||
}
|
363
codersdk/agentsdk/logs_test.go
Normal file
363
codersdk/agentsdk/logs_test.go
Normal file
@ -0,0 +1,363 @@
|
||||
package agentsdk_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"golang.org/x/exp/slices"
|
||||
|
||||
"cdr.dev/slog"
|
||||
"cdr.dev/slog/sloggers/slogtest"
|
||||
"github.com/coder/coder/codersdk"
|
||||
"github.com/coder/coder/codersdk/agentsdk"
|
||||
"github.com/coder/coder/testutil"
|
||||
)
|
||||
|
||||
func TestStartupLogsWriter_Write(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
canceledCtx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
ctx context.Context
|
||||
level codersdk.LogLevel
|
||||
writes []string
|
||||
want []agentsdk.StartupLog
|
||||
wantErr bool
|
||||
closeFirst bool
|
||||
}{
|
||||
{
|
||||
name: "single line",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"hello world\n"},
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple lines",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"hello world\n", "goodbye world\n"},
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "goodbye world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple newlines",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"\n\n", "hello world\n\n\n", "goodbye world\n"},
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "goodbye world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple lines with partial",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"hello world\n", "goodbye world"},
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple lines with partial, close flushes",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"hello world\n", "goodbye world"},
|
||||
closeFirst: true,
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "goodbye world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "multiple lines with partial in middle",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"hello world\n", "goodbye", " world\n"},
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "goodbye world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "removes carriage return when grouped with newline",
|
||||
ctx: context.Background(),
|
||||
level: codersdk.LogLevelInfo,
|
||||
writes: []string{"hello world\r\n", "\r\r\n", "goodbye world\n"},
|
||||
want: []agentsdk.StartupLog{
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "\r",
|
||||
},
|
||||
{
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "goodbye world",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "cancel context",
|
||||
ctx: canceledCtx,
|
||||
writes: []string{
|
||||
"hello world\n",
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
var got []agentsdk.StartupLog
|
||||
send := func(ctx context.Context, log ...agentsdk.StartupLog) error {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
got = append(got, log...)
|
||||
return nil
|
||||
}
|
||||
w := agentsdk.StartupLogsWriter(tt.ctx, send, tt.level)
|
||||
for _, s := range tt.writes {
|
||||
_, err := w.Write([]byte(s))
|
||||
if err != nil {
|
||||
if tt.wantErr {
|
||||
return
|
||||
}
|
||||
t.Errorf("startupLogsWriter.Write() error = %v, wantErr %v", err, tt.wantErr)
|
||||
}
|
||||
}
|
||||
|
||||
if tt.closeFirst {
|
||||
err := w.Close()
|
||||
if err != nil {
|
||||
t.Errorf("startupLogsWriter.Close() error = %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Compare got and want, but ignore the CreatedAt field.
|
||||
for i := range got {
|
||||
got[i].CreatedAt = tt.want[i].CreatedAt
|
||||
}
|
||||
require.Equal(t, tt.want, got)
|
||||
|
||||
err := w.Close()
|
||||
if !tt.closeFirst && (err != nil) != tt.wantErr {
|
||||
t.Errorf("startupLogsWriter.Close() error = %v, wantErr %v", err, tt.wantErr)
|
||||
return
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
type statusError int
|
||||
|
||||
func (s statusError) StatusCode() int {
|
||||
return int(s)
|
||||
}
|
||||
|
||||
func (s statusError) Error() string {
|
||||
return fmt.Sprintf("status %d", s)
|
||||
}
|
||||
|
||||
func TestStartupLogsSender(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
sendCount int
|
||||
discard []int
|
||||
patchResp func(req agentsdk.PatchStartupLogs) error
|
||||
}{
|
||||
{
|
||||
name: "single log",
|
||||
sendCount: 1,
|
||||
},
|
||||
{
|
||||
name: "multiple logs",
|
||||
sendCount: 995,
|
||||
},
|
||||
{
|
||||
name: "too large",
|
||||
sendCount: 1,
|
||||
discard: []int{1},
|
||||
patchResp: func(req agentsdk.PatchStartupLogs) error {
|
||||
return statusError(http.StatusRequestEntityTooLarge)
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitMedium)
|
||||
defer cancel()
|
||||
|
||||
got := []agentsdk.StartupLog{}
|
||||
patchStartupLogs := func(_ context.Context, req agentsdk.PatchStartupLogs) error {
|
||||
if tt.patchResp != nil {
|
||||
err := tt.patchResp(req)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
got = append(got, req.Logs...)
|
||||
return nil
|
||||
}
|
||||
|
||||
sendLog, flushAndClose := agentsdk.StartupLogsSender(patchStartupLogs, slogtest.Make(t, nil).Leveled(slog.LevelDebug))
|
||||
defer func() {
|
||||
err := flushAndClose(ctx)
|
||||
require.NoError(t, err)
|
||||
}()
|
||||
|
||||
var want []agentsdk.StartupLog
|
||||
for i := 0; i < tt.sendCount; i++ {
|
||||
want = append(want, agentsdk.StartupLog{
|
||||
CreatedAt: time.Now(),
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: fmt.Sprintf("hello world %d", i),
|
||||
})
|
||||
err := sendLog(ctx, want[len(want)-1])
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
err := flushAndClose(ctx)
|
||||
require.NoError(t, err)
|
||||
|
||||
for _, di := range tt.discard {
|
||||
want = slices.Delete(want, di-1, di)
|
||||
}
|
||||
|
||||
require.Equal(t, want, got)
|
||||
})
|
||||
}
|
||||
|
||||
t.Run("context canceled during send", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
|
||||
patchStartupLogs := func(_ context.Context, _ agentsdk.PatchStartupLogs) error {
|
||||
assert.Fail(t, "should not be called")
|
||||
return nil
|
||||
}
|
||||
|
||||
sendLog, flushAndClose := agentsdk.StartupLogsSender(patchStartupLogs, slogtest.Make(t, nil).Leveled(slog.LevelDebug))
|
||||
defer func() {
|
||||
_ = flushAndClose(ctx)
|
||||
}()
|
||||
|
||||
cancel()
|
||||
err := sendLog(ctx, agentsdk.StartupLog{
|
||||
CreatedAt: time.Now(),
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
})
|
||||
require.Error(t, err)
|
||||
|
||||
ctx, cancel = context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
err = flushAndClose(ctx)
|
||||
require.NoError(t, err)
|
||||
})
|
||||
|
||||
t.Run("context canceled during flush", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
|
||||
var want, got []agentsdk.StartupLog
|
||||
patchStartupLogs := func(_ context.Context, req agentsdk.PatchStartupLogs) error {
|
||||
got = append(got, req.Logs...)
|
||||
return nil
|
||||
}
|
||||
|
||||
sendLog, flushAndClose := agentsdk.StartupLogsSender(patchStartupLogs, slogtest.Make(t, nil).Leveled(slog.LevelDebug))
|
||||
defer func() {
|
||||
_ = flushAndClose(ctx)
|
||||
}()
|
||||
|
||||
err := sendLog(ctx, agentsdk.StartupLog{
|
||||
CreatedAt: time.Now(),
|
||||
Level: codersdk.LogLevelInfo,
|
||||
Output: "hello world",
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
cancel()
|
||||
err = flushAndClose(ctx)
|
||||
require.Error(t, err)
|
||||
|
||||
require.Equal(t, want, got)
|
||||
})
|
||||
}
|
Reference in New Issue
Block a user