From 44499315eda8192f895f76ea69da0308bc994bfe Mon Sep 17 00:00:00 2001 From: Hugo Dutka Date: Thu, 20 Feb 2025 16:33:14 +0100 Subject: [PATCH] chore: reduce log volume on server startup (#16608) Addresses https://github.com/coder/coder/issues/16231. This PR reduces the volume of logs we print after server startup in order to surface the web UI URL better. Here are the logs after the changes a couple of seconds after starting the server: Screenshot 2025-02-18 at 16 31 32 The warning is due to running a development site-less build. It wouldn't show in a release build. --- cli/server.go | 2 +- cli/server_test.go | 84 ++++++++++++++++++++ coderd/cryptokeys/rotate.go | 4 +- coderd/database/dbpurge/dbpurge.go | 2 +- enterprise/cli/provisionerdaemonstart.go | 9 ++- provisioner/terraform/install.go | 44 ++++++++-- provisioner/terraform/install_test.go | 2 +- provisioner/terraform/serve.go | 72 ++++++++++------- provisioner/terraform/serve_internal_test.go | 6 +- provisionerd/provisionerd.go | 32 +++++--- provisionersdk/serve.go | 7 +- tailnet/controllers.go | 4 +- 12 files changed, 207 insertions(+), 61 deletions(-) diff --git a/cli/server.go b/cli/server.go index 103eafcd20..2426bf888e 100644 --- a/cli/server.go +++ b/cli/server.go @@ -938,7 +938,7 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd. notificationReportGenerator := reports.NewReportGenerator(ctx, logger.Named("notifications.report_generator"), options.Database, options.NotificationsEnqueuer, quartz.NewReal()) defer notificationReportGenerator.Close() } else { - cliui.Info(inv.Stdout, "Notifications are currently disabled as there are no configured delivery methods. See https://coder.com/docs/admin/monitoring/notifications#delivery-methods for more details.") + logger.Debug(ctx, "notifications are currently disabled as there are no configured delivery methods. See https://coder.com/docs/admin/monitoring/notifications#delivery-methods for more details") } // Since errCh only has one buffered slot, all routines diff --git a/cli/server_test.go b/cli/server_test.go index 988fde808d..d971637750 100644 --- a/cli/server_test.go +++ b/cli/server_test.go @@ -25,6 +25,7 @@ import ( "runtime" "strconv" "strings" + "sync" "sync/atomic" "testing" "time" @@ -240,6 +241,70 @@ func TestServer(t *testing.T) { t.Fatalf("expected postgres URL to start with \"postgres://\", got %q", got) } }) + t.Run("SpammyLogs", func(t *testing.T) { + // The purpose of this test is to ensure we don't show excessive logs when the server starts. + t.Parallel() + inv, cfg := clitest.New(t, + "server", + "--in-memory", + "--http-address", ":0", + "--access-url", "http://localhost:3000/", + "--cache-dir", t.TempDir(), + ) + stdoutRW := syncReaderWriter{} + stderrRW := syncReaderWriter{} + inv.Stdout = io.MultiWriter(os.Stdout, &stdoutRW) + inv.Stderr = io.MultiWriter(os.Stderr, &stderrRW) + clitest.Start(t, inv) + + // Wait for startup + _ = waitAccessURL(t, cfg) + + // Wait a bit for more logs to be printed. + time.Sleep(testutil.WaitShort) + + // Lines containing these strings are printed because we're + // running the server with a test config. They wouldn't be + // normally shown to the user, so we'll ignore them. + ignoreLines := []string{ + "isn't externally reachable", + "install.sh will be unavailable", + "telemetry disabled, unable to notify of security issues", + } + + countLines := func(fullOutput string) int { + terminalWidth := 80 + linesByNewline := strings.Split(fullOutput, "\n") + countByWidth := 0 + lineLoop: + for _, line := range linesByNewline { + for _, ignoreLine := range ignoreLines { + if strings.Contains(line, ignoreLine) { + continue lineLoop + } + } + if line == "" { + // Empty lines take up one line. + countByWidth++ + } else { + countByWidth += (len(line) + terminalWidth - 1) / terminalWidth + } + } + return countByWidth + } + + stdout, err := io.ReadAll(&stdoutRW) + if err != nil { + t.Fatalf("failed to read stdout: %v", err) + } + stderr, err := io.ReadAll(&stderrRW) + if err != nil { + t.Fatalf("failed to read stderr: %v", err) + } + + numLines := countLines(string(stdout)) + countLines(string(stderr)) + require.Less(t, numLines, 20) + }) // Validate that a warning is printed that it may not be externally // reachable. @@ -2140,3 +2205,22 @@ func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, ch return serverURL, deployment, snapshot } + +// syncWriter provides a thread-safe io.ReadWriter implementation +type syncReaderWriter struct { + buf bytes.Buffer + mu sync.Mutex +} + +func (w *syncReaderWriter) Write(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + return w.buf.Write(p) +} + +func (w *syncReaderWriter) Read(p []byte) (n int, err error) { + w.mu.Lock() + defer w.mu.Unlock() + + return w.buf.Read(p) +} diff --git a/coderd/cryptokeys/rotate.go b/coderd/cryptokeys/rotate.go index 26256b4cd4..24e764a015 100644 --- a/coderd/cryptokeys/rotate.go +++ b/coderd/cryptokeys/rotate.go @@ -152,7 +152,7 @@ func (k *rotator) rotateKeys(ctx context.Context) error { } } if validKeys == 0 { - k.logger.Info(ctx, "no valid keys detected, inserting new key", + k.logger.Debug(ctx, "no valid keys detected, inserting new key", slog.F("feature", feature), ) _, err := k.insertNewKey(ctx, tx, feature, now) @@ -194,7 +194,7 @@ func (k *rotator) insertNewKey(ctx context.Context, tx database.Store, feature d return database.CryptoKey{}, xerrors.Errorf("inserting new key: %w", err) } - k.logger.Info(ctx, "inserted new key for feature", slog.F("feature", feature)) + k.logger.Debug(ctx, "inserted new key for feature", slog.F("feature", feature)) return newKey, nil } diff --git a/coderd/database/dbpurge/dbpurge.go b/coderd/database/dbpurge/dbpurge.go index e9c22611f1..b7a308cfd6 100644 --- a/coderd/database/dbpurge/dbpurge.go +++ b/coderd/database/dbpurge/dbpurge.go @@ -63,7 +63,7 @@ func New(ctx context.Context, logger slog.Logger, db database.Store, clk quartz. return xerrors.Errorf("failed to delete old notification messages: %w", err) } - logger.Info(ctx, "purged old database entries", slog.F("duration", clk.Since(start))) + logger.Debug(ctx, "purged old database entries", slog.F("duration", clk.Since(start))) return nil }, database.DefaultTXOptions().WithID("db_purge")); err != nil { diff --git a/enterprise/cli/provisionerdaemonstart.go b/enterprise/cli/provisionerdaemonstart.go index 3c3f1f0712..8d7d319d39 100644 --- a/enterprise/cli/provisionerdaemonstart.go +++ b/enterprise/cli/provisionerdaemonstart.go @@ -236,10 +236,11 @@ func (r *RootCmd) provisionerDaemonStart() *serpent.Command { ProvisionerKey: provisionerKey, }) }, &provisionerd.Options{ - Logger: logger, - UpdateInterval: 500 * time.Millisecond, - Connector: connector, - Metrics: metrics, + Logger: logger, + UpdateInterval: 500 * time.Millisecond, + Connector: connector, + Metrics: metrics, + ExternalProvisioner: true, }) waitForProvisionerJobs := false diff --git a/provisioner/terraform/install.go b/provisioner/terraform/install.go index 74229c8539..9d2c81d296 100644 --- a/provisioner/terraform/install.go +++ b/provisioner/terraform/install.go @@ -2,8 +2,10 @@ package terraform import ( "context" + "fmt" "os" "path/filepath" + "sync/atomic" "time" "github.com/gofrs/flock" @@ -30,7 +32,9 @@ var ( // Install implements a thread-safe, idempotent Terraform Install // operation. -func Install(ctx context.Context, log slog.Logger, dir string, wantVersion *version.Version) (string, error) { +// +//nolint:revive // verbose is a control flag that controls the verbosity of the log output. +func Install(ctx context.Context, log slog.Logger, verbose bool, dir string, wantVersion *version.Version) (string, error) { err := os.MkdirAll(dir, 0o750) if err != nil { return "", err @@ -64,13 +68,37 @@ func Install(ctx context.Context, log slog.Logger, dir string, wantVersion *vers Version: TerraformVersion, } installer.SetLogger(slog.Stdlib(ctx, log, slog.LevelDebug)) - log.Debug( - ctx, - "installing terraform", + + logInstall := log.Debug + if verbose { + logInstall = log.Info + } + + logInstall(ctx, "installing terraform", slog.F("prev_version", hasVersionStr), slog.F("dir", dir), - slog.F("version", TerraformVersion), - ) + slog.F("version", TerraformVersion)) + + prolongedInstall := atomic.Bool{} + prolongedInstallCtx, prolongedInstallCancel := context.WithCancel(ctx) + go func() { + seconds := 15 + select { + case <-time.After(time.Duration(seconds) * time.Second): + prolongedInstall.Store(true) + // We always want to log this at the info level. + log.Info( + prolongedInstallCtx, + fmt.Sprintf("terraform installation is taking longer than %d seconds, still in progress", seconds), + slog.F("prev_version", hasVersionStr), + slog.F("dir", dir), + slog.F("version", TerraformVersion), + ) + case <-prolongedInstallCtx.Done(): + return + } + }() + defer prolongedInstallCancel() path, err := installer.Install(ctx) if err != nil { @@ -83,5 +111,9 @@ func Install(ctx context.Context, log slog.Logger, dir string, wantVersion *vers return "", xerrors.Errorf("%s should be %s", path, binPath) } + if prolongedInstall.Load() { + log.Info(ctx, "terraform installation complete") + } + return path, nil } diff --git a/provisioner/terraform/install_test.go b/provisioner/terraform/install_test.go index 54471bdf6c..6a1be707dd 100644 --- a/provisioner/terraform/install_test.go +++ b/provisioner/terraform/install_test.go @@ -40,7 +40,7 @@ func TestInstall(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - p, err := terraform.Install(ctx, log, dir, version) + p, err := terraform.Install(ctx, log, false, dir, version) assert.NoError(t, err) paths <- p }() diff --git a/provisioner/terraform/serve.go b/provisioner/terraform/serve.go index 7a3b033bf2..764b57da84 100644 --- a/provisioner/terraform/serve.go +++ b/provisioner/terraform/serve.go @@ -2,11 +2,13 @@ package terraform import ( "context" + "errors" "path/filepath" "sync" "time" "github.com/cli/safeexec" + "github.com/hashicorp/go-version" semconv "go.opentelemetry.io/otel/semconv/v1.14.0" "go.opentelemetry.io/otel/trace" "golang.org/x/xerrors" @@ -41,10 +43,15 @@ type ServeOptions struct { ExitTimeout time.Duration } -func absoluteBinaryPath(ctx context.Context, logger slog.Logger) (string, error) { +type systemBinaryDetails struct { + absolutePath string + version *version.Version +} + +func systemBinary(ctx context.Context) (*systemBinaryDetails, error) { binaryPath, err := safeexec.LookPath("terraform") if err != nil { - return "", xerrors.Errorf("Terraform binary not found: %w", err) + return nil, xerrors.Errorf("Terraform binary not found: %w", err) } // If the "coder" binary is in the same directory as @@ -54,59 +61,68 @@ func absoluteBinaryPath(ctx context.Context, logger slog.Logger) (string, error) // to execute this properly! absoluteBinary, err := filepath.Abs(binaryPath) if err != nil { - return "", xerrors.Errorf("Terraform binary absolute path not found: %w", err) + return nil, xerrors.Errorf("Terraform binary absolute path not found: %w", err) } // Checking the installed version of Terraform. installedVersion, err := versionFromBinaryPath(ctx, absoluteBinary) if err != nil { - return "", xerrors.Errorf("Terraform binary get version failed: %w", err) + return nil, xerrors.Errorf("Terraform binary get version failed: %w", err) } - logger.Info(ctx, "detected terraform version", - slog.F("installed_version", installedVersion.String()), - slog.F("min_version", minTerraformVersion.String()), - slog.F("max_version", maxTerraformVersion.String())) + details := &systemBinaryDetails{ + absolutePath: absoluteBinary, + version: installedVersion, + } if installedVersion.LessThan(minTerraformVersion) { - logger.Warn(ctx, "installed terraform version too old, will download known good version to cache") - return "", terraformMinorVersionMismatch + return details, terraformMinorVersionMismatch } - // Warn if the installed version is newer than what we've decided is the max. - // We used to ignore it and download our own version but this makes it easier - // to test out newer versions of Terraform. - if installedVersion.GreaterThanOrEqual(maxTerraformVersion) { - logger.Warn(ctx, "installed terraform version newer than expected, you may experience bugs", - slog.F("installed_version", installedVersion.String()), - slog.F("max_version", maxTerraformVersion.String())) - } - - return absoluteBinary, nil + return details, nil } // Serve starts a dRPC server on the provided transport speaking Terraform provisioner. func Serve(ctx context.Context, options *ServeOptions) error { if options.BinaryPath == "" { - absoluteBinary, err := absoluteBinaryPath(ctx, options.Logger) + binaryDetails, err := systemBinary(ctx) if err != nil { // This is an early exit to prevent extra execution in case the context is canceled. // It generally happens in unit tests since this method is asynchronous and // the unit test kills the app before this is complete. - if xerrors.Is(err, context.Canceled) { - return xerrors.Errorf("absolute binary context canceled: %w", err) + if errors.Is(err, context.Canceled) { + return xerrors.Errorf("system binary context canceled: %w", err) } - options.Logger.Warn(ctx, "no usable terraform binary found, downloading to cache dir", - slog.F("terraform_version", TerraformVersion.String()), - slog.F("cache_dir", options.CachePath)) - binPath, err := Install(ctx, options.Logger, options.CachePath, TerraformVersion) + if errors.Is(err, terraformMinorVersionMismatch) { + options.Logger.Warn(ctx, "installed terraform version too old, will download known good version to cache, or use a previously cached version", + slog.F("installed_version", binaryDetails.version.String()), + slog.F("min_version", minTerraformVersion.String())) + } + + binPath, err := Install(ctx, options.Logger, options.ExternalProvisioner, options.CachePath, TerraformVersion) if err != nil { return xerrors.Errorf("install terraform: %w", err) } options.BinaryPath = binPath } else { - options.BinaryPath = absoluteBinary + logVersion := options.Logger.Debug + if options.ExternalProvisioner { + logVersion = options.Logger.Info + } + logVersion(ctx, "detected terraform version", + slog.F("installed_version", binaryDetails.version.String()), + slog.F("min_version", minTerraformVersion.String()), + slog.F("max_version", maxTerraformVersion.String())) + // Warn if the installed version is newer than what we've decided is the max. + // We used to ignore it and download our own version but this makes it easier + // to test out newer versions of Terraform. + if binaryDetails.version.GreaterThanOrEqual(maxTerraformVersion) { + options.Logger.Warn(ctx, "installed terraform version newer than expected, you may experience bugs", + slog.F("installed_version", binaryDetails.version.String()), + slog.F("max_version", maxTerraformVersion.String())) + } + options.BinaryPath = binaryDetails.absolutePath } } if options.Tracer == nil { diff --git a/provisioner/terraform/serve_internal_test.go b/provisioner/terraform/serve_internal_test.go index 165a6e4a0a..0e4a673cd2 100644 --- a/provisioner/terraform/serve_internal_test.go +++ b/provisioner/terraform/serve_internal_test.go @@ -54,7 +54,6 @@ func Test_absoluteBinaryPath(t *testing.T) { t.Skip("Dummy terraform executable on Windows requires sh which isn't very practical.") } - log := testutil.Logger(t) // Create a temp dir with the binary tempDir := t.TempDir() terraformBinaryOutput := fmt.Sprintf(`#!/bin/sh @@ -85,11 +84,12 @@ func Test_absoluteBinaryPath(t *testing.T) { } ctx := testutil.Context(t, testutil.WaitShort) - actualAbsoluteBinary, actualErr := absoluteBinaryPath(ctx, log) + actualBinaryDetails, actualErr := systemBinary(ctx) - require.Equal(t, expectedAbsoluteBinary, actualAbsoluteBinary) if tt.expectedErr == nil { require.NoError(t, actualErr) + require.Equal(t, expectedAbsoluteBinary, actualBinaryDetails.absolutePath) + require.Equal(t, tt.terraformVersion, actualBinaryDetails.version.String()) } else { require.EqualError(t, actualErr, tt.expectedErr.Error()) } diff --git a/provisionerd/provisionerd.go b/provisionerd/provisionerd.go index e3b8da8bfe..b461bc593e 100644 --- a/provisionerd/provisionerd.go +++ b/provisionerd/provisionerd.go @@ -56,6 +56,7 @@ type Options struct { TracerProvider trace.TracerProvider Metrics *Metrics + ExternalProvisioner bool ForceCancelInterval time.Duration UpdateInterval time.Duration LogBufferInterval time.Duration @@ -97,12 +98,13 @@ func New(clientDialer Dialer, opts *Options) *Server { clientDialer: clientDialer, clientCh: make(chan proto.DRPCProvisionerDaemonClient), - closeContext: ctx, - closeCancel: ctxCancel, - closedCh: make(chan struct{}), - shuttingDownCh: make(chan struct{}), - acquireDoneCh: make(chan struct{}), - initConnectionCh: opts.InitConnectionCh, + closeContext: ctx, + closeCancel: ctxCancel, + closedCh: make(chan struct{}), + shuttingDownCh: make(chan struct{}), + acquireDoneCh: make(chan struct{}), + initConnectionCh: opts.InitConnectionCh, + externalProvisioner: opts.ExternalProvisioner, } daemon.wg.Add(2) @@ -141,8 +143,9 @@ type Server struct { // shuttingDownCh will receive when we start graceful shutdown shuttingDownCh chan struct{} // acquireDoneCh will receive when the acquireLoop exits - acquireDoneCh chan struct{} - activeJob *runner.Runner + acquireDoneCh chan struct{} + activeJob *runner.Runner + externalProvisioner bool } type Metrics struct { @@ -212,6 +215,10 @@ func NewMetrics(reg prometheus.Registerer) Metrics { func (p *Server) connect() { defer p.opts.Logger.Debug(p.closeContext, "connect loop exited") defer p.wg.Done() + logConnect := p.opts.Logger.Debug + if p.externalProvisioner { + logConnect = p.opts.Logger.Info + } // An exponential back-off occurs when the connection is failing to dial. // This is to prevent server spam in case of a coderd outage. connectLoop: @@ -239,7 +246,12 @@ connectLoop: p.opts.Logger.Warn(p.closeContext, "coderd client failed to dial", slog.Error(err)) continue } - p.opts.Logger.Info(p.closeContext, "successfully connected to coderd") + // This log is useful to verify that an external provisioner daemon is + // successfully connecting to coderd. It doesn't add much value if the + // daemon is built-in, so we only log it on the info level if p.externalProvisioner + // is true. This log message is mentioned in the docs: + // https://github.com/coder/coder/blob/5bd86cb1c06561d1d3e90ce689da220467e525c0/docs/admin/provisioners.md#L346 + logConnect(p.closeContext, "successfully connected to coderd") retrier.Reset() p.initConnectionOnce.Do(func() { close(p.initConnectionCh) @@ -252,7 +264,7 @@ connectLoop: client.DRPCConn().Close() return case <-client.DRPCConn().Closed(): - p.opts.Logger.Info(p.closeContext, "connection to coderd closed") + logConnect(p.closeContext, "connection to coderd closed") continue connectLoop case p.clientCh <- client: continue diff --git a/provisionersdk/serve.go b/provisionersdk/serve.go index baa3cc1412..b91329d066 100644 --- a/provisionersdk/serve.go +++ b/provisionersdk/serve.go @@ -25,9 +25,10 @@ type ServeOptions struct { // Listener serves multiple connections. Cannot be combined with Conn. Listener net.Listener // Conn is a single connection to serve. Cannot be combined with Listener. - Conn drpc.Transport - Logger slog.Logger - WorkDirectory string + Conn drpc.Transport + Logger slog.Logger + WorkDirectory string + ExternalProvisioner bool } type Server interface { diff --git a/tailnet/controllers.go b/tailnet/controllers.go index 832baf09cd..bf2ec1d964 100644 --- a/tailnet/controllers.go +++ b/tailnet/controllers.go @@ -1370,7 +1370,7 @@ func (c *Controller) Run(ctx context.Context) { c.logger.Error(c.ctx, "failed to dial tailnet v2+ API", errF) continue } - c.logger.Info(c.ctx, "obtained tailnet API v2+ client") + c.logger.Debug(c.ctx, "obtained tailnet API v2+ client") err = c.precheckClientsAndControllers(tailnetClients) if err != nil { c.logger.Critical(c.ctx, "failed precheck", slog.Error(err)) @@ -1379,7 +1379,7 @@ func (c *Controller) Run(ctx context.Context) { } retrier.Reset() c.runControllersOnce(tailnetClients) - c.logger.Info(c.ctx, "tailnet API v2+ connection lost") + c.logger.Debug(c.ctx, "tailnet API v2+ connection lost") } }() }