chore: track disabled telemetry (#16347)

Addresses https://github.com/coder/nexus/issues/116.

## Core Concept

After the user disables telemetry, send one final telemetry report
stating that telemetry was disabled. No other information about
the deployment is sent in this report.

This final report is submitted only if the deployment previously had
telemetry enabled.

## Changes

1. Refactored how our telemetry is initialized.
2. Introduced the `TelemetryEnabled` telemetry item, which is used to
decide whether a final report should be sent.
3. Added the `RecordTelemetryStatus` telemetry method, which decides
whether a final report should be sent and updates the telemetry item.
4. Added tests to ensure the implementation is correct.
This commit is contained in:
Hugo Dutka
2025-02-03 14:50:55 +01:00
committed by GitHub
parent 286b3d21e0
commit a68d11506c
4 changed files with 352 additions and 63 deletions

View File

@ -781,19 +781,19 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
// This should be output before the logs start streaming.
cliui.Infof(inv.Stdout, "\n==> Logs will stream in below (press ctrl+c to gracefully exit):")
if vals.Telemetry.Enable {
vals, err := vals.WithoutSecrets()
deploymentConfigWithoutSecrets, err := vals.WithoutSecrets()
if err != nil {
return xerrors.Errorf("remove secrets from deployment values: %w", err)
}
options.Telemetry, err = telemetry.New(telemetry.Options{
telemetryReporter, err := telemetry.New(telemetry.Options{
Disabled: !vals.Telemetry.Enable.Value(),
BuiltinPostgres: builtinPostgres,
DeploymentID: deploymentID,
Database: options.Database,
Logger: logger.Named("telemetry"),
URL: vals.Telemetry.URL.Value(),
Tunnel: tunnel != nil,
DeploymentConfig: vals,
DeploymentConfig: deploymentConfigWithoutSecrets,
ParseLicenseJWT: func(lic *telemetry.License) error {
// This will be nil when running in AGPL-only mode.
if options.ParseLicenseClaims == nil {
@ -814,7 +814,9 @@ func (r *RootCmd) Server(newAPI func(context.Context, *coderd.Options) (*coderd.
if err != nil {
return xerrors.Errorf("create telemetry reporter: %w", err)
}
defer options.Telemetry.Close()
defer telemetryReporter.Close()
if vals.Telemetry.Enable.Value() {
options.Telemetry = telemetryReporter
} else {
logger.Warn(ctx, fmt.Sprintf(`telemetry disabled, unable to notify of security issues. Read more: %s/admin/setup/telemetry`, vals.DocsURL.String()))
}

View File

@ -39,6 +39,7 @@ import (
"tailscale.com/types/key"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/cli"
"github.com/coder/coder/v2/cli/clitest"
"github.com/coder/coder/v2/cli/config"
@ -947,22 +948,7 @@ func TestServer(t *testing.T) {
t.Run("Telemetry", func(t *testing.T) {
t.Parallel()
deployment := make(chan struct{}, 64)
snapshot := make(chan *telemetry.Snapshot, 64)
r := chi.NewRouter()
r.Post("/deployment", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusAccepted)
deployment <- struct{}{}
})
r.Post("/snapshot", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusAccepted)
ss := &telemetry.Snapshot{}
err := json.NewDecoder(r.Body).Decode(ss)
require.NoError(t, err)
snapshot <- ss
})
server := httptest.NewServer(r)
defer server.Close()
telemetryServerURL, deployment, snapshot := mockTelemetryServer(t)
inv, cfg := clitest.New(t,
"server",
@ -970,7 +956,7 @@ func TestServer(t *testing.T) {
"--http-address", ":0",
"--access-url", "http://example.com",
"--telemetry",
"--telemetry-url", server.URL,
"--telemetry-url", telemetryServerURL.String(),
"--cache-dir", t.TempDir(),
)
clitest.Start(t, inv)
@ -2009,3 +1995,148 @@ func TestServer_DisabledDERP(t *testing.T) {
err = c.Connect(ctx)
require.Error(t, err)
}
// runServerOpts configures a single server run started by the runServer
// helper inside TestServer_TelemetryDisabled_FinalReport.
type runServerOpts struct {
	// telemetryDisabled starts the server with --telemetry=false when true.
	telemetryDisabled bool
	// waitForSnapshot makes the helper watch the server output for the
	// "submitted snapshot" log line.
	waitForSnapshot bool
	// waitForTelemetryDisabledCheck makes the helper watch the server output
	// for the "finished telemetry status check" log line.
	waitForTelemetryDisabledCheck bool
}
// TestServer_TelemetryDisabled_FinalReport starts the server four times
// against the same Postgres database and the same mock telemetry server, and
// checks what is reported after each run:
//  1. telemetry disabled on a fresh database: nothing is reported;
//  2. telemetry enabled: deployment and snapshot are reported twice;
//  3. telemetry disabled again: exactly one snapshot carrying only the
//     telemetry-enabled item with value "false" is reported;
//  4. telemetry still disabled: nothing is reported.
func TestServer_TelemetryDisabled_FinalReport(t *testing.T) {
t.Parallel()
// State must persist across the server restarts below, so a real Postgres
// database is required.
if !dbtestutil.WillUsePostgres() {
t.Skip("this test requires postgres")
}
telemetryServerURL, deployment, snapshot := mockTelemetryServer(t)
dbConnURL, err := dbtestutil.Open(t)
require.NoError(t, err)
// The same cache directory is reused by every server run.
cacheDir := t.TempDir()
// runServer starts the server in the background and returns once either the
// server has exited or the log lines requested via opts have been matched
// (whichever signals on `finished` first). It returns the channel that will
// receive the server's exit error and the function that cancels its context.
runServer := func(t *testing.T, opts runServerOpts) (chan error, context.CancelFunc) {
ctx, cancelFunc := context.WithCancel(context.Background())
inv, _ := clitest.New(t,
"server",
"--postgres-url", dbConnURL,
"--http-address", ":0",
"--access-url", "http://example.com",
"--telemetry="+strconv.FormatBool(!opts.telemetryDisabled),
"--telemetry-url", telemetryServerURL.String(),
"--cache-dir", cacheDir,
"--log-filter", ".*",
)
// Buffered for both goroutines below so neither blocks on send after
// runServer has already returned.
finished := make(chan bool, 2)
errChan := make(chan error, 1)
pty := ptytest.New(t).Attach(inv)
go func() {
errChan <- inv.WithContext(ctx).Run()
finished <- true
}()
go func() {
defer func() {
finished <- true
}()
// NOTE(review): "submitted snapshot" is presumably logged by the
// telemetry reporter after a successful upload; the emitting code is not
// visible here.
if opts.waitForSnapshot {
pty.ExpectMatchContext(testutil.Context(t, testutil.WaitLong), "submitted snapshot")
}
// "finished telemetry status check" is logged by runSnapshotter once the
// enabled/disabled status has been recorded.
if opts.waitForTelemetryDisabledCheck {
pty.ExpectMatchContext(testutil.Context(t, testutil.WaitLong), "finished telemetry status check")
}
}()
<-finished
return errChan, cancelFunc
}
// waitForShutdown returns the server's exit error, failing the test if the
// server does not exit within testutil.WaitMedium.
waitForShutdown := func(t *testing.T, errChan chan error) error {
t.Helper()
select {
case err := <-errChan:
return err
case <-time.After(testutil.WaitMedium):
t.Fatalf("timed out waiting for server to shutdown")
}
return nil
}
// Run 1: telemetry disabled on a database that has never had it enabled.
errChan, cancelFunc := runServer(t, runServerOpts{telemetryDisabled: true, waitForTelemetryDisabledCheck: true})
cancelFunc()
require.NoError(t, waitForShutdown(t, errChan))
// Since telemetry was disabled, we expect no deployments or snapshots.
require.Empty(t, deployment)
require.Empty(t, snapshot)
// Run 2: telemetry enabled (runServerOpts zero value leaves it on).
errChan, cancelFunc = runServer(t, runServerOpts{waitForSnapshot: true})
cancelFunc()
require.NoError(t, waitForShutdown(t, errChan))
// We expect to see a deployment and a snapshot twice:
// 1. the first pair is sent when the server starts
// 2. the second pair is sent when the server shuts down
for i := 0; i < 2; i++ {
select {
case <-snapshot:
case <-time.After(testutil.WaitShort / 2):
t.Fatalf("timed out waiting for snapshot")
}
select {
case <-deployment:
case <-time.After(testutil.WaitShort / 2):
t.Fatalf("timed out waiting for deployment")
}
}
// Run 3: telemetry disabled after having been enabled in run 2.
errChan, cancelFunc = runServer(t, runServerOpts{telemetryDisabled: true, waitForTelemetryDisabledCheck: true})
cancelFunc()
require.NoError(t, waitForShutdown(t, errChan))
// Since telemetry is disabled, we expect no deployment. We expect a snapshot
// with the telemetry disabled item.
require.Empty(t, deployment)
select {
case ss := <-snapshot:
require.Len(t, ss.TelemetryItems, 1)
require.Equal(t, string(telemetry.TelemetryItemKeyTelemetryEnabled), ss.TelemetryItems[0].Key)
require.Equal(t, "false", ss.TelemetryItems[0].Value)
case <-time.After(testutil.WaitShort / 2):
t.Fatalf("timed out waiting for snapshot")
}
// Run 4: telemetry still disabled; the final report must not be repeated.
errChan, cancelFunc = runServer(t, runServerOpts{telemetryDisabled: true, waitForTelemetryDisabledCheck: true})
cancelFunc()
require.NoError(t, waitForShutdown(t, errChan))
// Since telemetry is disabled and we've already sent a snapshot, we expect no
// new deployments or snapshots.
require.Empty(t, deployment)
require.Empty(t, snapshot)
}
// mockTelemetryServer starts an HTTP test server that accepts telemetry
// uploads on POST /deployment and POST /snapshot. Every request must carry
// the current build version in the telemetry version header. Decoded payloads
// are pushed onto the returned channels (buffered to 64 entries each), and
// the server is shut down automatically when the test finishes.
func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, chan *telemetry.Snapshot) {
	t.Helper()

	deploymentCh := make(chan *telemetry.Deployment, 64)
	snapshotCh := make(chan *telemetry.Snapshot, 64)

	router := chi.NewRouter()
	router.Post("/deployment", func(rw http.ResponseWriter, req *http.Request) {
		require.Equal(t, buildinfo.Version(), req.Header.Get(telemetry.VersionHeader))
		var payload telemetry.Deployment
		require.NoError(t, json.NewDecoder(req.Body).Decode(&payload))
		deploymentCh <- &payload
		// Ensure the header is sent only after deployment is sent
		rw.WriteHeader(http.StatusAccepted)
	})
	router.Post("/snapshot", func(rw http.ResponseWriter, req *http.Request) {
		require.Equal(t, buildinfo.Version(), req.Header.Get(telemetry.VersionHeader))
		var payload telemetry.Snapshot
		require.NoError(t, json.NewDecoder(req.Body).Decode(&payload))
		snapshotCh <- &payload
		// Ensure the header is sent only after snapshot is sent
		rw.WriteHeader(http.StatusAccepted)
	})

	srv := httptest.NewServer(router)
	t.Cleanup(srv.Close)

	parsedURL, err := url.Parse(srv.URL)
	require.NoError(t, err)
	return parsedURL, deploymentCh, snapshotCh
}

View File

@ -15,6 +15,7 @@ import (
"regexp"
"runtime"
"slices"
"strconv"
"strings"
"sync"
"time"
@ -42,6 +43,7 @@ const (
)
type Options struct {
Disabled bool
Database database.Store
Logger slog.Logger
// URL is an endpoint to direct telemetry towards!
@ -116,8 +118,8 @@ type remoteReporter struct {
shutdownAt *time.Time
}
func (*remoteReporter) Enabled() bool {
return true
func (r *remoteReporter) Enabled() bool {
return !r.options.Disabled
}
func (r *remoteReporter) Report(snapshot *Snapshot) {
@ -161,10 +163,12 @@ func (r *remoteReporter) Close() {
close(r.closed)
now := dbtime.Now()
r.shutdownAt = &now
if r.Enabled() {
// Report a final collection of telemetry prior to close!
// This could indicate final actions a user has taken, and
// the time the deployment was shutdown.
r.reportWithDeployment()
}
r.closeFunc()
}
@ -177,7 +181,74 @@ func (r *remoteReporter) isClosed() bool {
}
}
// ShouldReportTelemetryDisabled reports whether a final "telemetry disabled"
// report should be sent. That is the case only when a previously recorded
// status exists, that status was "enabled", and telemetry is now disabled.
// A nil recordedTelemetryEnabled means no status has been recorded yet.
//
// See the corresponding test in telemetry_test.go for a truth table.
func ShouldReportTelemetryDisabled(recordedTelemetryEnabled *bool, telemetryEnabled bool) bool {
	if recordedTelemetryEnabled == nil || telemetryEnabled {
		return false
	}
	return *recordedTelemetryEnabled
}
// RecordTelemetryStatus stores the current telemetry setting in the database
// under TelemetryItemKeyTelemetryEnabled.
//
// It first reads the previously stored value, then overwrites it with
// telemetryEnabled. When ShouldReportTelemetryDisabled says the status went
// from enabled to disabled, it returns a Snapshot containing only the freshly
// stored telemetry item, to be sent to the telemetry server. In every other
// case it returns (nil, nil). A database failure is returned as an error.
func RecordTelemetryStatus( //nolint:revive
	ctx context.Context,
	logger slog.Logger,
	db database.Store,
	telemetryEnabled bool,
) (*Snapshot, error) {
	key := string(TelemetryItemKeyTelemetryEnabled)

	// previous stays nil when no status has ever been recorded.
	var previous *bool
	stored, err := db.GetTelemetryItem(ctx, key)
	switch {
	case err == nil:
		parsed, parseErr := strconv.ParseBool(stored.Value)
		if parseErr != nil {
			logger.Debug(ctx, "parse telemetry enabled", slog.Error(parseErr))
		}
		// On a parse failure ParseBool yields false, so an unparsable value
		// is treated as "disabled". This may happen if an admin manually
		// edits the telemetry item in the database.
		previous = &parsed
	case !errors.Is(err, sql.ErrNoRows):
		return nil, xerrors.Errorf("get telemetry enabled: %w", err)
	}

	upsertErr := db.UpsertTelemetryItem(ctx, database.UpsertTelemetryItemParams{
		Key:   key,
		Value: strconv.FormatBool(telemetryEnabled),
	})
	if upsertErr != nil {
		return nil, xerrors.Errorf("upsert telemetry enabled: %w", upsertErr)
	}

	if !ShouldReportTelemetryDisabled(previous, telemetryEnabled) {
		return nil, nil //nolint:nilnil
	}

	// The new status is already persisted at this point. If the read below
	// (or the caller's upload) fails, the enabled-to-disabled transition is
	// never reported. This is okay: we only want to ping the telemetry server
	// once, and never again. If that attempt fails, so be it.
	current, err := db.GetTelemetryItem(ctx, key)
	if err != nil {
		return nil, xerrors.Errorf("get telemetry enabled after upsert: %w", err)
	}
	return &Snapshot{
		TelemetryItems: []TelemetryItem{ConvertTelemetryItem(current)},
	}, nil
}
func (r *remoteReporter) runSnapshotter() {
telemetryDisabledSnapshot, err := RecordTelemetryStatus(r.ctx, r.options.Logger, r.options.Database, r.Enabled())
if err != nil {
r.options.Logger.Debug(r.ctx, "record and maybe report telemetry status", slog.Error(err))
}
if telemetryDisabledSnapshot != nil {
r.reportSync(telemetryDisabledSnapshot)
}
r.options.Logger.Debug(r.ctx, "finished telemetry status check")
if !r.Enabled() {
return
}
first := true
ticker := time.NewTicker(r.options.SnapshotFrequency)
defer ticker.Stop()
@ -1567,6 +1638,7 @@ type telemetryItemKey string
//revive:disable:exported
const (
// TelemetryItemKeyHTMLFirstServedAt is the telemetry item key "html_first_served_at".
// NOTE(review): presumably records when the HTML frontend was first served;
// the code writing this item is not visible here — confirm.
TelemetryItemKeyHTMLFirstServedAt telemetryItemKey = "html_first_served_at"
// TelemetryItemKeyTelemetryEnabled is the key under which
// RecordTelemetryStatus stores the last observed telemetry setting as a
// strconv.FormatBool string ("true" or "false").
TelemetryItemKeyTelemetryEnabled telemetryItemKey = "telemetry_enabled"
)
type TelemetryItem struct {
@ -1581,3 +1653,5 @@ type noopReporter struct{}
// Report discards the snapshot.
func (*noopReporter) Report(_ *Snapshot) {
}

// Enabled always reports false for the no-op reporter.
func (*noopReporter) Enabled() bool {
	return false
}

// Close does nothing.
func (*noopReporter) Close() {
}

// RunSnapshotter does nothing.
func (*noopReporter) RunSnapshotter() {
}

// ReportDisabledIfNeeded does nothing and always returns nil.
func (*noopReporter) ReportDisabledIfNeeded() error {
	return nil
}

View File

@ -131,7 +131,8 @@ func TestTelemetry(t *testing.T) {
require.Len(t, snapshot.WorkspaceProxies, 1)
require.Len(t, snapshot.WorkspaceModules, 1)
require.Len(t, snapshot.Organizations, 1)
require.Len(t, snapshot.TelemetryItems, 1)
// We create one item manually above. The other is TelemetryEnabled, created by the snapshotter.
require.Len(t, snapshot.TelemetryItems, 2)
wsa := snapshot.WorkspaceAgents[0]
require.Len(t, wsa.Subsystems, 2)
require.Equal(t, string(database.WorkspaceAgentSubsystemEnvbox), wsa.Subsystems[0])
@ -361,31 +362,112 @@ func TestTelemetryItem(t *testing.T) {
require.Equal(t, item.Value, "new_value")
}
func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts telemetry.Options) telemetry.Options) (*telemetry.Deployment, *telemetry.Snapshot) {
func TestShouldReportTelemetryDisabled(t *testing.T) {
t.Parallel()
// Description | telemetryEnabled (db) | telemetryEnabled (is) | Report Telemetry Disabled |
//----------------------------------------|-----------------------|-----------------------|---------------------------|
// New deployment | <null> | true | No |
// New deployment with telemetry disabled | <null> | false | No |
// Telemetry was enabled, and still is | true | true | No |
// Telemetry was enabled but now disabled | true | false | Yes |
// Telemetry was disabled, now is enabled | false | true | No |
// Telemetry was disabled, still disabled | false | false | No |
boolTrue := true
boolFalse := false
require.False(t, telemetry.ShouldReportTelemetryDisabled(nil, true))
require.False(t, telemetry.ShouldReportTelemetryDisabled(nil, false))
require.False(t, telemetry.ShouldReportTelemetryDisabled(&boolTrue, true))
require.True(t, telemetry.ShouldReportTelemetryDisabled(&boolTrue, false))
require.False(t, telemetry.ShouldReportTelemetryDisabled(&boolFalse, true))
require.False(t, telemetry.ShouldReportTelemetryDisabled(&boolFalse, false))
}
// TestRecordTelemetryStatus exercises RecordTelemetryStatus against a real
// test database. Each case optionally seeds a previously recorded status
// ("nil" means no item is seeded), calls RecordTelemetryStatus once and checks
// whether a "telemetry disabled" snapshot is returned, then calls it again
// several times with the same setting to confirm the report is never repeated.
func TestRecordTelemetryStatus(t *testing.T) {
	t.Parallel()
	for _, testCase := range []struct {
		name string
		// recordedTelemetryEnabled is the raw value seeded into the database,
		// or "nil" to leave the item absent.
		recordedTelemetryEnabled string
		telemetryEnabled         bool
		shouldReport             bool
	}{
		{name: "New deployment", recordedTelemetryEnabled: "nil", telemetryEnabled: true, shouldReport: false},
		{name: "Telemetry disabled", recordedTelemetryEnabled: "nil", telemetryEnabled: false, shouldReport: false},
		{name: "Telemetry was enabled and still is", recordedTelemetryEnabled: "true", telemetryEnabled: true, shouldReport: false},
		{name: "Telemetry was enabled but now disabled", recordedTelemetryEnabled: "true", telemetryEnabled: false, shouldReport: true},
		{name: "Telemetry was disabled now is enabled", recordedTelemetryEnabled: "false", telemetryEnabled: true, shouldReport: false},
		{name: "Telemetry was disabled still disabled", recordedTelemetryEnabled: "false", telemetryEnabled: false, shouldReport: false},
		{name: "Telemetry was disabled still disabled, invalid value", recordedTelemetryEnabled: "invalid", telemetryEnabled: false, shouldReport: false},
	} {
		testCase := testCase
		t.Run(testCase.name, func(t *testing.T) {
			t.Parallel()

			db, _ := dbtestutil.NewDB(t)
			ctx := testutil.Context(t, testutil.WaitMedium)
			logger := testutil.Logger(t)
			if testCase.recordedTelemetryEnabled != "nil" {
				// A failed seed would silently turn this case into the
				// "no recorded status" scenario, so it must be checked.
				err := db.UpsertTelemetryItem(ctx, database.UpsertTelemetryItemParams{
					Key:   string(telemetry.TelemetryItemKeyTelemetryEnabled),
					Value: testCase.recordedTelemetryEnabled,
				})
				require.NoError(t, err)
			}
			snapshot1, err := telemetry.RecordTelemetryStatus(ctx, logger, db, testCase.telemetryEnabled)
			require.NoError(t, err)

			if testCase.shouldReport {
				require.NotNil(t, snapshot1)
				// Guard the index below so a malformed snapshot fails the
				// assertion instead of panicking.
				require.Len(t, snapshot1.TelemetryItems, 1)
				require.Equal(t, string(telemetry.TelemetryItemKeyTelemetryEnabled), snapshot1.TelemetryItems[0].Key)
				require.Equal(t, "false", snapshot1.TelemetryItems[0].Value)
			} else {
				require.Nil(t, snapshot1)
			}

			for i := 0; i < 3; i++ {
				// Whatever happens, subsequent calls should not report if telemetryEnabled didn't change
				snapshot2, err := telemetry.RecordTelemetryStatus(ctx, logger, db, testCase.telemetryEnabled)
				require.NoError(t, err)
				require.Nil(t, snapshot2)
			}
		})
	}
}
// mockTelemetryServer starts an HTTP test server that accepts telemetry
// uploads on POST /deployment and POST /snapshot. Every request must carry
// the current build version in the telemetry version header. Decoded payloads
// are pushed onto the returned channels (buffered to 64 entries each), and
// the server is shut down automatically when the test finishes.
//
// Each handler writes its 202 status exactly once, and only after the payload
// has been queued. A client that has received the response can therefore rely
// on the payload already being in the channel.
func mockTelemetryServer(t *testing.T) (*url.URL, chan *telemetry.Deployment, chan *telemetry.Snapshot) {
	t.Helper()
	deployment := make(chan *telemetry.Deployment, 64)
	snapshot := make(chan *telemetry.Snapshot, 64)
	r := chi.NewRouter()
	r.Post("/deployment", func(w http.ResponseWriter, r *http.Request) {
		require.Equal(t, buildinfo.Version(), r.Header.Get(telemetry.VersionHeader))
		dd := &telemetry.Deployment{}
		err := json.NewDecoder(r.Body).Decode(dd)
		require.NoError(t, err)
		deployment <- dd
		// Ensure the header is sent only after deployment is sent
		w.WriteHeader(http.StatusAccepted)
	})
	r.Post("/snapshot", func(w http.ResponseWriter, r *http.Request) {
		require.Equal(t, buildinfo.Version(), r.Header.Get(telemetry.VersionHeader))
		ss := &telemetry.Snapshot{}
		err := json.NewDecoder(r.Body).Decode(ss)
		require.NoError(t, err)
		snapshot <- ss
		// Ensure the header is sent only after snapshot is sent
		w.WriteHeader(http.StatusAccepted)
	})
	server := httptest.NewServer(r)
	t.Cleanup(server.Close)
	serverURL, err := url.Parse(server.URL)
	require.NoError(t, err)

	return serverURL, deployment, snapshot
}
func collectSnapshot(t *testing.T, db database.Store, addOptionsFn func(opts telemetry.Options) telemetry.Options) (*telemetry.Deployment, *telemetry.Snapshot) {
t.Helper()
serverURL, deployment, snapshot := mockTelemetryServer(t)
options := telemetry.Options{
Database: db,
Logger: testutil.Logger(t),