mirror of
https://github.com/coder/coder.git
synced 2025-07-12 00:14:10 +00:00
feat: add prometheus metric for tracking user statuses (#15281)
This commit is contained in:
@ -212,10 +212,16 @@ func enablePrometheus(
|
||||
options.PrometheusRegistry.MustRegister(collectors.NewGoCollector())
|
||||
options.PrometheusRegistry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
|
||||
|
||||
closeUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0)
|
||||
closeActiveUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.Logger.Named("active_user_metrics"), options.PrometheusRegistry, options.Database, 0)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("register active users prometheus metric: %w", err)
|
||||
}
|
||||
afterCtx(ctx, closeActiveUsersFunc)
|
||||
|
||||
closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), quartz.NewReal(), options.PrometheusRegistry, options.Database, 0)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("register users prometheus metric: %w", err)
|
||||
}
|
||||
afterCtx(ctx, closeUsersFunc)
|
||||
|
||||
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0)
|
||||
|
@ -12,6 +12,7 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"golang.org/x/xerrors"
|
||||
"tailscale.com/tailcfg"
|
||||
|
||||
"cdr.dev/slog"
|
||||
@ -22,12 +23,13 @@ import (
|
||||
"github.com/coder/coder/v2/coderd/database/dbtime"
|
||||
"github.com/coder/coder/v2/codersdk"
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
"github.com/coder/quartz"
|
||||
)
|
||||
|
||||
const defaultRefreshRate = time.Minute
|
||||
|
||||
// ActiveUsers tracks the number of users that have authenticated within the past hour.
|
||||
func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||
func ActiveUsers(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||
if duration == 0 {
|
||||
duration = defaultRefreshRate
|
||||
}
|
||||
@ -58,6 +60,7 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
|
||||
|
||||
apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, dbtime.Now().Add(-1*time.Hour))
|
||||
if err != nil {
|
||||
logger.Error(ctx, "get api keys for active users prometheus metric", slog.Error(err))
|
||||
continue
|
||||
}
|
||||
distinctUsers := map[uuid.UUID]struct{}{}
|
||||
@ -73,6 +76,57 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Users tracks the total number of registered users, partitioned by status.
|
||||
func Users(ctx context.Context, logger slog.Logger, clk quartz.Clock, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||
if duration == 0 {
|
||||
// It's not super important this tracks real-time.
|
||||
duration = defaultRefreshRate * 5
|
||||
}
|
||||
|
||||
gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Namespace: "coderd",
|
||||
Subsystem: "api",
|
||||
Name: "total_user_count",
|
||||
Help: "The total number of registered users, partitioned by status.",
|
||||
}, []string{"status"})
|
||||
err := registerer.Register(gauge)
|
||||
if err != nil {
|
||||
return nil, xerrors.Errorf("register total_user_count gauge: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancelFunc := context.WithCancel(ctx)
|
||||
done := make(chan struct{})
|
||||
ticker := clk.NewTicker(duration)
|
||||
go func() {
|
||||
defer close(done)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
case <-ticker.C:
|
||||
}
|
||||
|
||||
gauge.Reset()
|
||||
//nolint:gocritic // This is a system service that needs full access
|
||||
//to the users table.
|
||||
users, err := db.GetUsers(dbauthz.AsSystemRestricted(ctx), database.GetUsersParams{})
|
||||
if err != nil {
|
||||
logger.Error(ctx, "get all users for prometheus metrics", slog.Error(err))
|
||||
continue
|
||||
}
|
||||
|
||||
for _, user := range users {
|
||||
gauge.WithLabelValues(string(user.Status)).Inc()
|
||||
}
|
||||
}
|
||||
}()
|
||||
return func() {
|
||||
cancelFunc()
|
||||
<-done
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Workspaces tracks the total number of workspaces with labels on status.
|
||||
func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||
if duration == 0 {
|
||||
|
@ -38,6 +38,7 @@ import (
|
||||
"github.com/coder/coder/v2/tailnet"
|
||||
"github.com/coder/coder/v2/tailnet/tailnettest"
|
||||
"github.com/coder/coder/v2/testutil"
|
||||
"github.com/coder/quartz"
|
||||
)
|
||||
|
||||
func TestActiveUsers(t *testing.T) {
|
||||
@ -98,7 +99,7 @@ func TestActiveUsers(t *testing.T) {
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
registry := prometheus.NewRegistry()
|
||||
closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), registry, tc.Database(t), time.Millisecond)
|
||||
closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), time.Millisecond)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(closeFunc)
|
||||
|
||||
@ -112,6 +113,100 @@ func TestActiveUsers(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestUsers(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
for _, tc := range []struct {
|
||||
Name string
|
||||
Database func(t *testing.T) database.Store
|
||||
Count map[database.UserStatus]int
|
||||
}{{
|
||||
Name: "None",
|
||||
Database: func(t *testing.T) database.Store {
|
||||
return dbmem.New()
|
||||
},
|
||||
Count: map[database.UserStatus]int{},
|
||||
}, {
|
||||
Name: "One",
|
||||
Database: func(t *testing.T) database.Store {
|
||||
db := dbmem.New()
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||
return db
|
||||
},
|
||||
Count: map[database.UserStatus]int{database.UserStatusActive: 1},
|
||||
}, {
|
||||
Name: "MultipleStatuses",
|
||||
Database: func(t *testing.T) database.Store {
|
||||
db := dbmem.New()
|
||||
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
|
||||
|
||||
return db
|
||||
},
|
||||
Count: map[database.UserStatus]int{database.UserStatusActive: 1, database.UserStatusDormant: 1},
|
||||
}, {
|
||||
Name: "MultipleActive",
|
||||
Database: func(t *testing.T) database.Store {
|
||||
db := dbmem.New()
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||
return db
|
||||
},
|
||||
Count: map[database.UserStatus]int{database.UserStatusActive: 3},
|
||||
}} {
|
||||
tc := tc
|
||||
t.Run(tc.Name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||
defer cancel()
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
mClock := quartz.NewMock(t)
|
||||
db := tc.Database(t)
|
||||
closeFunc, err := prometheusmetrics.Users(context.Background(), slogtest.Make(t, nil), mClock, registry, db, time.Millisecond)
|
||||
require.NoError(t, err)
|
||||
t.Cleanup(closeFunc)
|
||||
|
||||
_, w := mClock.AdvanceNext()
|
||||
w.MustWait(ctx)
|
||||
|
||||
checkFn := func() bool {
|
||||
metrics, err := registry.Gather()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// If we get no metrics and we know none should exist, bail
|
||||
// early. If we get no metrics but we expect some, retry.
|
||||
if len(metrics) == 0 {
|
||||
return len(tc.Count) == 0
|
||||
}
|
||||
|
||||
for _, metric := range metrics[0].Metric {
|
||||
if tc.Count[database.UserStatus(*metric.Label[0].Value)] != int(metric.Gauge.GetValue()) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast)
|
||||
|
||||
// Add another dormant user and ensure it updates
|
||||
dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
|
||||
tc.Count[database.UserStatusDormant]++
|
||||
|
||||
_, w = mClock.AdvanceNext()
|
||||
w.MustWait(ctx)
|
||||
|
||||
require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestWorkspaceLatestBuildTotals(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
Reference in New Issue
Block a user