mirror of
https://github.com/coder/coder.git
synced 2025-07-12 00:14:10 +00:00
feat: add prometheus metric for tracking user statuses (#15281)
This commit is contained in:
@ -212,10 +212,16 @@ func enablePrometheus(
|
|||||||
options.PrometheusRegistry.MustRegister(collectors.NewGoCollector())
|
options.PrometheusRegistry.MustRegister(collectors.NewGoCollector())
|
||||||
options.PrometheusRegistry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
|
options.PrometheusRegistry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}))
|
||||||
|
|
||||||
closeUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.PrometheusRegistry, options.Database, 0)
|
closeActiveUsersFunc, err := prometheusmetrics.ActiveUsers(ctx, options.Logger.Named("active_user_metrics"), options.PrometheusRegistry, options.Database, 0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, xerrors.Errorf("register active users prometheus metric: %w", err)
|
return nil, xerrors.Errorf("register active users prometheus metric: %w", err)
|
||||||
}
|
}
|
||||||
|
afterCtx(ctx, closeActiveUsersFunc)
|
||||||
|
|
||||||
|
closeUsersFunc, err := prometheusmetrics.Users(ctx, options.Logger.Named("user_metrics"), quartz.NewReal(), options.PrometheusRegistry, options.Database, 0)
|
||||||
|
if err != nil {
|
||||||
|
return nil, xerrors.Errorf("register users prometheus metric: %w", err)
|
||||||
|
}
|
||||||
afterCtx(ctx, closeUsersFunc)
|
afterCtx(ctx, closeUsersFunc)
|
||||||
|
|
||||||
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0)
|
closeWorkspacesFunc, err := prometheusmetrics.Workspaces(ctx, options.Logger.Named("workspaces_metrics"), options.PrometheusRegistry, options.Database, 0)
|
||||||
|
@ -12,6 +12,7 @@ import (
|
|||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"golang.org/x/xerrors"
|
||||||
"tailscale.com/tailcfg"
|
"tailscale.com/tailcfg"
|
||||||
|
|
||||||
"cdr.dev/slog"
|
"cdr.dev/slog"
|
||||||
@ -22,12 +23,13 @@ import (
|
|||||||
"github.com/coder/coder/v2/coderd/database/dbtime"
|
"github.com/coder/coder/v2/coderd/database/dbtime"
|
||||||
"github.com/coder/coder/v2/codersdk"
|
"github.com/coder/coder/v2/codersdk"
|
||||||
"github.com/coder/coder/v2/tailnet"
|
"github.com/coder/coder/v2/tailnet"
|
||||||
|
"github.com/coder/quartz"
|
||||||
)
|
)
|
||||||
|
|
||||||
const defaultRefreshRate = time.Minute
|
const defaultRefreshRate = time.Minute
|
||||||
|
|
||||||
// ActiveUsers tracks the number of users that have authenticated within the past hour.
|
// ActiveUsers tracks the number of users that have authenticated within the past hour.
|
||||||
func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
func ActiveUsers(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||||
if duration == 0 {
|
if duration == 0 {
|
||||||
duration = defaultRefreshRate
|
duration = defaultRefreshRate
|
||||||
}
|
}
|
||||||
@ -58,6 +60,7 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
|
|||||||
|
|
||||||
apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, dbtime.Now().Add(-1*time.Hour))
|
apiKeys, err := db.GetAPIKeysLastUsedAfter(ctx, dbtime.Now().Add(-1*time.Hour))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logger.Error(ctx, "get api keys for active users prometheus metric", slog.Error(err))
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
distinctUsers := map[uuid.UUID]struct{}{}
|
distinctUsers := map[uuid.UUID]struct{}{}
|
||||||
@ -73,6 +76,57 @@ func ActiveUsers(ctx context.Context, registerer prometheus.Registerer, db datab
|
|||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Users tracks the total number of registered users, partitioned by status.
|
||||||
|
func Users(ctx context.Context, logger slog.Logger, clk quartz.Clock, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||||
|
if duration == 0 {
|
||||||
|
// It's not super important this tracks real-time.
|
||||||
|
duration = defaultRefreshRate * 5
|
||||||
|
}
|
||||||
|
|
||||||
|
gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Namespace: "coderd",
|
||||||
|
Subsystem: "api",
|
||||||
|
Name: "total_user_count",
|
||||||
|
Help: "The total number of registered users, partitioned by status.",
|
||||||
|
}, []string{"status"})
|
||||||
|
err := registerer.Register(gauge)
|
||||||
|
if err != nil {
|
||||||
|
return nil, xerrors.Errorf("register total_user_count gauge: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancelFunc := context.WithCancel(ctx)
|
||||||
|
done := make(chan struct{})
|
||||||
|
ticker := clk.NewTicker(duration)
|
||||||
|
go func() {
|
||||||
|
defer close(done)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
}
|
||||||
|
|
||||||
|
gauge.Reset()
|
||||||
|
//nolint:gocritic // This is a system service that needs full access
|
||||||
|
//to the users table.
|
||||||
|
users, err := db.GetUsers(dbauthz.AsSystemRestricted(ctx), database.GetUsersParams{})
|
||||||
|
if err != nil {
|
||||||
|
logger.Error(ctx, "get all users for prometheus metrics", slog.Error(err))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, user := range users {
|
||||||
|
gauge.WithLabelValues(string(user.Status)).Inc()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
return func() {
|
||||||
|
cancelFunc()
|
||||||
|
<-done
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Workspaces tracks the total number of workspaces with labels on status.
|
// Workspaces tracks the total number of workspaces with labels on status.
|
||||||
func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
func Workspaces(ctx context.Context, logger slog.Logger, registerer prometheus.Registerer, db database.Store, duration time.Duration) (func(), error) {
|
||||||
if duration == 0 {
|
if duration == 0 {
|
||||||
|
@ -38,6 +38,7 @@ import (
|
|||||||
"github.com/coder/coder/v2/tailnet"
|
"github.com/coder/coder/v2/tailnet"
|
||||||
"github.com/coder/coder/v2/tailnet/tailnettest"
|
"github.com/coder/coder/v2/tailnet/tailnettest"
|
||||||
"github.com/coder/coder/v2/testutil"
|
"github.com/coder/coder/v2/testutil"
|
||||||
|
"github.com/coder/quartz"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestActiveUsers(t *testing.T) {
|
func TestActiveUsers(t *testing.T) {
|
||||||
@ -98,7 +99,7 @@ func TestActiveUsers(t *testing.T) {
|
|||||||
t.Run(tc.Name, func(t *testing.T) {
|
t.Run(tc.Name, func(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
registry := prometheus.NewRegistry()
|
registry := prometheus.NewRegistry()
|
||||||
closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), registry, tc.Database(t), time.Millisecond)
|
closeFunc, err := prometheusmetrics.ActiveUsers(context.Background(), slogtest.Make(t, nil), registry, tc.Database(t), time.Millisecond)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
t.Cleanup(closeFunc)
|
t.Cleanup(closeFunc)
|
||||||
|
|
||||||
@ -112,6 +113,100 @@ func TestActiveUsers(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestUsers(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
for _, tc := range []struct {
|
||||||
|
Name string
|
||||||
|
Database func(t *testing.T) database.Store
|
||||||
|
Count map[database.UserStatus]int
|
||||||
|
}{{
|
||||||
|
Name: "None",
|
||||||
|
Database: func(t *testing.T) database.Store {
|
||||||
|
return dbmem.New()
|
||||||
|
},
|
||||||
|
Count: map[database.UserStatus]int{},
|
||||||
|
}, {
|
||||||
|
Name: "One",
|
||||||
|
Database: func(t *testing.T) database.Store {
|
||||||
|
db := dbmem.New()
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||||
|
return db
|
||||||
|
},
|
||||||
|
Count: map[database.UserStatus]int{database.UserStatusActive: 1},
|
||||||
|
}, {
|
||||||
|
Name: "MultipleStatuses",
|
||||||
|
Database: func(t *testing.T) database.Store {
|
||||||
|
db := dbmem.New()
|
||||||
|
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
|
||||||
|
|
||||||
|
return db
|
||||||
|
},
|
||||||
|
Count: map[database.UserStatus]int{database.UserStatusActive: 1, database.UserStatusDormant: 1},
|
||||||
|
}, {
|
||||||
|
Name: "MultipleActive",
|
||||||
|
Database: func(t *testing.T) database.Store {
|
||||||
|
db := dbmem.New()
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusActive})
|
||||||
|
return db
|
||||||
|
},
|
||||||
|
Count: map[database.UserStatus]int{database.UserStatusActive: 3},
|
||||||
|
}} {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.Name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
registry := prometheus.NewRegistry()
|
||||||
|
mClock := quartz.NewMock(t)
|
||||||
|
db := tc.Database(t)
|
||||||
|
closeFunc, err := prometheusmetrics.Users(context.Background(), slogtest.Make(t, nil), mClock, registry, db, time.Millisecond)
|
||||||
|
require.NoError(t, err)
|
||||||
|
t.Cleanup(closeFunc)
|
||||||
|
|
||||||
|
_, w := mClock.AdvanceNext()
|
||||||
|
w.MustWait(ctx)
|
||||||
|
|
||||||
|
checkFn := func() bool {
|
||||||
|
metrics, err := registry.Gather()
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we get no metrics and we know none should exist, bail
|
||||||
|
// early. If we get no metrics but we expect some, retry.
|
||||||
|
if len(metrics) == 0 {
|
||||||
|
return len(tc.Count) == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, metric := range metrics[0].Metric {
|
||||||
|
if tc.Count[database.UserStatus(*metric.Label[0].Value)] != int(metric.Gauge.GetValue()) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast)
|
||||||
|
|
||||||
|
// Add another dormant user and ensure it updates
|
||||||
|
dbgen.User(t, db, database.User{Status: database.UserStatusDormant})
|
||||||
|
tc.Count[database.UserStatusDormant]++
|
||||||
|
|
||||||
|
_, w = mClock.AdvanceNext()
|
||||||
|
w.MustWait(ctx)
|
||||||
|
|
||||||
|
require.Eventually(t, checkFn, testutil.WaitShort, testutil.IntervalFast)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestWorkspaceLatestBuildTotals(t *testing.T) {
|
func TestWorkspaceLatestBuildTotals(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user