add prebuild metrics and observability

This commit is contained in:
Sas Swart
2025-02-25 09:07:48 +00:00
parent 5e854da331
commit 697c99d8cf
2 changed files with 140 additions and 0 deletions

View File

@ -0,0 +1,71 @@
package prebuilds
import (
"context"
"time"
"cdr.dev/slog"
"github.com/prometheus/client_golang/prometheus"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbauthz"
)
// Prometheus descriptors for prebuild metrics. Every descriptor carries the
// same two variable labels: "template_name" and "preset_name".
var (
	// CreatedPrebuildsDesc is emitted by MetricsCollector.Collect as a counter.
	CreatedPrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_created",
		"The number of prebuilds created.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// FailedPrebuildsDesc is emitted by MetricsCollector.Collect as a counter.
	FailedPrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_failed",
		"The number of prebuilds that failed.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// AssignedPrebuildsDesc is emitted by MetricsCollector.Collect as a counter.
	AssignedPrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_assigned",
		"The number of prebuilds that were assigned to a runner.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// UsedPresetsDesc is emitted by MetricsCollector.Collect as a counter.
	UsedPresetsDesc = prometheus.NewDesc(
		"coderd_presets_used",
		"The number of times a preset was used.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// ExhaustedPrebuildsDesc is emitted by MetricsCollector.Collect as a counter.
	ExhaustedPrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_exhausted",
		"The number of prebuilds that were exhausted.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// DesiredPrebuildsDesc is announced by Describe; Collect does not emit a
	// value for it yet (see the gauge TODO in Collect).
	DesiredPrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_desired",
		"The number of desired prebuilds.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// ActualPrebuildsDesc is announced by Describe; Collect does not emit a
	// value for it yet (see the gauge TODO in Collect).
	ActualPrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_actual",
		"The number of actual prebuilds.",
		[]string{"template_name", "preset_name"},
		nil,
	)
	// EligiblePrebuildsDesc is announced by Describe; Collect does not emit a
	// value for it yet (see the gauge TODO in Collect).
	EligiblePrebuildsDesc = prometheus.NewDesc(
		"coderd_prebuilds_eligible",
		"The number of eligible prebuilds.",
		[]string{"template_name", "preset_name"},
		nil,
	)
)
// MetricsCollector is a prometheus.Collector that reports prebuild metrics
// read from the database each time it is collected.
type MetricsCollector struct {
	// database is queried for prebuild metrics on every Collect call.
	database database.Store
	// logger reports failures to load metrics during Collect.
	logger slog.Logger
}

// Compile-time check that *MetricsCollector satisfies prometheus.Collector.
var _ prometheus.Collector = (*MetricsCollector)(nil)
// NewMetricsCollector returns a MetricsCollector that reads from db and logs
// through a child of logger named "prebuilds_metrics_collector".
func NewMetricsCollector(db database.Store, logger slog.Logger) *MetricsCollector {
	named := logger.Named("prebuilds_metrics_collector")

	collector := new(MetricsCollector)
	collector.database = db
	collector.logger = named
	return collector
}
// Describe implements prometheus.Collector by sending every prebuild metric
// descriptor declared in this package to descCh.
func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
	// The order matches the original sequence of sends.
	all := []*prometheus.Desc{
		CreatedPrebuildsDesc,
		FailedPrebuildsDesc,
		AssignedPrebuildsDesc,
		UsedPresetsDesc,
		ExhaustedPrebuildsDesc,
		DesiredPrebuildsDesc,
		ActualPrebuildsDesc,
		EligiblePrebuildsDesc,
	}
	for _, desc := range all {
		descCh <- desc
	}
}
// Collect implements prometheus.Collector. It loads prebuild metrics from the
// database under a 10 second timeout and, for every returned row, sends five
// counters (created, failed, assigned, exhausted, used preset) labelled with
// the row's template and preset names. If the query fails the error is logged
// and nothing is sent.
func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
	// TODO (sasswart): get a proper actor in here, to deescalate from system
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	// nolint:gocritic // just until we get back to this
	rows, err := mc.database.GetPrebuildMetrics(dbauthz.AsSystemRestricted(ctx))
	if err != nil {
		mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
		return
	}

	// emitCounter sends a single counter sample with the two standard labels.
	emitCounter := func(desc *prometheus.Desc, value float64, templateName, presetName string) {
		metricsCh <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, value, templateName, presetName)
	}

	for _, row := range rows {
		templateName := row.TemplateName.String
		presetName := row.PresetName.String

		emitCounter(CreatedPrebuildsDesc, float64(row.Created), templateName, presetName)
		emitCounter(FailedPrebuildsDesc, float64(row.Failed), templateName, presetName)
		emitCounter(AssignedPrebuildsDesc, float64(row.Assigned), templateName, presetName)
		emitCounter(ExhaustedPrebuildsDesc, float64(row.Exhausted), templateName, presetName)
		emitCounter(UsedPresetsDesc, float64(row.UsedPreset), templateName, presetName)
	}

	// TODO (sasswart): read gauges from controller
}

View File

@ -0,0 +1,69 @@
package prebuilds_test
import (
"context"
"database/sql"
"testing"
"time"
"github.com/google/uuid"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/require"
"cdr.dev/slog/sloggers/slogtest"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbgen"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
)
// TestMetricsCollector registers a MetricsCollector against a fresh registry,
// seeds the database with a template version, a preset and one completed
// workspace build initiated by the prebuilds owner, and checks that gathering
// yields the five counter families emitted by Collect.
func TestMetricsCollector(t *testing.T) {
	t.Parallel()

	db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())

	org := dbgen.Organization(t, db, database.Organization{})
	user := dbgen.User(t, db, database.User{})
	template := dbgen.Template(t, db, database.Template{
		CreatedBy:      user.ID,
		OrganizationID: org.ID,
	})
	templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
		TemplateID:     uuid.NullUUID{UUID: template.ID, Valid: true},
		OrganizationID: org.ID,
		CreatedBy:      user.ID,
	})

	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
	collector := prebuilds.NewMetricsCollector(db, logger)
	registry := prometheus.NewRegistry()
	// Fail immediately if registration is rejected instead of surfacing it
	// later as a confusing metric-count mismatch.
	require.NoError(t, registry.Register(collector))

	preset, err := db.InsertPreset(context.Background(), database.InsertPresetParams{
		TemplateVersionID: templateVersion.ID,
		Name:              "test",
	})
	require.NoError(t, err)

	workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
		OrganizationID: org.ID,
		OwnerID:        user.ID,
		TemplateID:     template.ID,
	})
	job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
		OrganizationID: org.ID,
		CompletedAt:    sql.NullTime{Time: time.Now(), Valid: true},
		InitiatorID:    prebuilds.OwnerID,
	})
	dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
		WorkspaceID:             workspace.ID,
		TemplateVersionID:       templateVersion.ID,
		TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
		InitiatorID:             prebuilds.OwnerID,
		JobID:                   job.ID,
	})

	metrics, err := registry.Gather()
	require.NoError(t, err)
	// Collect emits five counters per row; each one is gathered as its own
	// metric family.
	require.Len(t, metrics, 5)
}