diff --git a/coderd/database/dbauthz/dbauthz.go b/coderd/database/dbauthz/dbauthz.go
index 2f1402b1c4..3edf7f6286 100644
--- a/coderd/database/dbauthz/dbauthz.go
+++ b/coderd/database/dbauthz/dbauthz.go
@@ -1977,6 +1977,13 @@ func (q *querier) GetParameterSchemasByJobID(ctx context.Context, jobID uuid.UUI
 	return q.db.GetParameterSchemasByJobID(ctx, jobID)
 }
 
+func (q *querier) GetPrebuildMetrics(ctx context.Context) ([]database.GetPrebuildMetricsRow, error) {
+	if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil {
+		return nil, err
+	}
+	return q.db.GetPrebuildMetrics(ctx)
+}
+
 func (q *querier) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) {
 	if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceTemplate); err != nil {
 		return nil, err
diff --git a/coderd/database/dbmem/dbmem.go b/coderd/database/dbmem/dbmem.go
index c7cc09550f..540c4cf188 100644
--- a/coderd/database/dbmem/dbmem.go
+++ b/coderd/database/dbmem/dbmem.go
@@ -3784,6 +3784,10 @@ func (q *FakeQuerier) GetParameterSchemasByJobID(_ context.Context, jobID uuid.U
 	return parameters, nil
 }
 
+func (q *FakeQuerier) GetPrebuildMetrics(ctx context.Context) ([]database.GetPrebuildMetricsRow, error) {
+	panic("not implemented")
+}
+
 func (q *FakeQuerier) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) {
 	panic("not implemented")
 }
diff --git a/coderd/database/dbmetrics/querymetrics.go b/coderd/database/dbmetrics/querymetrics.go
index a971a9f835..2f7334d191 100644
--- a/coderd/database/dbmetrics/querymetrics.go
+++ b/coderd/database/dbmetrics/querymetrics.go
@@ -987,6 +987,13 @@ func (m queryMetricsStore) GetParameterSchemasByJobID(ctx context.Context, jobID
 	return schemas, err
 }
 
+func (m queryMetricsStore) GetPrebuildMetrics(ctx context.Context) ([]database.GetPrebuildMetricsRow, error) {
+	start := time.Now()
+	r0, r1 := m.s.GetPrebuildMetrics(ctx)
+	m.queryLatencies.WithLabelValues("GetPrebuildMetrics").Observe(time.Since(start).Seconds())
+	return r0, r1
+}
+
 func (m queryMetricsStore) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) {
 	start := time.Now()
 	r0, r1 := m.s.GetPrebuildsInProgress(ctx)
diff --git a/coderd/database/dbmock/dbmock.go b/coderd/database/dbmock/dbmock.go
index 27c84e80f1..b8331587ab 100644
--- a/coderd/database/dbmock/dbmock.go
+++ b/coderd/database/dbmock/dbmock.go
@@ -2031,6 +2031,21 @@ func (mr *MockStoreMockRecorder) GetParameterSchemasByJobID(ctx, jobID any) *gom
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetParameterSchemasByJobID", reflect.TypeOf((*MockStore)(nil).GetParameterSchemasByJobID), ctx, jobID)
 }
 
+// GetPrebuildMetrics mocks base method.
+func (m *MockStore) GetPrebuildMetrics(ctx context.Context) ([]database.GetPrebuildMetricsRow, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "GetPrebuildMetrics", ctx)
+	ret0, _ := ret[0].([]database.GetPrebuildMetricsRow)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// GetPrebuildMetrics indicates an expected call of GetPrebuildMetrics.
+func (mr *MockStoreMockRecorder) GetPrebuildMetrics(ctx any) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetPrebuildMetrics", reflect.TypeOf((*MockStore)(nil).GetPrebuildMetrics), ctx)
+}
+
 // GetPrebuildsInProgress mocks base method.
 func (m *MockStore) GetPrebuildsInProgress(ctx context.Context) ([]database.GetPrebuildsInProgressRow, error) {
 	m.ctrl.T.Helper()
diff --git a/coderd/database/querier.go b/coderd/database/querier.go
index d5a1ae5a67..7d1e20a7a1 100644
--- a/coderd/database/querier.go
+++ b/coderd/database/querier.go
@@ -204,6 +204,7 @@ type sqlcQuerier interface {
 	GetOrganizations(ctx context.Context, arg GetOrganizationsParams) ([]Organization, error)
 	GetOrganizationsByUserID(ctx context.Context, userID uuid.UUID) ([]Organization, error)
 	GetParameterSchemasByJobID(ctx context.Context, jobID uuid.UUID) ([]ParameterSchema, error)
+	GetPrebuildMetrics(ctx context.Context) ([]GetPrebuildMetricsRow, error)
 	GetPrebuildsInProgress(ctx context.Context) ([]GetPrebuildsInProgressRow, error)
 	GetPresetByWorkspaceBuildID(ctx context.Context, workspaceBuildID uuid.UUID) (TemplateVersionPreset, error)
 	GetPresetParametersByTemplateVersionID(ctx context.Context, templateVersionID uuid.UUID) ([]TemplateVersionPresetParameter, error)
diff --git a/coderd/database/queries.sql.go b/coderd/database/queries.sql.go
index e6cdb83068..342d400f19 100644
--- a/coderd/database/queries.sql.go
+++ b/coderd/database/queries.sql.go
@@ -5442,6 +5442,91 @@ func (q *sqlQuerier) ClaimPrebuild(ctx context.Context, arg ClaimPrebuildParams)
 	return i, err
 }
 
+const getPrebuildMetrics = `-- name: GetPrebuildMetrics :many
+SELECT
+    t.name as template_name,
+    tvp.name as preset_name,
+    COUNT(*) FILTER ( -- created
+        -- TODO (sasswart): double check which job statuses should be included here
+        WHERE
+            pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+            AND pj.job_status = 'succeeded'::provisioner_job_status
+    ) as created,
+    COUNT(*) FILTER ( -- failed
+        -- TODO (sasswart): should we count cancelled here?
+        WHERE pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+        AND pj.job_status = 'failed'::provisioner_job_status
+    ) as failed,
+    COUNT(*) FILTER ( -- assigned
+        WHERE pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+        AND NOT w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+    ) as assigned,
+    COUNT(*) FILTER ( -- exhausted
+        -- TODO (sasswart): write a filter to count this
+        -- we should be able to count:
+        -- - workspace builds
+        -- - that have a preset id
+        -- - and that preset has prebuilds enabled
+        -- - and the job for the prebuild was initiated by a user other than the prebuilds user
+        WHERE
+            wb.template_version_preset_id IS NOT NULL
+            AND w.owner_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+            AND wb.initiator_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+    ) as exhausted,
+    COUNT(*) FILTER ( -- used_preset
+        WHERE wb.template_version_preset_id IS NOT NULL
+    ) as used_preset
+FROM workspace_builds wb
+INNER JOIN provisioner_jobs pj ON wb.job_id = pj.id
+LEFT JOIN workspaces w ON wb.workspace_id = w.id
+LEFT JOIN template_version_presets tvp ON wb.template_version_preset_id = tvp.id
+LEFT JOIN template_versions tv ON tv.id = wb.template_version_id
+LEFT JOIN templates t ON t.id = tv.template_id
+WHERE pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+GROUP BY t.name, tvp.name
+`
+
+type GetPrebuildMetricsRow struct {
+	TemplateName sql.NullString `db:"template_name" json:"template_name"`
+	PresetName   sql.NullString `db:"preset_name" json:"preset_name"`
+	Created      int64          `db:"created" json:"created"`
+	Failed       int64          `db:"failed" json:"failed"`
+	Assigned     int64          `db:"assigned" json:"assigned"`
+	Exhausted    int64          `db:"exhausted" json:"exhausted"`
+	UsedPreset   int64          `db:"used_preset" json:"used_preset"`
+}
+
+func (q *sqlQuerier) GetPrebuildMetrics(ctx context.Context) ([]GetPrebuildMetricsRow, error) {
+	rows, err := q.db.QueryContext(ctx, getPrebuildMetrics)
+	if err != nil {
+		return nil, err
+	}
+	defer rows.Close()
+	var items []GetPrebuildMetricsRow
+	for rows.Next() {
+		var i GetPrebuildMetricsRow
+		if err := rows.Scan(
+			&i.TemplateName,
+			&i.PresetName,
+			&i.Created,
+			&i.Failed,
+			&i.Assigned,
+			&i.Exhausted,
+			&i.UsedPreset,
+		); err != nil {
+			return nil, err
+		}
+		items = append(items, i)
+	}
+	if err := rows.Close(); err != nil {
+		return nil, err
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	return items, nil
+}
+
 const getPrebuildsInProgress = `-- name: GetPrebuildsInProgress :many
 SELECT t.id AS template_id, wpb.template_version_id, wpb.transition, COUNT(wpb.transition) AS count
 FROM workspace_latest_build wlb
diff --git a/coderd/database/queries/prebuilds.sql b/coderd/database/queries/prebuilds.sql
index f760b094f3..ef8f4f0779 100644
--- a/coderd/database/queries/prebuilds.sql
+++ b/coderd/database/queries/prebuilds.sql
@@ -71,3 +71,46 @@ RETURNING w.id, w.name;
 INSERT INTO template_version_preset_prebuilds (id, preset_id, desired_instances, invalidate_after_secs)
 VALUES (@id::uuid, @preset_id::uuid, @desired_instances::int, @invalidate_after_secs::int)
 RETURNING *;
+
+-- name: GetPrebuildMetrics :many
+SELECT
+    t.name as template_name,
+    tvp.name as preset_name,
+    COUNT(*) FILTER ( -- created
+        -- TODO (sasswart): double check which job statuses should be included here
+        WHERE
+            pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+            AND pj.job_status = 'succeeded'::provisioner_job_status
+    ) as created,
+    COUNT(*) FILTER ( -- failed
+        -- TODO (sasswart): should we count cancelled here?
+        WHERE pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+        AND pj.job_status = 'failed'::provisioner_job_status
+    ) as failed,
+    COUNT(*) FILTER ( -- assigned
+        WHERE pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+        AND NOT w.owner_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+    ) as assigned,
+    COUNT(*) FILTER ( -- exhausted
+        -- TODO (sasswart): write a filter to count this
+        -- we should be able to count:
+        -- - workspace builds
+        -- - that have a preset id
+        -- - and that preset has prebuilds enabled
+        -- - and the job for the prebuild was initiated by a user other than the prebuilds user
+        WHERE
+            wb.template_version_preset_id IS NOT NULL
+            AND w.owner_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+            AND wb.initiator_id != 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+    ) as exhausted,
+    COUNT(*) FILTER ( -- used_preset
+        WHERE wb.template_version_preset_id IS NOT NULL
+    ) as used_preset
+FROM workspace_builds wb
+INNER JOIN provisioner_jobs pj ON wb.job_id = pj.id
+LEFT JOIN workspaces w ON wb.workspace_id = w.id
+LEFT JOIN template_version_presets tvp ON wb.template_version_preset_id = tvp.id
+LEFT JOIN template_versions tv ON tv.id = wb.template_version_id
+LEFT JOIN templates t ON t.id = tv.template_id
+WHERE pj.initiator_id = 'c42fdf75-3097-471c-8c33-fb52454d81c0'::uuid
+GROUP BY t.name, tvp.name;
diff --git a/enterprise/coderd/coderd.go b/enterprise/coderd/coderd.go
index 9a698856dc..3489f4d770 100644
--- a/enterprise/coderd/coderd.go
+++ b/enterprise/coderd/coderd.go
@@ -590,6 +590,13 @@ func New(ctx context.Context, options *Options) (_ *API, err error) {
 		} else {
 			api.prebuildsController = prebuilds.NewController(options.Database, options.Pubsub, options.DeploymentValues.Prebuilds, options.Logger.Named("prebuilds.controller"))
 			go api.prebuildsController.Loop(ctx)
+
+			prebuildMetricsCollector := prebuilds.NewMetricsCollector(options.Database, options.Logger)
+			// TODO: decide whether the collector should be stored on api, like api.prebuildsController.
+			err = api.PrometheusRegistry.Register(prebuildMetricsCollector)
+			if err != nil {
+				return nil, xerrors.Errorf("unable to register prebuilds metrics collector: %w", err)
+			}
 		}
 	}
 
diff --git a/enterprise/coderd/prebuilds/claim.go b/enterprise/coderd/prebuilds/claim.go
index 0cb39c1659..fa4f48a389 100644
--- a/enterprise/coderd/prebuilds/claim.go
+++ b/enterprise/coderd/prebuilds/claim.go
@@ -52,7 +52,7 @@ func (e EnterpriseClaimer) Claim(ctx context.Context, store database.Store, user
 }
 
 func (e EnterpriseClaimer) Initiator() uuid.UUID {
-	return ownerID
+	return OwnerID
 }
 
 var _ prebuilds.Claimer = &EnterpriseClaimer{}
diff --git a/enterprise/coderd/prebuilds/controller.go b/enterprise/coderd/prebuilds/controller.go
index de4e02508c..7bb862ee7f 100644
--- a/enterprise/coderd/prebuilds/controller.go
+++ b/enterprise/coderd/prebuilds/controller.go
@@ -321,7 +321,7 @@ func (c *Controller) createPrebuild(ctx context.Context, prebuildID uuid.UUID, t
 			ID:             prebuildID,
 			CreatedAt:      now,
 			UpdatedAt:      now,
-			OwnerID:        ownerID,
+			OwnerID:        OwnerID,
 			OrganizationID: template.OrganizationID,
 			TemplateID:     template.ID,
 			Name:           name,
@@ -382,14 +382,14 @@ func (c *Controller) provision(ctx context.Context, prebuildID uuid.UUID, templa
 
 	builder := wsbuilder.New(workspace, transition).
 		Reason(database.BuildReasonInitiator).
-		Initiator(ownerID).
+		Initiator(OwnerID).
 		ActiveVersion().
 		VersionID(template.ActiveVersionID).
 		MarkPrebuild().
 		TemplateVersionPresetID(presetID)
 
 	// We only inject the required params when the prebuild is being created.
-	// This mirrors the behaviour of regular workspace deletion (see cli/delete.go).
+	// This mirrors the behavior of regular workspace deletion (see cli/delete.go).
 	if transition != database.WorkspaceTransitionDelete {
 		builder = builder.RichParameterValues(params)
 	}
diff --git a/enterprise/coderd/prebuilds/id.go b/enterprise/coderd/prebuilds/id.go
index 6f7ff2dac2..bde76e3f7b 100644
--- a/enterprise/coderd/prebuilds/id.go
+++ b/enterprise/coderd/prebuilds/id.go
@@ -2,4 +2,5 @@ package prebuilds
 
 import "github.com/google/uuid"
 
-var ownerID = uuid.MustParse("c42fdf75-3097-471c-8c33-fb52454d81c0")
+// OwnerID is the fixed UUID of the user that owns and initiates prebuilt workspaces.
+var OwnerID = uuid.MustParse("c42fdf75-3097-471c-8c33-fb52454d81c0")
diff --git a/enterprise/coderd/prebuilds/metricscollector.go b/enterprise/coderd/prebuilds/metricscollector.go
new file mode 100644
index 0000000000..f6a6ee8a4b
--- /dev/null
+++ b/enterprise/coderd/prebuilds/metricscollector.go
@@ -0,0 +1,83 @@
+package prebuilds
+
+import (
+	"context"
+	"time"
+
+	"cdr.dev/slog"
+
+	"github.com/prometheus/client_golang/prometheus"
+
+	"github.com/coder/coder/v2/coderd/database"
+	"github.com/coder/coder/v2/coderd/database/dbauthz"
+)
+
+// Descriptors for the prebuild metrics that MetricsCollector advertises via Describe.
+var (
+	CreatedPrebuildsDesc   = prometheus.NewDesc("coderd_prebuilds_created", "The number of prebuilds created.", []string{"template_name", "preset_name"}, nil)
+	FailedPrebuildsDesc    = prometheus.NewDesc("coderd_prebuilds_failed", "The number of prebuilds that failed.", []string{"template_name", "preset_name"}, nil)
+	AssignedPrebuildsDesc  = prometheus.NewDesc("coderd_prebuilds_assigned", "The number of prebuilds that were assigned to a runner.", []string{"template_name", "preset_name"}, nil)
+	UsedPresetsDesc        = prometheus.NewDesc("coderd_presets_used", "The number of times a preset was used.", []string{"template_name", "preset_name"}, nil)
+	ExhaustedPrebuildsDesc = prometheus.NewDesc("coderd_prebuilds_exhausted", "The number of prebuilds that were exhausted.", []string{"template_name", "preset_name"}, nil)
+	DesiredPrebuildsDesc   = prometheus.NewDesc("coderd_prebuilds_desired", "The number of desired prebuilds.", []string{"template_name", "preset_name"}, nil)
+	ActualPrebuildsDesc    = prometheus.NewDesc("coderd_prebuilds_actual", "The number of actual prebuilds.", []string{"template_name", "preset_name"}, nil)
+	EligiblePrebuildsDesc  = prometheus.NewDesc("coderd_prebuilds_eligible", "The number of eligible prebuilds.", []string{"template_name", "preset_name"}, nil)
+)
+
+// MetricsCollector is a prometheus.Collector that reports prebuild metrics
+// read from the database each time Collect is called.
+type MetricsCollector struct {
+	database database.Store
+	logger   slog.Logger
+}
+
+var _ prometheus.Collector = (*MetricsCollector)(nil)
+
+// NewMetricsCollector returns a MetricsCollector that queries db and logs
+// through logger under the "prebuilds_metrics_collector" name.
+func NewMetricsCollector(db database.Store, logger slog.Logger) *MetricsCollector {
+	return &MetricsCollector{
+		database: db,
+		logger:   logger.Named("prebuilds_metrics_collector"),
+	}
+}
+
+// Describe implements prometheus.Collector by sending every prebuild metric
+// descriptor to descCh, including the ones Collect does not emit yet.
+func (*MetricsCollector) Describe(descCh chan<- *prometheus.Desc) {
+	descCh <- CreatedPrebuildsDesc
+	descCh <- FailedPrebuildsDesc
+	descCh <- AssignedPrebuildsDesc
+	descCh <- UsedPresetsDesc
+	descCh <- ExhaustedPrebuildsDesc
+	descCh <- DesiredPrebuildsDesc
+	descCh <- ActualPrebuildsDesc
+	descCh <- EligiblePrebuildsDesc
+}
+
+// Collect implements prometheus.Collector. It queries the database (bounded by
+// a 10s timeout) and sends five counter samples per returned row to metricsCh.
+// If the query fails, the error is logged and nothing is emitted.
+func (mc *MetricsCollector) Collect(metricsCh chan<- prometheus.Metric) {
+	// TODO (sasswart): get a proper actor in here, to deescalate from system
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+	// nolint:gocritic // just until we get back to this
+	metrics, err := mc.database.GetPrebuildMetrics(dbauthz.AsSystemRestricted(ctx))
+	if err != nil {
+		mc.logger.Error(ctx, "failed to get prebuild metrics", slog.Error(err))
+		return
+	}
+
+	for _, metric := range metrics {
+		// TemplateName and PresetName are sql.NullString; an invalid (NULL)
+		// value is reported as an empty label.
+		metricsCh <- prometheus.MustNewConstMetric(CreatedPrebuildsDesc, prometheus.CounterValue, float64(metric.Created), metric.TemplateName.String, metric.PresetName.String)
+		metricsCh <- prometheus.MustNewConstMetric(FailedPrebuildsDesc, prometheus.CounterValue, float64(metric.Failed), metric.TemplateName.String, metric.PresetName.String)
+		metricsCh <- prometheus.MustNewConstMetric(AssignedPrebuildsDesc, prometheus.CounterValue, float64(metric.Assigned), metric.TemplateName.String, metric.PresetName.String)
+		metricsCh <- prometheus.MustNewConstMetric(ExhaustedPrebuildsDesc, prometheus.CounterValue, float64(metric.Exhausted), metric.TemplateName.String, metric.PresetName.String)
+		metricsCh <- prometheus.MustNewConstMetric(UsedPresetsDesc, prometheus.CounterValue, float64(metric.UsedPreset), metric.TemplateName.String, metric.PresetName.String)
+	}
+
+	// TODO (sasswart): read gauges from controller
+}
diff --git a/enterprise/coderd/prebuilds/metricscollector_test.go b/enterprise/coderd/prebuilds/metricscollector_test.go
new file mode 100644
index 0000000000..b2231a8a2e
--- /dev/null
+++ b/enterprise/coderd/prebuilds/metricscollector_test.go
@@ -0,0 +1,74 @@
+package prebuilds_test
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+	"time"
+
+	"github.com/google/uuid"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/stretchr/testify/require"
+
+	"cdr.dev/slog/sloggers/slogtest"
+
+	"github.com/coder/coder/v2/coderd/database"
+	"github.com/coder/coder/v2/coderd/database/dbgen"
+	"github.com/coder/coder/v2/coderd/database/dbtestutil"
+	"github.com/coder/coder/v2/enterprise/coderd/prebuilds"
+)
+
+func TestMetricsCollector(t *testing.T) {
+	t.Parallel()
+
+	if !dbtestutil.WillUsePostgres() {
+		t.Skip("this test requires postgres")
+	}
+
+	db, _ := dbtestutil.NewDB(t, dbtestutil.WithDumpOnFailure())
+
+	org := dbgen.Organization(t, db, database.Organization{})
+	user := dbgen.User(t, db, database.User{})
+	template := dbgen.Template(t, db, database.Template{
+		CreatedBy:      user.ID,
+		OrganizationID: org.ID,
+	})
+	templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
+		TemplateID:     uuid.NullUUID{UUID: template.ID, Valid: true},
+		OrganizationID: org.ID,
+		CreatedBy:      user.ID,
+	})
+
+	logger := slogtest.Make(t, &slogtest.Options{IgnoreErrors: true})
+	collector := prebuilds.NewMetricsCollector(db, logger)
+
+	registry := prometheus.NewRegistry()
+	require.NoError(t, registry.Register(collector))
+
+	preset, err := db.InsertPreset(context.Background(), database.InsertPresetParams{
+		TemplateVersionID: templateVersion.ID,
+		Name:              "test",
+	})
+	require.NoError(t, err)
+	workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
+		OrganizationID: org.ID,
+		OwnerID:        user.ID,
+		TemplateID:     template.ID,
+	})
+	job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
+		OrganizationID: org.ID,
+		CompletedAt:    sql.NullTime{Time: time.Now(), Valid: true},
+		InitiatorID:    prebuilds.OwnerID,
+	})
+	dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
+		WorkspaceID:             workspace.ID,
+		TemplateVersionID:       templateVersion.ID,
+		TemplateVersionPresetID: uuid.NullUUID{UUID: preset.ID, Valid: true},
+		InitiatorID:             prebuilds.OwnerID,
+		JobID:                   job.ID,
+	})
+
+	metrics, err := registry.Gather()
+	require.NoError(t, err)
+	require.Len(t, metrics, 5)
+}