fix: fix metric for hard-limited presets (#18045)

```
// Report a metric only if the preset uses the latest version of the template and the template is not deleted.
// This avoids conflicts between metrics from old and new template versions.
//
// NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
// because templates can have multiple versions — or deleted templates can share the same name.
//
// The safest approach is to report the metric only for the latest version of the preset.
// When a new template version is released, the metric for the new preset should overwrite
// the old value in Prometheus.
//
// However, there’s one edge case: if an admin creates a template, its preset becomes hard-limited,
// and the admin then deletes the template and never creates another with the same name,
// the old preset will continue to be reported as hard-limited —
// even though it’s deleted. This will persist until `coderd` is restarted.
```
This commit is contained in:
Yevhenii Shcherbina
2025-05-27 10:07:36 -04:00
committed by GitHub
parent 5b90c69b90
commit e8c75eb1c3
2 changed files with 21 additions and 10 deletions

View File

@ -361,15 +361,23 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
slog.F("preset_name", ps.Preset.Name), slog.F("preset_name", ps.Preset.Name),
) )
// Report a preset as hard-limited only if all the following conditions are met: // Report a metric only if the preset uses the latest version of the template and the template is not deleted.
// - The preset is marked as hard-limited // This avoids conflicts between metrics from old and new template versions.
// - The preset is using the active version of its template, and the template has not been deleted
// //
// The second condition is important because a hard-limited preset that has become outdated is no longer relevant. // NOTE: Multiple versions of a preset can exist with the same orgName, templateName, and presetName,
// Its associated prebuilt workspaces were likely deleted, and it's not meaningful to continue reporting it // because templates can have multiple versions — or deleted templates can share the same name.
// as hard-limited to the admin. //
reportAsHardLimited := ps.IsHardLimited && ps.Preset.UsingActiveVersion && !ps.Preset.Deleted // The safest approach is to report the metric only for the latest version of the preset.
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, reportAsHardLimited) // When a new template version is released, the metric for the new preset should overwrite
// the old value in Prometheus.
//
// However, there's one edge case: if an admin creates a template, it becomes hard-limited,
// then deletes the template and never creates another with the same name,
// the old preset will continue to be reported as hard-limited —
// even though it's deleted. This will persist until `coderd` is restarted.
if ps.Preset.UsingActiveVersion && !ps.Preset.Deleted {
c.metrics.trackHardLimitedStatus(ps.Preset.OrganizationName, ps.Preset.TemplateName, ps.Preset.Name, ps.IsHardLimited)
}
// If the preset reached the hard failure limit for the first time during this iteration: // If the preset reached the hard failure limit for the first time during this iteration:
// - Mark it as hard-limited in the database // - Mark it as hard-limited in the database

View File

@ -1034,7 +1034,8 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition) require.Equal(t, database.WorkspaceTransitionDelete, workspaceBuilds[0].Transition)
require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition) require.Equal(t, database.WorkspaceTransitionStart, workspaceBuilds[1].Transition)
// Metric is deleted after preset became outdated. // The metric is still set to 1, even though the preset has become outdated.
// This happens because the old value hasn't been overwritten by a newer preset yet.
mf, err = registry.Gather() mf, err = registry.Gather()
require.NoError(t, err) require.NoError(t, err)
metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{ metric = findMetric(mf, prebuilds.MetricPresetHardLimitedGauge, map[string]string{
@ -1042,7 +1043,9 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
"preset_name": preset.Name, "preset_name": preset.Name,
"org_name": org.Name, "org_name": org.Name,
}) })
require.Nil(t, metric) require.NotNil(t, metric)
require.NotNil(t, metric.GetGauge())
require.EqualValues(t, 1, metric.GetGauge().GetValue())
}) })
} }
} }