mirror of
https://github.com/coder/coder.git
synced 2025-07-15 22:20:27 +00:00
fix: remove notifications for hard-limited prebuilds (#18528)
Relates to https://github.com/coder/internal/issues/674 Currently, we send notifications to **all template admins** for **every failed and hard-limited preset**. This can generate excessive noise—especially when someone is debugging a template and creates multiple broken versions in quick succession. For now, we've decided to remove hard-limited preset notifications to reduce excessive noise. In the long term, we plan to aggregate failure information and deliver it on a daily or weekly basis.
This commit is contained in:
committed by
GitHub
parent
7b152cdd91
commit
bca5c35aa2
@ -427,7 +427,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
|
||||
|
||||
// If the preset reached the hard failure limit for the first time during this iteration:
|
||||
// - Mark it as hard-limited in the database
|
||||
// - Send notifications to template admins
|
||||
// - Continue execution, we disallow only creation operation for hard-limited presets. Deletion is allowed.
|
||||
if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited {
|
||||
logger.Warn(ctx, "preset is hard limited, notifying template admins")
|
||||
@ -439,11 +438,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
|
||||
if err != nil {
|
||||
return xerrors.Errorf("failed to update preset prebuild status: %w", err)
|
||||
}
|
||||
|
||||
err = c.notifyPrebuildFailureLimitReached(ctx, ps)
|
||||
if err != nil {
|
||||
logger.Error(ctx, "failed to notify that number of prebuild failures reached the limit", slog.Error(err))
|
||||
}
|
||||
}
|
||||
|
||||
state := ps.CalculateState()
|
||||
@ -474,49 +468,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
|
||||
return multiErr.ErrorOrNil()
|
||||
}
|
||||
|
||||
func (c *StoreReconciler) notifyPrebuildFailureLimitReached(ctx context.Context, ps prebuilds.PresetSnapshot) error {
|
||||
// nolint:gocritic // Necessary to query all the required data.
|
||||
ctx = dbauthz.AsSystemRestricted(ctx)
|
||||
|
||||
// Send notification to template admins.
|
||||
if c.notifEnq == nil {
|
||||
c.logger.Warn(ctx, "notification enqueuer not set, cannot send prebuild is hard limited notification(s)")
|
||||
return nil
|
||||
}
|
||||
|
||||
templateAdmins, err := c.store.GetUsers(ctx, database.GetUsersParams{
|
||||
RbacRole: []string{codersdk.RoleTemplateAdmin},
|
||||
})
|
||||
if err != nil {
|
||||
return xerrors.Errorf("fetch template admins: %w", err)
|
||||
}
|
||||
|
||||
for _, templateAdmin := range templateAdmins {
|
||||
if _, err := c.notifEnq.EnqueueWithData(ctx, templateAdmin.ID, notifications.PrebuildFailureLimitReached,
|
||||
map[string]string{
|
||||
"org": ps.Preset.OrganizationName,
|
||||
"template": ps.Preset.TemplateName,
|
||||
"template_version": ps.Preset.TemplateVersionName,
|
||||
"preset": ps.Preset.Name,
|
||||
},
|
||||
map[string]any{},
|
||||
"prebuilds_reconciler",
|
||||
// Associate this notification with all the related entities.
|
||||
ps.Preset.TemplateID, ps.Preset.TemplateVersionID, ps.Preset.ID, ps.Preset.OrganizationID,
|
||||
); err != nil {
|
||||
c.logger.Error(ctx,
|
||||
"failed to send notification",
|
||||
slog.Error(err),
|
||||
slog.F("template_admin_id", templateAdmin.ID.String()),
|
||||
)
|
||||
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *StoreReconciler) CalculateActions(ctx context.Context, snapshot prebuilds.PresetSnapshot) ([]*prebuilds.ReconciliationActions, error) {
|
||||
if ctx.Err() != nil {
|
||||
return nil, ctx.Err()
|
||||
|
@ -853,11 +853,6 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
|
||||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||||
controller := prebuilds.NewStoreReconciler(db, pubSub, cache, cfg, logger, clock, registry, fakeEnqueuer)
|
||||
|
||||
// Template admin to receive a notification.
|
||||
templateAdmin := dbgen.User(t, db, database.User{
|
||||
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
||||
})
|
||||
|
||||
// Set up test environment with a template, version, and preset.
|
||||
ownerID := uuid.New()
|
||||
dbgen.User(t, db, database.User{
|
||||
@ -938,20 +933,6 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
|
||||
require.Equal(t, 1, len(workspaces))
|
||||
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
||||
|
||||
// When hard limit is reached, a notification should be sent.
|
||||
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
||||
if !assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") {
|
||||
return false
|
||||
}
|
||||
|
||||
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
require.Len(t, matching, 1)
|
||||
|
||||
// When hard limit is reached, metric is set to 1.
|
||||
mf, err = registry.Gather()
|
||||
require.NoError(t, err)
|
||||
@ -1016,11 +997,6 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
|
||||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||||
controller := prebuilds.NewStoreReconciler(db, pubSub, cache, cfg, logger, clock, registry, fakeEnqueuer)
|
||||
|
||||
// Template admin to receive a notification.
|
||||
templateAdmin := dbgen.User(t, db, database.User{
|
||||
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
||||
})
|
||||
|
||||
// Set up test environment with a template, version, and preset.
|
||||
ownerID := uuid.New()
|
||||
dbgen.User(t, db, database.User{
|
||||
@ -1125,20 +1101,6 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
||||
|
||||
// When hard limit is reached, a notification should be sent.
|
||||
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
||||
if !assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") {
|
||||
return false
|
||||
}
|
||||
|
||||
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
require.Len(t, matching, 1)
|
||||
|
||||
// When hard limit is reached, metric is set to 1.
|
||||
mf, err = registry.Gather()
|
||||
require.NoError(t, err)
|
||||
|
Reference in New Issue
Block a user