mirror of
https://github.com/coder/coder.git
synced 2025-07-18 14:17:22 +00:00
fix: remove notifications for hard-limited prebuilds (#18528)
Relates to https://github.com/coder/internal/issues/674 Currently, we send notifications to **all template admins** for **every failed and hard-limited preset**. This can generate excessive noise—especially when someone is debugging a template and creates multiple broken versions in quick succession. For now, we've decided to remove hard-limited preset notifications to reduce excessive noise. In the long term, we plan to aggregate failure information and deliver it on a daily or weekly basis.
This commit is contained in:
committed by
GitHub
parent
7b152cdd91
commit
bca5c35aa2
@ -427,7 +427,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
|
|||||||
|
|
||||||
// If the preset reached the hard failure limit for the first time during this iteration:
|
// If the preset reached the hard failure limit for the first time during this iteration:
|
||||||
// - Mark it as hard-limited in the database
|
// - Mark it as hard-limited in the database
|
||||||
// - Send notifications to template admins
|
|
||||||
// - Continue execution, we disallow only creation operation for hard-limited presets. Deletion is allowed.
|
// - Continue execution, we disallow only creation operation for hard-limited presets. Deletion is allowed.
|
||||||
if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited {
|
if ps.Preset.PrebuildStatus != database.PrebuildStatusHardLimited && ps.IsHardLimited {
|
||||||
logger.Warn(ctx, "preset is hard limited, notifying template admins")
|
logger.Warn(ctx, "preset is hard limited, notifying template admins")
|
||||||
@ -439,11 +438,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return xerrors.Errorf("failed to update preset prebuild status: %w", err)
|
return xerrors.Errorf("failed to update preset prebuild status: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = c.notifyPrebuildFailureLimitReached(ctx, ps)
|
|
||||||
if err != nil {
|
|
||||||
logger.Error(ctx, "failed to notify that number of prebuild failures reached the limit", slog.Error(err))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
state := ps.CalculateState()
|
state := ps.CalculateState()
|
||||||
@ -474,49 +468,6 @@ func (c *StoreReconciler) ReconcilePreset(ctx context.Context, ps prebuilds.Pres
|
|||||||
return multiErr.ErrorOrNil()
|
return multiErr.ErrorOrNil()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *StoreReconciler) notifyPrebuildFailureLimitReached(ctx context.Context, ps prebuilds.PresetSnapshot) error {
|
|
||||||
// nolint:gocritic // Necessary to query all the required data.
|
|
||||||
ctx = dbauthz.AsSystemRestricted(ctx)
|
|
||||||
|
|
||||||
// Send notification to template admins.
|
|
||||||
if c.notifEnq == nil {
|
|
||||||
c.logger.Warn(ctx, "notification enqueuer not set, cannot send prebuild is hard limited notification(s)")
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
templateAdmins, err := c.store.GetUsers(ctx, database.GetUsersParams{
|
|
||||||
RbacRole: []string{codersdk.RoleTemplateAdmin},
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return xerrors.Errorf("fetch template admins: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, templateAdmin := range templateAdmins {
|
|
||||||
if _, err := c.notifEnq.EnqueueWithData(ctx, templateAdmin.ID, notifications.PrebuildFailureLimitReached,
|
|
||||||
map[string]string{
|
|
||||||
"org": ps.Preset.OrganizationName,
|
|
||||||
"template": ps.Preset.TemplateName,
|
|
||||||
"template_version": ps.Preset.TemplateVersionName,
|
|
||||||
"preset": ps.Preset.Name,
|
|
||||||
},
|
|
||||||
map[string]any{},
|
|
||||||
"prebuilds_reconciler",
|
|
||||||
// Associate this notification with all the related entities.
|
|
||||||
ps.Preset.TemplateID, ps.Preset.TemplateVersionID, ps.Preset.ID, ps.Preset.OrganizationID,
|
|
||||||
); err != nil {
|
|
||||||
c.logger.Error(ctx,
|
|
||||||
"failed to send notification",
|
|
||||||
slog.Error(err),
|
|
||||||
slog.F("template_admin_id", templateAdmin.ID.String()),
|
|
||||||
)
|
|
||||||
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *StoreReconciler) CalculateActions(ctx context.Context, snapshot prebuilds.PresetSnapshot) ([]*prebuilds.ReconciliationActions, error) {
|
func (c *StoreReconciler) CalculateActions(ctx context.Context, snapshot prebuilds.PresetSnapshot) ([]*prebuilds.ReconciliationActions, error) {
|
||||||
if ctx.Err() != nil {
|
if ctx.Err() != nil {
|
||||||
return nil, ctx.Err()
|
return nil, ctx.Err()
|
||||||
|
@ -853,11 +853,6 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
|
|||||||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||||||
controller := prebuilds.NewStoreReconciler(db, pubSub, cache, cfg, logger, clock, registry, fakeEnqueuer)
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cache, cfg, logger, clock, registry, fakeEnqueuer)
|
||||||
|
|
||||||
// Template admin to receive a notification.
|
|
||||||
templateAdmin := dbgen.User(t, db, database.User{
|
|
||||||
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
|
||||||
})
|
|
||||||
|
|
||||||
// Set up test environment with a template, version, and preset.
|
// Set up test environment with a template, version, and preset.
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
dbgen.User(t, db, database.User{
|
dbgen.User(t, db, database.User{
|
||||||
@ -938,20 +933,6 @@ func TestSkippingHardLimitedPresets(t *testing.T) {
|
|||||||
require.Equal(t, 1, len(workspaces))
|
require.Equal(t, 1, len(workspaces))
|
||||||
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
||||||
|
|
||||||
// When hard limit is reached, a notification should be sent.
|
|
||||||
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
|
||||||
if !assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
require.Len(t, matching, 1)
|
|
||||||
|
|
||||||
// When hard limit is reached, metric is set to 1.
|
// When hard limit is reached, metric is set to 1.
|
||||||
mf, err = registry.Gather()
|
mf, err = registry.Gather()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@ -1016,11 +997,6 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
|
|||||||
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
cache := files.New(prometheus.NewRegistry(), &coderdtest.FakeAuthorizer{})
|
||||||
controller := prebuilds.NewStoreReconciler(db, pubSub, cache, cfg, logger, clock, registry, fakeEnqueuer)
|
controller := prebuilds.NewStoreReconciler(db, pubSub, cache, cfg, logger, clock, registry, fakeEnqueuer)
|
||||||
|
|
||||||
// Template admin to receive a notification.
|
|
||||||
templateAdmin := dbgen.User(t, db, database.User{
|
|
||||||
RBACRoles: []string{codersdk.RoleTemplateAdmin},
|
|
||||||
})
|
|
||||||
|
|
||||||
// Set up test environment with a template, version, and preset.
|
// Set up test environment with a template, version, and preset.
|
||||||
ownerID := uuid.New()
|
ownerID := uuid.New()
|
||||||
dbgen.User(t, db, database.User{
|
dbgen.User(t, db, database.User{
|
||||||
@ -1125,20 +1101,6 @@ func TestHardLimitedPresetShouldNotBlockDeletion(t *testing.T) {
|
|||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
require.Equal(t, database.PrebuildStatusHardLimited, updatedPreset.PrebuildStatus)
|
||||||
|
|
||||||
// When hard limit is reached, a notification should be sent.
|
|
||||||
matching := fakeEnqueuer.Sent(func(notification *notificationstest.FakeNotification) bool {
|
|
||||||
if !assert.Equal(t, notifications.PrebuildFailureLimitReached, notification.TemplateID, "unexpected template") {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if !assert.Equal(t, templateAdmin.ID, notification.UserID, "unexpected receiver") {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return true
|
|
||||||
})
|
|
||||||
require.Len(t, matching, 1)
|
|
||||||
|
|
||||||
// When hard limit is reached, metric is set to 1.
|
// When hard limit is reached, metric is set to 1.
|
||||||
mf, err = registry.Gather()
|
mf, err = registry.Gather()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
Reference in New Issue
Block a user