mirror of
https://github.com/coder/coder.git
synced 2025-07-03 16:13:58 +00:00
fix: reduce cost of prebuild failure (#17697)
Relates to https://github.com/coder/coder/issues/17432 ### Part 1: Notes: - The `GetPresetsAtFailureLimit` SQL query is added. It is similar to `GetPresetsBackoff`: both use the same CTEs (`filtered_builds`, `time_sorted_builds`), but they are still different queries. - The query is executed on every loop iteration. We can consider marking a specific preset as permanently failed as an optimization to avoid executing the query on every loop iteration, but I decided not to do it for now. - By default `FailureHardLimit` is set to 3. - `FailureHardLimit` is configurable. Setting it to zero means that the hard limit is disabled. ### Part 2 Notes: - The `PrebuildFailureLimitReached` notification is added. - The notification is sent to template admins. - The notification is sent only the first time the hard limit is reached, but it will `log.Warn` on every loop iteration. - I introduced this enum: ```sql CREATE TYPE prebuild_status AS ENUM ( 'normal', -- Prebuilds are working as expected; this is the default, healthy state. 'hard_limited', -- Prebuilds have failed repeatedly and hit the configured hard failure limit; won't be retried anymore. 'validation_failed' -- Prebuilds failed due to a non-retryable validation error (e.g. template misconfiguration); won't be retried. ); ``` `validation_failed` is not used in this PR, but I think it will be used in the next one, so I wanted to save us an extra migration. - The notification looks like this: <img width="472" alt="image" src="https://github.com/user-attachments/assets/e10efea0-1790-4e7f-a65c-f94c40fced27" /> ### Latest notification views: <img width="463" alt="image" src="https://github.com/user-attachments/assets/11310c58-68d1-4075-a497-f76d854633fe" /> <img width="725" alt="image" src="https://github.com/user-attachments/assets/6bbfe21a-91ac-47c3-a9d1-21807bb0c53a" />
This commit is contained in:
committed by
GitHub
parent
e1934fe119
commit
53e8e9c7cd
@ -1343,6 +1343,67 @@ func AllPortShareProtocolValues() []PortShareProtocol {
|
||||
}
|
||||
}
|
||||
|
||||
// PrebuildStatus is the string-backed enum stored in the prebuild_status
// column of a template version preset.
type PrebuildStatus string

// Recognized PrebuildStatus values.
const (
	// PrebuildStatusHealthy is the "healthy" enum value.
	PrebuildStatusHealthy PrebuildStatus = "healthy"
	// PrebuildStatusHardLimited is the "hard_limited" enum value.
	PrebuildStatusHardLimited PrebuildStatus = "hard_limited"
	// PrebuildStatusValidationFailed is the "validation_failed" enum value.
	PrebuildStatusValidationFailed PrebuildStatus = "validation_failed"
)
|
||||
|
||||
func (e *PrebuildStatus) Scan(src interface{}) error {
|
||||
switch s := src.(type) {
|
||||
case []byte:
|
||||
*e = PrebuildStatus(s)
|
||||
case string:
|
||||
*e = PrebuildStatus(s)
|
||||
default:
|
||||
return fmt.Errorf("unsupported scan type for PrebuildStatus: %T", src)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type NullPrebuildStatus struct {
|
||||
PrebuildStatus PrebuildStatus `json:"prebuild_status"`
|
||||
Valid bool `json:"valid"` // Valid is true if PrebuildStatus is not NULL
|
||||
}
|
||||
|
||||
// Scan implements the Scanner interface.
|
||||
func (ns *NullPrebuildStatus) Scan(value interface{}) error {
|
||||
if value == nil {
|
||||
ns.PrebuildStatus, ns.Valid = "", false
|
||||
return nil
|
||||
}
|
||||
ns.Valid = true
|
||||
return ns.PrebuildStatus.Scan(value)
|
||||
}
|
||||
|
||||
// Value implements the driver Valuer interface.
|
||||
func (ns NullPrebuildStatus) Value() (driver.Value, error) {
|
||||
if !ns.Valid {
|
||||
return nil, nil
|
||||
}
|
||||
return string(ns.PrebuildStatus), nil
|
||||
}
|
||||
|
||||
func (e PrebuildStatus) Valid() bool {
|
||||
switch e {
|
||||
case PrebuildStatusHealthy,
|
||||
PrebuildStatusHardLimited,
|
||||
PrebuildStatusValidationFailed:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func AllPrebuildStatusValues() []PrebuildStatus {
|
||||
return []PrebuildStatus{
|
||||
PrebuildStatusHealthy,
|
||||
PrebuildStatusHardLimited,
|
||||
PrebuildStatusValidationFailed,
|
||||
}
|
||||
}
|
||||
|
||||
// ProvisionerDaemonStatus is the status of a provisioner daemon.
type ProvisionerDaemonStatus string
|
||||
|
||||
@ -3248,12 +3309,13 @@ type TemplateVersionParameter struct {
|
||||
}
|
||||
|
||||
type TemplateVersionPreset struct {
|
||||
ID uuid.UUID `db:"id" json:"id"`
|
||||
TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"`
|
||||
Name string `db:"name" json:"name"`
|
||||
CreatedAt time.Time `db:"created_at" json:"created_at"`
|
||||
DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"`
|
||||
InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"`
|
||||
ID uuid.UUID `db:"id" json:"id"`
|
||||
TemplateVersionID uuid.UUID `db:"template_version_id" json:"template_version_id"`
|
||||
Name string `db:"name" json:"name"`
|
||||
CreatedAt time.Time `db:"created_at" json:"created_at"`
|
||||
DesiredInstances sql.NullInt32 `db:"desired_instances" json:"desired_instances"`
|
||||
InvalidateAfterSecs sql.NullInt32 `db:"invalidate_after_secs" json:"invalidate_after_secs"`
|
||||
PrebuildStatus PrebuildStatus `db:"prebuild_status" json:"prebuild_status"`
|
||||
}
|
||||
|
||||
type TemplateVersionPresetParameter struct {
|
||||
|
Reference in New Issue
Block a user