The experimental functions in `golang.org/x/exp/slices` are available in the standard library as of Go 1.21.

Reference: https://go.dev/doc/go1.21#slices

Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
package dbmetrics

import (
	"context"
	"slices"
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"

	"cdr.dev/slog"

	"github.com/coder/coder/v2/coderd/database"
)

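// metricsStore wraps a database.Store and records transaction-level
// Prometheus metrics for every InTx call.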
type metricsStore struct {
	database.Store
	logger slog.Logger
	// txDuration is how long transactions take to execute.
	txDuration *prometheus.HistogramVec
	// txRetries is how many retries we are seeing for a given tx.
	txRetries *prometheus.CounterVec
}

// NewDBMetrics returns a database.Store that registers metrics for the
// database but does not handle individual queries.
// metricsStore is intended to always be used, because queryMetrics are a bit
// too verbose for many use cases.
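//
// A minimal usage sketch (the db, logger, and reg names below are
// illustrative call-site names, not part of this package):
//
//	db = dbmetrics.NewDBMetrics(db, logger, reg)
//	// Transactions run via db.InTx now record the
//	// coderd_db_tx_duration_seconds and coderd_db_tx_executions_count metrics.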
func NewDBMetrics(s database.Store, logger slog.Logger, reg prometheus.Registerer) database.Store {
	// Don't double-wrap.
	if slices.Contains(s.Wrappers(), wrapname) {
		return s
	}
	txRetries := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "coderd",
		Subsystem: "db",
		Name:      "tx_executions_count",
		Help:      "Total count of transactions executed. 'retries' is expected to be 0 for a successful transaction.",
	}, []string{
		"success", // Did the InTx function return an error?
		// Number of executions, since we have retry logic on serialization errors.
		// retries = Executions - 1 (as 1 execution is expected)
		"retries",
		// Uniquely naming some transactions can help debug recurring errors.
		"tx_id",
	})
	reg.MustRegister(txRetries)

	txDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Namespace: "coderd",
		Subsystem: "db",
		Name:      "tx_duration_seconds",
		Help:      "Duration of transactions in seconds.",
		Buckets:   prometheus.DefBuckets,
	}, []string{
		"success", // Did the InTx function return an error?
		// Uniquely naming some transactions can help debug recurring errors.
		"tx_id",
	})
	reg.MustRegister(txDuration)
	return &metricsStore{
		Store:      s,
		txDuration: txDuration,
		txRetries:  txRetries,
		logger:     logger,
	}
}

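// Wrappers returns the wrapper names applied to the underlying store, with
// this wrapper's name appended. NewDBMetrics uses it to avoid double-wrapping
// a store.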
func (m metricsStore) Wrappers() []string {
	return append(m.Store.Wrappers(), wrapname)
}

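// InTx runs f in a transaction via the wrapped store and records the
// transaction's duration and execution count, logging a warning or error
// whenever a transaction had to be retried.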
func (m metricsStore) InTx(f func(database.Store) error, options *database.TxOptions) error {
	if options == nil {
		options = database.DefaultTXOptions()
	}

	if options.TxIdentifier == "" {
		// Empty strings are hard to deal with in Grafana.
		options.TxIdentifier = "unlabeled"
	}

	start := time.Now()
	err := m.Store.InTx(f, options)
	dur := time.Since(start)
	// The number of unique label combinations is
	// 2 x #IDs x #of buckets,
	// so IDs should be used sparingly to prevent too much bloat.
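	// For example, with Prometheus's 11 default buckets (12 bucket series
	// including +Inf, plus _sum and _count) and, say, 10 distinct tx_id
	// values, the duration histogram alone yields roughly 2 x 10 x 14 = 280
	// series; the 10 is an illustrative count, not a limit enforced anywhere.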
	m.txDuration.With(prometheus.Labels{
		"success": strconv.FormatBool(err == nil),
		"tx_id":   options.TxIdentifier,
	}).Observe(dur.Seconds())

	m.txRetries.With(prometheus.Labels{
		"success": strconv.FormatBool(err == nil),
		"retries": strconv.FormatInt(int64(options.ExecutionCount()-1), 10),
		"tx_id":   options.TxIdentifier,
	}).Inc()

	// Log all serializable transactions that are retried.
	// This is expected to happen in production, but should be kept
	// to a minimum. If these logs happen frequently, something is wrong.
	if options.ExecutionCount() > 1 {
		l := m.logger.Warn
		if err != nil {
			// Error level if retries were not enough
			l = m.logger.Error
		}
		// No context is present in this function :(
		l(context.Background(), "database transaction hit serialization error and had to retry",
			slog.F("success", err == nil), // It can succeed on retry
			// Note the error might not be a serialization error. It is possible
			// the first error was a serialization error, and the error on the
			// retry is different. If this is the case, we still want to log it
			// since the first error was a serialization error.
			slog.Error(err), // Might be nil, that is ok!
			slog.F("executions", options.ExecutionCount()),
			slog.F("tx_id", options.TxIdentifier),
			slog.F("duration", dur),
		)
	}
	return err
}