mirror of
https://github.com/coder/coder.git
synced 2025-07-03 16:13:58 +00:00
fix(coderd): use insights for DAUs, simplify metricscache (#12775)
Fixes #12134 Fixes https://github.com/coder/customers/issues/384 Refs #12122
This commit is contained in:
committed by
GitHub
parent
5d82a78d4c
commit
421bf7e785
@ -3,15 +3,11 @@ package metricscache
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"math"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"golang.org/x/exp/maps"
|
||||
"golang.org/x/exp/slices"
|
||||
"golang.org/x/xerrors"
|
||||
|
||||
"cdr.dev/slog"
|
||||
@ -22,33 +18,6 @@ import (
|
||||
"github.com/coder/retry"
|
||||
)
|
||||
|
||||
// OnlyDate renders t as a calendar date in YYYY-MM-DD form, dropping the
// time-of-day component.
func OnlyDate(t time.Time) string {
	const dateLayout = "2006-01-02"
	return t.Format(dateLayout)
}
|
||||
|
||||
// deploymentTimezoneOffsets lists the timezone offsets for which deployment
// DAUs are cached. A request for an offset that is not listed here is served
// from the closest listed offset instead.
var deploymentTimezoneOffsets = []int{
	// UTC is deliberately the first entry.
	0,

	// A reduced set of 4 offsets that should cover *most* users. Caching all
	// 25 offsets can be too computationally expensive for large deployments,
	// so this is a stop-gap until the deployment DAUs query gets a more
	// robust fix.
	-6,
	3,
	6,
	10,

	// Complete list, currently disabled:
	// -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1,
	// 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
}
|
||||
|
||||
// templateTimezoneOffsets lists the timezone offsets used for each template's
// DAU calculations. Every template is processed once per listed offset, which
// is expensive, so this list must stay very small.
var templateTimezoneOffsets = []int{
	// UTC is deliberately the first entry, and for now the only one. More
	// offsets would give more accurate template DAUs, but the added cost
	// means the query needs optimizing before any are added.
	0,
}
|
||||
|
||||
// Cache holds the template metrics.
|
||||
// The aggregation queries responsible for these values can take up to a minute
|
||||
// on large deployments. Even in small deployments, aggregation queries can
|
||||
@ -59,9 +28,6 @@ type Cache struct {
|
||||
log slog.Logger
|
||||
intervals Intervals
|
||||
|
||||
deploymentDAUResponses atomic.Pointer[map[int]codersdk.DAUsResponse]
|
||||
templateDAUResponses atomic.Pointer[map[int]map[uuid.UUID]codersdk.DAUsResponse]
|
||||
templateUniqueUsers atomic.Pointer[map[uuid.UUID]int]
|
||||
templateWorkspaceOwners atomic.Pointer[map[uuid.UUID]int]
|
||||
templateAverageBuildTime atomic.Pointer[map[uuid.UUID]database.GetTemplateAverageBuildTimeRow]
|
||||
deploymentStatsResponse atomic.Pointer[codersdk.DeploymentStats]
|
||||
@ -71,13 +37,13 @@ type Cache struct {
|
||||
}
|
||||
|
||||
type Intervals struct {
|
||||
TemplateDAUs time.Duration
|
||||
DeploymentStats time.Duration
|
||||
TemplateBuildTimes time.Duration
|
||||
DeploymentStats time.Duration
|
||||
}
|
||||
|
||||
func New(db database.Store, log slog.Logger, intervals Intervals) *Cache {
|
||||
if intervals.TemplateDAUs <= 0 {
|
||||
intervals.TemplateDAUs = time.Hour
|
||||
if intervals.TemplateBuildTimes <= 0 {
|
||||
intervals.TemplateBuildTimes = time.Hour
|
||||
}
|
||||
if intervals.DeploymentStats <= 0 {
|
||||
intervals.DeploymentStats = time.Minute
|
||||
@ -97,7 +63,7 @@ func New(db database.Store, log slog.Logger, intervals Intervals) *Cache {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
c.run(ctx, "template daus", intervals.TemplateDAUs, c.refreshTemplateDAUs)
|
||||
c.run(ctx, "template build times", intervals.TemplateBuildTimes, c.refreshTemplateBuildTimes)
|
||||
}()
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
@ -109,104 +75,7 @@ func New(db database.Store, log slog.Logger, intervals Intervals) *Cache {
|
||||
return c
|
||||
}
|
||||
|
||||
// fillEmptyDays returns a copy of sortedDates in which every gap of more than
// 24 hours between two neighboring entries is filled with intermediate
// timestamps spaced exactly 24 hours apart, counted from the earlier entry.
//
// sortedDates must be in ascending order. The original entries are always
// kept. A nil or empty input yields a nil result.
func fillEmptyDays(sortedDates []time.Time) []time.Time {
	if len(sortedDates) == 0 {
		return nil
	}

	const day = 24 * time.Hour

	// The result holds at least every input date; gaps only make it longer.
	filled := make([]time.Time, 0, len(sortedDates))
	for i, current := range sortedDates {
		if i > 0 {
			// Step forward from the previous date one day at a time until the
			// remaining distance to the current date is a day or less.
			prev := sortedDates[i-1]
			for gap := current.Sub(prev); gap > day; gap -= day {
				prev = prev.Add(day)
				filled = append(filled, prev)
			}
		}
		filled = append(filled, current)
	}

	return filled
}
|
||||
|
||||
type dauRow interface {
|
||||
database.GetTemplateDAUsRow |
|
||||
database.GetDeploymentDAUsRow
|
||||
}
|
||||
|
||||
func convertDAUResponse[T dauRow](rows []T, tzOffset int) codersdk.DAUsResponse {
|
||||
respMap := make(map[time.Time][]uuid.UUID)
|
||||
for _, row := range rows {
|
||||
switch row := any(row).(type) {
|
||||
case database.GetDeploymentDAUsRow:
|
||||
respMap[row.Date] = append(respMap[row.Date], row.UserID)
|
||||
case database.GetTemplateDAUsRow:
|
||||
respMap[row.Date] = append(respMap[row.Date], row.UserID)
|
||||
default:
|
||||
// This should never happen.
|
||||
panic(fmt.Sprintf("%T not acceptable, developer error", row))
|
||||
}
|
||||
}
|
||||
|
||||
dates := maps.Keys(respMap)
|
||||
slices.SortFunc(dates, func(a, b time.Time) int {
|
||||
if a.Before(b) {
|
||||
return -1
|
||||
} else if a.Equal(b) {
|
||||
return 0
|
||||
}
|
||||
return 1
|
||||
})
|
||||
|
||||
var resp codersdk.DAUsResponse
|
||||
for _, date := range fillEmptyDays(dates) {
|
||||
resp.Entries = append(resp.Entries, codersdk.DAUEntry{
|
||||
// This date is truncated to 00:00:00 of the given day, so only
|
||||
// return date information.
|
||||
Date: OnlyDate(date),
|
||||
Amount: len(respMap[date]),
|
||||
})
|
||||
}
|
||||
resp.TZHourOffset = tzOffset
|
||||
|
||||
return resp
|
||||
}
|
||||
|
||||
func countUniqueUsers(rows []database.GetTemplateDAUsRow) int {
|
||||
seen := make(map[uuid.UUID]struct{}, len(rows))
|
||||
for _, row := range rows {
|
||||
seen[row.UserID] = struct{}{}
|
||||
}
|
||||
return len(seen)
|
||||
}
|
||||
|
||||
func (c *Cache) refreshDeploymentDAUs(ctx context.Context) error {
|
||||
//nolint:gocritic // This is a system service.
|
||||
ctx = dbauthz.AsSystemRestricted(ctx)
|
||||
|
||||
deploymentDAUs := make(map[int]codersdk.DAUsResponse)
|
||||
for _, tzOffset := range deploymentTimezoneOffsets {
|
||||
rows, err := c.database.GetDeploymentDAUs(ctx, int32(tzOffset))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
deploymentDAUs[tzOffset] = convertDAUResponse(rows, tzOffset)
|
||||
}
|
||||
|
||||
c.deploymentDAUResponses.Store(&deploymentDAUs)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cache) refreshTemplateDAUs(ctx context.Context) error {
|
||||
func (c *Cache) refreshTemplateBuildTimes(ctx context.Context) error {
|
||||
//nolint:gocritic // This is a system service.
|
||||
ctx = dbauthz.AsSystemRestricted(ctx)
|
||||
|
||||
@ -216,38 +85,13 @@ func (c *Cache) refreshTemplateDAUs(ctx context.Context) error {
|
||||
}
|
||||
|
||||
var (
|
||||
templateDAUs = make(map[int]map[uuid.UUID]codersdk.DAUsResponse, len(templates))
|
||||
templateUniqueUsers = make(map[uuid.UUID]int)
|
||||
templateWorkspaceOwners = make(map[uuid.UUID]int)
|
||||
templateAverageBuildTimes = make(map[uuid.UUID]database.GetTemplateAverageBuildTimeRow)
|
||||
)
|
||||
|
||||
err = c.refreshDeploymentDAUs(ctx)
|
||||
if err != nil {
|
||||
return xerrors.Errorf("deployment daus: %w", err)
|
||||
}
|
||||
|
||||
ids := make([]uuid.UUID, 0, len(templates))
|
||||
for _, template := range templates {
|
||||
ids = append(ids, template.ID)
|
||||
for _, tzOffset := range templateTimezoneOffsets {
|
||||
rows, err := c.database.GetTemplateDAUs(ctx, database.GetTemplateDAUsParams{
|
||||
TemplateID: template.ID,
|
||||
TzOffset: int32(tzOffset),
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if templateDAUs[tzOffset] == nil {
|
||||
templateDAUs[tzOffset] = make(map[uuid.UUID]codersdk.DAUsResponse)
|
||||
}
|
||||
templateDAUs[tzOffset][template.ID] = convertDAUResponse(rows, tzOffset)
|
||||
if _, set := templateUniqueUsers[template.ID]; !set {
|
||||
// If the uniqueUsers has not been counted yet, set the unique count with the rows we have.
|
||||
// We only need to calculate this once.
|
||||
templateUniqueUsers[template.ID] = countUniqueUsers(rows)
|
||||
}
|
||||
}
|
||||
|
||||
templateAvgBuildTime, err := c.database.GetTemplateAverageBuildTime(ctx, database.GetTemplateAverageBuildTimeParams{
|
||||
TemplateID: uuid.NullUUID{
|
||||
@ -275,8 +119,6 @@ func (c *Cache) refreshTemplateDAUs(ctx context.Context) error {
|
||||
}
|
||||
|
||||
c.templateWorkspaceOwners.Store(&templateWorkspaceOwners)
|
||||
c.templateDAUResponses.Store(&templateDAUs)
|
||||
c.templateUniqueUsers.Store(&templateUniqueUsers)
|
||||
c.templateAverageBuildTime.Store(&templateAverageBuildTimes)
|
||||
|
||||
return nil
|
||||
@ -359,99 +201,6 @@ func (c *Cache) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Cache) DeploymentDAUs(offset int) (int, *codersdk.DAUsResponse, bool) {
|
||||
m := c.deploymentDAUResponses.Load()
|
||||
if m == nil {
|
||||
return 0, nil, false
|
||||
}
|
||||
closestOffset, resp, ok := closest(*m, offset)
|
||||
if !ok {
|
||||
return 0, nil, false
|
||||
}
|
||||
return closestOffset, &resp, ok
|
||||
}
|
||||
|
||||
// TemplateDAUs returns an empty response if the template doesn't have users
|
||||
// or is loading for the first time.
|
||||
// The cache will select the closest DAUs response to given timezone offset.
|
||||
func (c *Cache) TemplateDAUs(id uuid.UUID, offset int) (int, *codersdk.DAUsResponse, bool) {
|
||||
m := c.templateDAUResponses.Load()
|
||||
if m == nil {
|
||||
// Data loading.
|
||||
return 0, nil, false
|
||||
}
|
||||
|
||||
closestOffset, resp, ok := closest(*m, offset)
|
||||
if !ok {
|
||||
// Probably no data.
|
||||
return 0, nil, false
|
||||
}
|
||||
|
||||
tpl, ok := resp[id]
|
||||
if !ok {
|
||||
// Probably no data.
|
||||
return 0, nil, false
|
||||
}
|
||||
|
||||
return closestOffset, &tpl, true
|
||||
}
|
||||
|
||||
// closest looks up offset in values and, if that exact key is absent, falls
// back to the entry whose key is numerically nearest to it. This lets a
// request for a timezone offset that is not cached be answered with the
// nearest one that is.
//
// It returns the selected key, its value, and true. When two keys are equally
// near, the smaller key wins so that the result does not depend on map
// iteration order. For an empty map it returns (-1, zero value, false).
func closest[V any](values map[int]V, offset int) (int, V, bool) {
	if len(values) == 0 {
		var zero V
		return -1, zero, false
	}

	// Fast path: the requested offset is cached as-is.
	if exact, found := values[offset]; found {
		return offset, exact, true
	}

	var (
		bestKey  int
		bestVal  V
		bestDist = math.MaxInt
	)
	for key, val := range values {
		dist := key - offset
		if dist < 0 {
			dist = -dist
		}

		// Skip candidates that are farther away, or equally far but not
		// smaller than the current best key.
		if dist > bestDist {
			continue
		}
		if dist == bestDist && key >= bestKey {
			continue
		}
		bestKey, bestVal, bestDist = key, val, dist
	}
	return bestKey, bestVal, true
}
|
||||
|
||||
// abs returns the absolute value of a.
func abs(a int) int {
	if a >= 0 {
		return a
	}
	return -a
}
|
||||
|
||||
// TemplateUniqueUsers returns the number of unique Template users
|
||||
// from all Cache data.
|
||||
func (c *Cache) TemplateUniqueUsers(id uuid.UUID) (int, bool) {
|
||||
m := c.templateUniqueUsers.Load()
|
||||
if m == nil {
|
||||
// Data loading.
|
||||
return -1, false
|
||||
}
|
||||
|
||||
resp, ok := (*m)[id]
|
||||
if !ok {
|
||||
// Probably no data.
|
||||
return -1, false
|
||||
}
|
||||
return resp, true
|
||||
}
|
||||
|
||||
func (c *Cache) TemplateBuildTimeStats(id uuid.UUID) codersdk.TemplateBuildTimeStats {
|
||||
unknown := codersdk.TemplateBuildTimeStats{
|
||||
codersdk.WorkspaceTransitionStart: {},
|
||||
|
Reference in New Issue
Block a user