feat(coderd/database): use template_usage_stats in GetTemplateInsightsByInterval query (#12667)

This PR updates the `GetTemplateInsightsByInterval` query to use rolled up `template_usage_stats` instead of raw agent and app stats.
This commit is contained in:
Mathias Fredriksson
2024-03-25 15:45:49 +02:00
committed by GitHub
parent 35d08434a9
commit 5f3be62c83
3 changed files with 126 additions and 177 deletions

View File

@ -3407,7 +3407,7 @@ func (q *FakeQuerier) GetTemplateInsights(_ context.Context, arg database.GetTem
return row, nil return row, nil
} }
func (q *FakeQuerier) GetTemplateInsightsByInterval(ctx context.Context, arg database.GetTemplateInsightsByIntervalParams) ([]database.GetTemplateInsightsByIntervalRow, error) { func (q *FakeQuerier) GetTemplateInsightsByInterval(_ context.Context, arg database.GetTemplateInsightsByIntervalParams) ([]database.GetTemplateInsightsByIntervalRow, error) {
err := validateDatabaseType(arg) err := validateDatabaseType(arg)
if err != nil { if err != nil {
return nil, err return nil, err
@ -3416,82 +3416,89 @@ func (q *FakeQuerier) GetTemplateInsightsByInterval(ctx context.Context, arg dat
q.mutex.RLock() q.mutex.RLock()
defer q.mutex.RUnlock() defer q.mutex.RUnlock()
type statByInterval struct { /*
startTime, endTime time.Time WITH
userSet map[uuid.UUID]struct{} ts AS (
templateIDSet map[uuid.UUID]struct{} SELECT
d::timestamptz AS from_,
CASE
WHEN (d::timestamptz + (@interval_days::int || ' day')::interval) <= @end_time::timestamptz
THEN (d::timestamptz + (@interval_days::int || ' day')::interval)
ELSE @end_time::timestamptz
END AS to_
FROM
-- Subtract 1 microsecond from end_time to avoid including the next interval in the results.
generate_series(@start_time::timestamptz, (@end_time::timestamptz) - '1 microsecond'::interval, (@interval_days::int || ' day')::interval) AS d
)
SELECT
ts.from_ AS start_time,
ts.to_ AS end_time,
array_remove(array_agg(DISTINCT tus.template_id), NULL)::uuid[] AS template_ids,
COUNT(DISTINCT tus.user_id) AS active_users
FROM
ts
LEFT JOIN
template_usage_stats AS tus
ON
tus.start_time >= ts.from_
AND tus.end_time <= ts.to_
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tus.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY
ts.from_, ts.to_;
*/
type interval struct {
From time.Time
To time.Time
}
var ts []interval
for d := arg.StartTime; d.Before(arg.EndTime); d = d.AddDate(0, 0, int(arg.IntervalDays)) {
to := d.AddDate(0, 0, int(arg.IntervalDays))
if to.After(arg.EndTime) {
to = arg.EndTime
}
ts = append(ts, interval{From: d, To: to})
} }
statsByInterval := []statByInterval{{arg.StartTime, arg.StartTime.AddDate(0, 0, int(arg.IntervalDays)), make(map[uuid.UUID]struct{}), make(map[uuid.UUID]struct{})}} type grouped struct {
for statsByInterval[len(statsByInterval)-1].endTime.Before(arg.EndTime) { TemplateIDs map[uuid.UUID]struct{}
statsByInterval = append(statsByInterval, statByInterval{statsByInterval[len(statsByInterval)-1].endTime, statsByInterval[len(statsByInterval)-1].endTime.AddDate(0, 0, int(arg.IntervalDays)), make(map[uuid.UUID]struct{}), make(map[uuid.UUID]struct{})}) UserIDs map[uuid.UUID]struct{}
} }
if statsByInterval[len(statsByInterval)-1].endTime.After(arg.EndTime) { groupedByInterval := make(map[interval]grouped)
statsByInterval[len(statsByInterval)-1].endTime = arg.EndTime for _, tus := range q.templateUsageStats {
} for _, t := range ts {
if tus.StartTime.Before(t.From) || tus.EndTime.After(t.To) {
for _, s := range q.workspaceAgentStats {
if s.CreatedAt.Before(arg.StartTime) || s.CreatedAt.Equal(arg.EndTime) || s.CreatedAt.After(arg.EndTime) {
continue
}
if len(arg.TemplateIDs) > 0 && !slices.Contains(arg.TemplateIDs, s.TemplateID) {
continue
}
if s.ConnectionCount == 0 {
continue
}
for _, ds := range statsByInterval {
if s.CreatedAt.Before(ds.startTime) || s.CreatedAt.Equal(ds.endTime) || s.CreatedAt.After(ds.endTime) {
continue continue
} }
ds.userSet[s.UserID] = struct{}{} if len(arg.TemplateIDs) > 0 && !slices.Contains(arg.TemplateIDs, tus.TemplateID) {
ds.templateIDSet[s.TemplateID] = struct{}{}
}
}
for _, s := range q.workspaceAppStats {
w, err := q.getWorkspaceByIDNoLock(ctx, s.WorkspaceID)
if err != nil {
return nil, err
}
if len(arg.TemplateIDs) > 0 && !slices.Contains(arg.TemplateIDs, w.TemplateID) {
continue
}
for _, ds := range statsByInterval {
// (was.session_started_at >= ts.from_ AND was.session_started_at < ts.to_)
// OR (was.session_ended_at > ts.from_ AND was.session_ended_at < ts.to_)
// OR (was.session_started_at < ts.from_ AND was.session_ended_at >= ts.to_)
if !(((s.SessionStartedAt.After(ds.startTime) || s.SessionStartedAt.Equal(ds.startTime)) && s.SessionStartedAt.Before(ds.endTime)) ||
(s.SessionEndedAt.After(ds.startTime) && s.SessionEndedAt.Before(ds.endTime)) ||
(s.SessionStartedAt.Before(ds.startTime) && (s.SessionEndedAt.After(ds.endTime) || s.SessionEndedAt.Equal(ds.endTime)))) {
continue continue
} }
g, ok := groupedByInterval[t]
ds.userSet[s.UserID] = struct{}{} if !ok {
ds.templateIDSet[w.TemplateID] = struct{}{} g = grouped{
TemplateIDs: make(map[uuid.UUID]struct{}),
UserIDs: make(map[uuid.UUID]struct{}),
}
}
g.TemplateIDs[tus.TemplateID] = struct{}{}
g.UserIDs[tus.UserID] = struct{}{}
groupedByInterval[t] = g
} }
} }
var result []database.GetTemplateInsightsByIntervalRow var rows []database.GetTemplateInsightsByIntervalRow
for _, ds := range statsByInterval { for _, t := range ts { // Ordered by interval.
templateIDs := make([]uuid.UUID, 0, len(ds.templateIDSet)) row := database.GetTemplateInsightsByIntervalRow{
for templateID := range ds.templateIDSet { StartTime: t.From,
templateIDs = append(templateIDs, templateID) EndTime: t.To,
} }
slices.SortFunc(templateIDs, func(a, b uuid.UUID) int { row.TemplateIDs = uniqueSortedUUIDs(maps.Keys(groupedByInterval[t].TemplateIDs))
return slice.Ascending(a.String(), b.String()) row.ActiveUsers = int64(len(groupedByInterval[t].UserIDs))
}) rows = append(rows, row)
result = append(result, database.GetTemplateInsightsByIntervalRow{
StartTime: ds.startTime,
EndTime: ds.endTime,
TemplateIDs: templateIDs,
ActiveUsers: int64(len(ds.userSet)),
})
} }
return result, nil
return rows, nil
} }
func (q *FakeQuerier) GetTemplateInsightsByTemplate(_ context.Context, arg database.GetTemplateInsightsByTemplateParams) ([]database.GetTemplateInsightsByTemplateRow, error) { func (q *FakeQuerier) GetTemplateInsightsByTemplate(_ context.Context, arg database.GetTemplateInsightsByTemplateParams) ([]database.GetTemplateInsightsByTemplateRow, error) {

View File

@ -2001,71 +2001,42 @@ func (q *sqlQuerier) GetTemplateInsights(ctx context.Context, arg GetTemplateIns
} }
const getTemplateInsightsByInterval = `-- name: GetTemplateInsightsByInterval :many const getTemplateInsightsByInterval = `-- name: GetTemplateInsightsByInterval :many
WITH ts AS ( WITH
SELECT ts AS (
d::timestamptz AS from_, SELECT
CASE d::timestamptz AS from_,
WHEN (d::timestamptz + ($1::int || ' day')::interval) <= $2::timestamptz CASE
THEN (d::timestamptz + ($1::int || ' day')::interval) WHEN (d::timestamptz + ($2::int || ' day')::interval) <= $3::timestamptz
ELSE $2::timestamptz THEN (d::timestamptz + ($2::int || ' day')::interval)
END AS to_ ELSE $3::timestamptz
FROM END AS to_
-- Subtract 1 microsecond from end_time to avoid including the next interval in the results. FROM
generate_series($3::timestamptz, ($2::timestamptz) - '1 microsecond'::interval, ($1::int || ' day')::interval) AS d -- Subtract 1 microsecond from end_time to avoid including the next interval in the results.
), unflattened_usage_by_interval AS ( generate_series($4::timestamptz, ($3::timestamptz) - '1 microsecond'::interval, ($2::int || ' day')::interval) AS d
-- We select data from both workspace agent stats and workspace app stats to
-- get a complete picture of usage. This matches how usage is calculated by
-- the combination of GetTemplateInsights and GetTemplateAppInsights. We use
-- a union all to avoid a costly distinct operation.
--
-- Note that one query must perform a left join so that all intervals are
-- present at least once.
SELECT
ts.from_, ts.to_,
was.template_id,
was.user_id
FROM ts
LEFT JOIN workspace_agent_stats was ON (
was.created_at >= ts.from_
AND was.created_at < ts.to_
AND was.connection_count > 0
AND CASE WHEN COALESCE(array_length($4::uuid[], 1), 0) > 0 THEN was.template_id = ANY($4::uuid[]) ELSE TRUE END
) )
GROUP BY ts.from_, ts.to_, was.template_id, was.user_id
UNION ALL
SELECT
ts.from_, ts.to_,
w.template_id,
was.user_id
FROM ts
JOIN workspace_app_stats was ON (
(was.session_started_at >= ts.from_ AND was.session_started_at < ts.to_)
OR (was.session_ended_at > ts.from_ AND was.session_ended_at < ts.to_)
OR (was.session_started_at < ts.from_ AND was.session_ended_at >= ts.to_)
)
JOIN workspaces w ON (
w.id = was.workspace_id
AND CASE WHEN COALESCE(array_length($4::uuid[], 1), 0) > 0 THEN w.template_id = ANY($4::uuid[]) ELSE TRUE END
)
GROUP BY ts.from_, ts.to_, w.template_id, was.user_id
)
SELECT SELECT
from_ AS start_time, ts.from_ AS start_time,
to_ AS end_time, ts.to_ AS end_time,
array_remove(array_agg(DISTINCT template_id), NULL)::uuid[] AS template_ids, array_remove(array_agg(DISTINCT tus.template_id), NULL)::uuid[] AS template_ids,
COUNT(DISTINCT user_id) AS active_users COUNT(DISTINCT tus.user_id) AS active_users
FROM unflattened_usage_by_interval FROM
GROUP BY from_, to_ ts
LEFT JOIN
template_usage_stats AS tus
ON
tus.start_time >= ts.from_
AND tus.end_time <= ts.to_
AND CASE WHEN COALESCE(array_length($1::uuid[], 1), 0) > 0 THEN tus.template_id = ANY($1::uuid[]) ELSE TRUE END
GROUP BY
ts.from_, ts.to_
` `
type GetTemplateInsightsByIntervalParams struct { type GetTemplateInsightsByIntervalParams struct {
TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"`
IntervalDays int32 `db:"interval_days" json:"interval_days"` IntervalDays int32 `db:"interval_days" json:"interval_days"`
EndTime time.Time `db:"end_time" json:"end_time"` EndTime time.Time `db:"end_time" json:"end_time"`
StartTime time.Time `db:"start_time" json:"start_time"` StartTime time.Time `db:"start_time" json:"start_time"`
TemplateIDs []uuid.UUID `db:"template_ids" json:"template_ids"`
} }
type GetTemplateInsightsByIntervalRow struct { type GetTemplateInsightsByIntervalRow struct {
@ -2081,10 +2052,10 @@ type GetTemplateInsightsByIntervalRow struct {
// interval/template, it will be included in the results with 0 active users. // interval/template, it will be included in the results with 0 active users.
func (q *sqlQuerier) GetTemplateInsightsByInterval(ctx context.Context, arg GetTemplateInsightsByIntervalParams) ([]GetTemplateInsightsByIntervalRow, error) { func (q *sqlQuerier) GetTemplateInsightsByInterval(ctx context.Context, arg GetTemplateInsightsByIntervalParams) ([]GetTemplateInsightsByIntervalRow, error) {
rows, err := q.db.QueryContext(ctx, getTemplateInsightsByInterval, rows, err := q.db.QueryContext(ctx, getTemplateInsightsByInterval,
pq.Array(arg.TemplateIDs),
arg.IntervalDays, arg.IntervalDays,
arg.EndTime, arg.EndTime,
arg.StartTime, arg.StartTime,
pq.Array(arg.TemplateIDs),
) )
if err != nil { if err != nil {
return nil, err return nil, err

View File

@ -297,64 +297,35 @@ GROUP BY template_id, display_name, slug_or_port;
-- time, if end time is a partial interval, it will be included in the results and -- time, if end time is a partial interval, it will be included in the results and
-- that interval will be shorter than a full one. If there is no data for a selected -- that interval will be shorter than a full one. If there is no data for a selected
-- interval/template, it will be included in the results with 0 active users. -- interval/template, it will be included in the results with 0 active users.
WITH ts AS ( WITH
SELECT ts AS (
d::timestamptz AS from_, SELECT
CASE d::timestamptz AS from_,
WHEN (d::timestamptz + (@interval_days::int || ' day')::interval) <= @end_time::timestamptz CASE
THEN (d::timestamptz + (@interval_days::int || ' day')::interval) WHEN (d::timestamptz + (@interval_days::int || ' day')::interval) <= @end_time::timestamptz
ELSE @end_time::timestamptz THEN (d::timestamptz + (@interval_days::int || ' day')::interval)
END AS to_ ELSE @end_time::timestamptz
FROM END AS to_
-- Subtract 1 microsecond from end_time to avoid including the next interval in the results. FROM
generate_series(@start_time::timestamptz, (@end_time::timestamptz) - '1 microsecond'::interval, (@interval_days::int || ' day')::interval) AS d -- Subtract 1 microsecond from end_time to avoid including the next interval in the results.
), unflattened_usage_by_interval AS ( generate_series(@start_time::timestamptz, (@end_time::timestamptz) - '1 microsecond'::interval, (@interval_days::int || ' day')::interval) AS d
-- We select data from both workspace agent stats and workspace app stats to
-- get a complete picture of usage. This matches how usage is calculated by
-- the combination of GetTemplateInsights and GetTemplateAppInsights. We use
-- a union all to avoid a costly distinct operation.
--
-- Note that one query must perform a left join so that all intervals are
-- present at least once.
SELECT
ts.*,
was.template_id,
was.user_id
FROM ts
LEFT JOIN workspace_agent_stats was ON (
was.created_at >= ts.from_
AND was.created_at < ts.to_
AND was.connection_count > 0
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN was.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
) )
GROUP BY ts.from_, ts.to_, was.template_id, was.user_id
UNION ALL
SELECT
ts.*,
w.template_id,
was.user_id
FROM ts
JOIN workspace_app_stats was ON (
(was.session_started_at >= ts.from_ AND was.session_started_at < ts.to_)
OR (was.session_ended_at > ts.from_ AND was.session_ended_at < ts.to_)
OR (was.session_started_at < ts.from_ AND was.session_ended_at >= ts.to_)
)
JOIN workspaces w ON (
w.id = was.workspace_id
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN w.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
)
GROUP BY ts.from_, ts.to_, w.template_id, was.user_id
)
SELECT SELECT
from_ AS start_time, ts.from_ AS start_time,
to_ AS end_time, ts.to_ AS end_time,
array_remove(array_agg(DISTINCT template_id), NULL)::uuid[] AS template_ids, array_remove(array_agg(DISTINCT tus.template_id), NULL)::uuid[] AS template_ids,
COUNT(DISTINCT user_id) AS active_users COUNT(DISTINCT tus.user_id) AS active_users
FROM unflattened_usage_by_interval FROM
GROUP BY from_, to_; ts
LEFT JOIN
template_usage_stats AS tus
ON
tus.start_time >= ts.from_
AND tus.end_time <= ts.to_
AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tus.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY
ts.from_, ts.to_;
-- name: GetTemplateUsageStats :many -- name: GetTemplateUsageStats :many
SELECT SELECT