-- name: GetUserLatencyInsights :many
-- GetUserLatencyInsights returns the median and 95th percentile connection
-- latency that users have experienced. The result can be filtered on
-- template_ids, meaning only user data from workspaces based on those templates
-- will be included.
SELECT
    tus.user_id,
    u.username,
    u.avatar_url,
    array_agg(DISTINCT tus.template_id)::uuid[] AS template_ids,
    COALESCE((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tus.median_latency_ms)), -1)::float AS workspace_connection_latency_50,
    COALESCE((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY tus.median_latency_ms)), -1)::float AS workspace_connection_latency_95
FROM
    template_usage_stats tus
JOIN
    users u
ON
    u.id = tus.user_id
WHERE
    tus.start_time >= @start_time::timestamptz
    AND tus.end_time <= @end_time::timestamptz
    AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tus.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY
    tus.user_id, u.username, u.avatar_url
ORDER BY
    tus.user_id ASC;

-- name: GetUserActivityInsights :many
-- GetUserActivityInsights returns a ranking of the top active users.
-- The result can be filtered on template_ids, meaning only user data
-- from workspaces based on those templates will be included.
-- Note: The usage_seconds and usage_seconds_cumulative differ only when
-- requesting deployment-wide (or multiple template) data. Cumulative
-- produces a bloated value if a user has used multiple templates
-- simultaneously.
WITH deployment_stats AS (
    SELECT
        start_time,
        user_id,
        array_agg(template_id) AS template_ids,
        -- See motivation in GetTemplateInsights for LEAST(SUM(n), 30).
        LEAST(SUM(usage_mins), 30) AS usage_mins
    FROM
        template_usage_stats
    WHERE
        start_time >= @start_time::timestamptz
        AND end_time <= @end_time::timestamptz
        AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
    GROUP BY
        start_time, user_id
), template_ids AS (
    SELECT
        user_id,
        array_agg(DISTINCT template_id) AS ids
    FROM
        deployment_stats, unnest(template_ids) template_id
    GROUP BY
        user_id
)

SELECT
    ds.user_id,
    u.username,
    u.avatar_url,
    t.ids::uuid[] AS template_ids,
    (SUM(ds.usage_mins) * 60)::bigint AS usage_seconds
FROM
    deployment_stats ds
JOIN
    users u
ON
    u.id = ds.user_id
JOIN
    template_ids t
ON
    ds.user_id = t.user_id
GROUP BY
    ds.user_id, u.username, u.avatar_url, t.ids
ORDER BY
    ds.user_id ASC;

-- name: GetTemplateInsights :one
-- GetTemplateInsights returns the aggregate user-produced usage of all
-- workspaces in a given timeframe. The template IDs, active users, and
-- usage_seconds all reflect any usage in the template, including apps.
--
-- When combining data from multiple templates, we must make a guess at
-- how the user behaved for the 30 minute interval. In this case we make
-- the assumption that if the user used two workspaces for 15 minutes,
-- they did so sequentially, thus we sum the usage up to a maximum of
-- 30 minutes with LEAST(SUM(n), 30).
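--
-- Illustration only (hypothetical values, not part of this schema), the
-- capping behaves like:
--
--   SELECT LEAST(SUM(usage_mins), 30) AS capped_mins
--   FROM (VALUES (20), (20)) AS t(usage_mins);
--   -- => 30, even though the raw sum is 40.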
WITH insights AS (
    SELECT
        user_id,
        -- See motivation in GetTemplateInsights for LEAST(SUM(n), 30).
        LEAST(SUM(usage_mins), 30) AS usage_mins,
        LEAST(SUM(ssh_mins), 30) AS ssh_mins,
        LEAST(SUM(sftp_mins), 30) AS sftp_mins,
        LEAST(SUM(reconnecting_pty_mins), 30) AS reconnecting_pty_mins,
        LEAST(SUM(vscode_mins), 30) AS vscode_mins,
        LEAST(SUM(jetbrains_mins), 30) AS jetbrains_mins
    FROM
        template_usage_stats
    WHERE
        start_time >= @start_time::timestamptz
        AND end_time <= @end_time::timestamptz
        AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
    GROUP BY
        start_time, user_id
), templates AS (
    SELECT
        array_agg(DISTINCT template_id) AS template_ids,
        array_agg(DISTINCT template_id) FILTER (WHERE ssh_mins > 0) AS ssh_template_ids,
        array_agg(DISTINCT template_id) FILTER (WHERE sftp_mins > 0) AS sftp_template_ids,
        array_agg(DISTINCT template_id) FILTER (WHERE reconnecting_pty_mins > 0) AS reconnecting_pty_template_ids,
        array_agg(DISTINCT template_id) FILTER (WHERE vscode_mins > 0) AS vscode_template_ids,
        array_agg(DISTINCT template_id) FILTER (WHERE jetbrains_mins > 0) AS jetbrains_template_ids
    FROM
        template_usage_stats
    WHERE
        start_time >= @start_time::timestamptz
        AND end_time <= @end_time::timestamptz
        AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
)

SELECT
    COALESCE((SELECT template_ids FROM templates), '{}')::uuid[] AS template_ids, -- Includes app usage.
    COALESCE((SELECT ssh_template_ids FROM templates), '{}')::uuid[] AS ssh_template_ids,
    COALESCE((SELECT sftp_template_ids FROM templates), '{}')::uuid[] AS sftp_template_ids,
    COALESCE((SELECT reconnecting_pty_template_ids FROM templates), '{}')::uuid[] AS reconnecting_pty_template_ids,
    COALESCE((SELECT vscode_template_ids FROM templates), '{}')::uuid[] AS vscode_template_ids,
    COALESCE((SELECT jetbrains_template_ids FROM templates), '{}')::uuid[] AS jetbrains_template_ids,
    COALESCE(COUNT(DISTINCT user_id), 0)::bigint AS active_users, -- Includes app usage.
    COALESCE(SUM(usage_mins) * 60, 0)::bigint AS usage_total_seconds, -- Includes app usage.
    COALESCE(SUM(ssh_mins) * 60, 0)::bigint AS usage_ssh_seconds,
    COALESCE(SUM(sftp_mins) * 60, 0)::bigint AS usage_sftp_seconds,
    COALESCE(SUM(reconnecting_pty_mins) * 60, 0)::bigint AS usage_reconnecting_pty_seconds,
    COALESCE(SUM(vscode_mins) * 60, 0)::bigint AS usage_vscode_seconds,
    COALESCE(SUM(jetbrains_mins) * 60, 0)::bigint AS usage_jetbrains_seconds
FROM
    insights;
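-- Illustration only of the aggregate FILTER and COALESCE-to-empty-array
-- patterns used in GetTemplateInsights above (hypothetical values):
--
--   SELECT COALESCE(array_agg(x) FILTER (WHERE x > 1), '{}')::int[] AS xs
--   FROM (VALUES (1), (2)) AS t(x);
--   -- => {2}; with "WHERE x > 5" the aggregate is NULL and the result
--   -- becomes {} instead.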
-- name: GetTemplateInsightsByTemplate :many
-- GetTemplateInsightsByTemplate is used for Prometheus metrics. Keep
-- in sync with GetTemplateInsights and UpsertTemplateUsageStats.
WITH
    -- This CTE is used to truncate agent usage into minute buckets, then
    -- flatten the user's agent usage within the template so that usage in
    -- multiple workspaces under one template is only counted once for
    -- every minute (per user).
    insights AS (
        SELECT
            template_id,
            user_id,
            COUNT(DISTINCT CASE WHEN session_count_ssh > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS ssh_mins,
            -- TODO(mafredri): Enable when we have the column.
            -- COUNT(DISTINCT CASE WHEN session_count_sftp > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS sftp_mins,
            COUNT(DISTINCT CASE WHEN session_count_reconnecting_pty > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS reconnecting_pty_mins,
            COUNT(DISTINCT CASE WHEN session_count_vscode > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS vscode_mins,
            COUNT(DISTINCT CASE WHEN session_count_jetbrains > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS jetbrains_mins,
            -- NOTE(mafredri): The agent stats are currently very unreliable, and
            -- sometimes the connections are missing, even during active sessions.
            -- Since we can't fully rely on this, we check for "any connection
            -- within this bucket". A better solution here would be preferable.
            MAX(connection_count) > 0 AS has_connection
        FROM
            workspace_agent_stats
        WHERE
            created_at >= @start_time::timestamptz
            AND created_at < @end_time::timestamptz
            -- Inclusion criteria to filter out empty results.
            AND (
                session_count_ssh > 0
                -- TODO(mafredri): Enable when we have the column.
                -- OR session_count_sftp > 0
                OR session_count_reconnecting_pty > 0
                OR session_count_vscode > 0
                OR session_count_jetbrains > 0
            )
        GROUP BY
            template_id, user_id
    )

SELECT
    template_id,
    COUNT(DISTINCT user_id)::bigint AS active_users,
    (SUM(vscode_mins) * 60)::bigint AS usage_vscode_seconds,
    (SUM(jetbrains_mins) * 60)::bigint AS usage_jetbrains_seconds,
    (SUM(reconnecting_pty_mins) * 60)::bigint AS usage_reconnecting_pty_seconds,
    (SUM(ssh_mins) * 60)::bigint AS usage_ssh_seconds
FROM
    insights
WHERE
    has_connection
GROUP BY
    template_id;

-- name: GetTemplateAppInsights :many
-- GetTemplateAppInsights returns the aggregate usage of each app in a given
-- timeframe. The result can be filtered on template_ids, meaning only user data
-- from workspaces based on those templates will be included.
WITH
    -- Create a list of all unique apps by template, this is used to
    -- filter out irrelevant template usage stats.
    apps AS (
        SELECT DISTINCT ON (ws.template_id, app.slug)
            ws.template_id,
            app.slug,
            app.display_name,
            app.icon
        FROM
            workspaces ws
        JOIN workspace_builds AS build ON build.workspace_id = ws.id
        JOIN workspace_resources AS resource ON resource.job_id = build.job_id
        JOIN workspace_agents AS agent ON agent.resource_id = resource.id
        JOIN workspace_apps AS app ON app.agent_id = agent.id
        WHERE
            -- Partial query parameter filter.
            CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN ws.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
        ORDER BY
            ws.template_id, app.slug, app.created_at DESC
    ),
    -- Join apps and template usage stats to filter out irrelevant rows.
    -- Note that this way of joining will eliminate all data-points that
    -- aren't for "real" apps. That means ports are ignored (even though
    -- they're part of the dataset), as well as "[terminal]" entries
    -- which are alternate datapoints for reconnecting pty usage.
    template_usage_stats_with_apps AS (
        SELECT
            tus.start_time,
            tus.template_id,
            tus.user_id,
            apps.slug,
            apps.display_name,
            apps.icon,
            (tus.app_usage_mins -> apps.slug)::smallint AS usage_mins
        FROM
            apps
        JOIN
            template_usage_stats AS tus
        ON
            -- Query parameter filter.
            tus.start_time >= @start_time::timestamptz
            AND tus.end_time <= @end_time::timestamptz
            AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tus.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
            -- Primary join condition.
            AND tus.template_id = apps.template_id
            AND tus.app_usage_mins ? apps.slug -- Key exists in object.
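            -- Illustration only: app_usage_mins is a jsonb object keyed by
            -- app slug, e.g. '{"code-server": 30}'. The ? operator tests key
            -- existence and -> extracts the value:
            --
            --   SELECT '{"code-server": 30}'::jsonb ? 'code-server';              -- => true
            --   SELECT ('{"code-server": 30}'::jsonb -> 'code-server')::smallint; -- => 30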
    ),
    -- Group the app insights by interval, user and unique app. This
    -- allows us to deduplicate a user using the same app across
    -- multiple templates.
    app_insights AS (
        SELECT
            user_id,
            slug,
            display_name,
            icon,
            -- See motivation in GetTemplateInsights for LEAST(SUM(n), 30).
            LEAST(SUM(usage_mins), 30) AS usage_mins
        FROM
            template_usage_stats_with_apps
        GROUP BY
            start_time, user_id, slug, display_name, icon
    ),
    -- Analyze the user's unique app usage across all templates. Count
    -- usage across consecutive intervals as continuous usage (see the
    -- worked example after this CTE).
    times_used AS (
        SELECT DISTINCT ON (user_id, slug, display_name, icon, uniq)
            slug,
            display_name,
            icon,
            -- Turn start_time into a unique identifier that identifies a user's
            -- continuous app usage. The value of uniq is otherwise garbage.
            --
            -- Since we're aggregating per user app usage across templates,
            -- there can be duplicate start_times. To handle this, we use the
            -- dense_rank() function, otherwise row_number() would suffice.
            start_time - (
                dense_rank() OVER (
                    PARTITION BY
                        user_id, slug, display_name, icon
                    ORDER BY
                        start_time
                ) * '30 minutes'::interval
            ) AS uniq
        FROM
            template_usage_stats_with_apps
    ),
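    -- Illustration only (hypothetical timestamps): for one user and app,
    -- intervals starting at 00:00, 00:30 and 02:00 get dense_rank() 1, 2, 3:
    --
    --   00:00 - 1 * 30 min => 23:30 (previous day)
    --   00:30 - 2 * 30 min => 23:30 (same uniq: continuous usage)
    --   02:00 - 3 * 30 min => 00:30 (new uniq: usage resumed after a gap)
    --
    -- Two distinct uniq values mean this app counts as used two times.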
    -- Even though we allow identical apps to be aggregated across
    -- templates, we still want to be able to report which templates
    -- the data comes from.
    templates AS (
        SELECT
            slug,
            display_name,
            icon,
            array_agg(DISTINCT template_id)::uuid[] AS template_ids
        FROM
            template_usage_stats_with_apps
        GROUP BY
            slug, display_name, icon
    )

SELECT
    t.template_ids,
    COUNT(DISTINCT ai.user_id) AS active_users,
    ai.slug,
    ai.display_name,
    ai.icon,
    (SUM(ai.usage_mins) * 60)::bigint AS usage_seconds,
    COALESCE((
        SELECT
            COUNT(*)
        FROM
            times_used
        WHERE
            times_used.slug = ai.slug
            AND times_used.display_name = ai.display_name
            AND times_used.icon = ai.icon
    ), 0)::bigint AS times_used
FROM
    app_insights AS ai
JOIN
    templates AS t
ON
    t.slug = ai.slug
    AND t.display_name = ai.display_name
    AND t.icon = ai.icon
GROUP BY
    t.template_ids, ai.slug, ai.display_name, ai.icon;

-- name: GetTemplateAppInsightsByTemplate :many
-- GetTemplateAppInsightsByTemplate is used for Prometheus metrics. Keep
-- in sync with GetTemplateAppInsights and UpsertTemplateUsageStats.
WITH
    -- This CTE is used to explode app usage into minute buckets, then
    -- flatten the user's app usage within the template so that usage in
    -- multiple workspaces under one template is only counted once for
    -- every minute.
    app_insights AS (
        SELECT
            w.template_id,
            was.user_id,
            -- Both app stats and agent stats track web terminal usage, but
            -- by different means. The app stats value should be more
            -- accurate so we don't want to discard it just yet.
            CASE
                WHEN was.access_method = 'terminal'
                THEN '[terminal]' -- Unique name, app names can't contain brackets.
                ELSE was.slug_or_port
            END::text AS app_name,
            COALESCE(wa.display_name, '') AS display_name,
            (wa.slug IS NOT NULL)::boolean AS is_app,
            COUNT(DISTINCT s.minute_bucket) AS app_minutes
        FROM
            workspace_app_stats AS was
        JOIN
            workspaces AS w
        ON
            w.id = was.workspace_id
        -- We do a left join here because we want to include user IDs that
        -- have used e.g. ports when counting active users.
        LEFT JOIN
            workspace_apps wa
        ON
            wa.agent_id = was.agent_id
            AND wa.slug = was.slug_or_port
        -- Generate a series of minute buckets for each session for computing
        -- the minutes/bucket.
        CROSS JOIN
            generate_series(
                date_trunc('minute', was.session_started_at),
                -- Subtract 1 μs to avoid creating an extra series.
                date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
                '1 minute'::interval
            ) AS s(minute_bucket)
        WHERE
            s.minute_bucket >= @start_time::timestamptz
            AND s.minute_bucket < @end_time::timestamptz
        GROUP BY
            w.template_id, was.user_id, was.access_method, was.slug_or_port,
            wa.display_name, wa.slug
    )

SELECT
    template_id,
    app_name AS slug_or_port,
    display_name AS display_name,
    COUNT(DISTINCT user_id)::bigint AS active_users,
    (SUM(app_minutes) * 60)::bigint AS usage_seconds
FROM
    app_insights
WHERE
    is_app IS TRUE
GROUP BY
    template_id, slug_or_port, display_name;

-- name: GetTemplateInsightsByInterval :many
-- GetTemplateInsightsByInterval returns all intervals between the start and
-- end time. If the end time is a partial interval, it will be included in the
-- results and that interval will be shorter than a full one. If there is no
-- data for a selected interval/template, it will be included in the results
-- with 0 active users.
WITH
    ts AS (
        SELECT
            d::timestamptz AS from_,
            LEAST(
                (d::timestamptz + (@interval_days::int || ' day')::interval)::timestamptz,
                @end_time::timestamptz
            )::timestamptz AS to_
        FROM
            generate_series(
                @start_time::timestamptz,
                -- Subtract 1 μs to avoid creating an extra series.
                (@end_time::timestamptz) - '1 microsecond'::interval,
                (@interval_days::int || ' day')::interval
            ) AS d
    )

SELECT
    ts.from_ AS start_time,
    ts.to_ AS end_time,
    array_remove(array_agg(DISTINCT tus.template_id), NULL)::uuid[] AS template_ids,
    COUNT(DISTINCT tus.user_id) AS active_users
FROM
    ts
LEFT JOIN
    template_usage_stats AS tus
ON
    tus.start_time >= ts.from_
    AND tus.start_time < ts.to_
    -- End time exclusion criteria optimization for index.
    AND tus.end_time <= ts.to_
    AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tus.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
GROUP BY
    ts.from_, ts.to_;
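-- Illustration only: generate_series includes its stop value, so without the
-- 1 μs subtraction a range ending exactly on a boundary would produce one
-- extra interval:
--
--   SELECT generate_series('2024-01-01'::timestamptz,
--                          '2024-01-03'::timestamptz,
--                          '1 day'::interval);
--   -- => 3 rows (Jan 1, 2, 3); subtracting 1 microsecond from the stop
--   -- value yields the intended 2 interval starts (Jan 1, 2).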
-- name: GetTemplateUsageStats :many
SELECT
    *
FROM
    template_usage_stats
WHERE
    start_time >= @start_time::timestamptz
    AND end_time <= @end_time::timestamptz
    AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN template_id = ANY(@template_ids::uuid[]) ELSE TRUE END;

-- name: UpsertTemplateUsageStats :exec
-- This query aggregates the workspace_agent_stats and workspace_app_stats data
-- into a single table for efficient storage and querying. Half-hour buckets are
-- used to store the data, and the minutes are summed for each user and template
-- combination. The result is stored in the template_usage_stats table.
WITH
    latest_start AS (
        SELECT
            -- Truncate to hour so that we always look at even ranges of data.
            date_trunc('hour', COALESCE(
                MAX(start_time) - '1 hour'::interval,
                -- Fallback when there are no template usage stats yet.
                -- App stats can exist before this, but not agent stats,
                -- limit the lookback to avoid inconsistency.
                (SELECT MIN(created_at) FROM workspace_agent_stats)
            )) AS t
        FROM
            template_usage_stats
    ),
    workspace_app_stat_buckets AS (
        SELECT
            -- Truncate the minute to the nearest half hour, this is the
            -- bucket size for the data.
            date_trunc('hour', s.minute_bucket) + trunc(date_part('minute', s.minute_bucket) / 30) * 30 * '1 minute'::interval AS time_bucket,
            w.template_id,
            was.user_id,
            -- Both app stats and agent stats track web terminal usage, but
            -- by different means. The app stats value should be more
            -- accurate so we don't want to discard it just yet.
            CASE
                WHEN was.access_method = 'terminal'
                THEN '[terminal]' -- Unique name, app names can't contain brackets.
                ELSE was.slug_or_port
            END AS app_name,
            COUNT(DISTINCT s.minute_bucket) AS app_minutes,
            -- Store each unique minute bucket for later merge between datasets.
            array_agg(DISTINCT s.minute_bucket) AS minute_buckets
        FROM
            workspace_app_stats AS was
        JOIN
            workspaces AS w
        ON
            w.id = was.workspace_id
        -- Generate a series of minute buckets for each session for computing
        -- the minutes/bucket.
        CROSS JOIN
            generate_series(
                date_trunc('minute', was.session_started_at),
                -- Subtract 1 μs to avoid creating an extra series.
                date_trunc('minute', was.session_ended_at - '1 microsecond'::interval),
                '1 minute'::interval
            ) AS s(minute_bucket)
        WHERE
            -- s.minute_bucket >= @start_time::timestamptz
            -- AND s.minute_bucket < @end_time::timestamptz
            s.minute_bucket >= (SELECT t FROM latest_start)
            AND s.minute_bucket < NOW()
        GROUP BY
            time_bucket, w.template_id, was.user_id, was.access_method, was.slug_or_port
    ),
    agent_stats_buckets AS (
        SELECT
            -- Truncate the minute to the nearest half hour, this is the
            -- bucket size for the data.
            date_trunc('hour', created_at) + trunc(date_part('minute', created_at) / 30) * 30 * '1 minute'::interval AS time_bucket,
            template_id,
            user_id,
            -- Store each unique minute bucket for later merge between datasets.
            array_agg(
                DISTINCT CASE
                WHEN
                    session_count_ssh > 0
                    -- TODO(mafredri): Enable when we have the column.
                    -- OR session_count_sftp > 0
                    OR session_count_reconnecting_pty > 0
                    OR session_count_vscode > 0
                    OR session_count_jetbrains > 0
                THEN
                    date_trunc('minute', created_at)
                ELSE
                    NULL
                END
            ) AS minute_buckets,
            COUNT(DISTINCT CASE WHEN session_count_ssh > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS ssh_mins,
            -- TODO(mafredri): Enable when we have the column.
            -- COUNT(DISTINCT CASE WHEN session_count_sftp > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS sftp_mins,
            COUNT(DISTINCT CASE WHEN session_count_reconnecting_pty > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS reconnecting_pty_mins,
            COUNT(DISTINCT CASE WHEN session_count_vscode > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS vscode_mins,
            COUNT(DISTINCT CASE WHEN session_count_jetbrains > 0 THEN date_trunc('minute', created_at) ELSE NULL END) AS jetbrains_mins,
            -- NOTE(mafredri): The agent stats are currently very unreliable, and
            -- sometimes the connections are missing, even during active sessions.
            -- Since we can't fully rely on this, we check for "any connection
            -- during this half-hour". A better solution here would be preferable.
            MAX(connection_count) > 0 AS has_connection
        FROM
            workspace_agent_stats
        WHERE
            -- created_at >= @start_time::timestamptz
            -- AND created_at < @end_time::timestamptz
            created_at >= (SELECT t FROM latest_start)
            AND created_at < NOW()
            -- Inclusion criteria to filter out empty results.
            AND (
                session_count_ssh > 0
                -- TODO(mafredri): Enable when we have the column.
                -- OR session_count_sftp > 0
                OR session_count_reconnecting_pty > 0
                OR session_count_vscode > 0
                OR session_count_jetbrains > 0
            )
        GROUP BY
            time_bucket, template_id, user_id
    ),
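    -- Illustration only: the half-hour truncation above maps timestamps on
    -- either side of a :30 boundary into different buckets:
    --
    --   SELECT date_trunc('hour', t) + trunc(date_part('minute', t) / 30) * 30 * '1 minute'::interval
    --   FROM (VALUES ('2024-01-01 10:29'::timestamp),
    --                ('2024-01-01 10:31'::timestamp)) AS v(t);
    --   -- => 10:00 and 10:30 respectively.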
    stats AS (
        SELECT
            stats.time_bucket AS start_time,
            stats.time_bucket + '30 minutes'::interval AS end_time,
            stats.template_id,
            stats.user_id,
            -- Sum/distinct to handle zero/duplicate values due to UNION and
            -- unnest.
            COUNT(DISTINCT minute_bucket) AS usage_mins,
            array_agg(DISTINCT minute_bucket) AS minute_buckets,
            SUM(DISTINCT stats.ssh_mins) AS ssh_mins,
            SUM(DISTINCT stats.sftp_mins) AS sftp_mins,
            SUM(DISTINCT stats.reconnecting_pty_mins) AS reconnecting_pty_mins,
            SUM(DISTINCT stats.vscode_mins) AS vscode_mins,
            SUM(DISTINCT stats.jetbrains_mins) AS jetbrains_mins,
            -- This is what we unnested, re-nest as json.
            jsonb_object_agg(stats.app_name, stats.app_minutes) FILTER (WHERE stats.app_name IS NOT NULL) AS app_usage_mins
        FROM (
            SELECT
                time_bucket,
                template_id,
                user_id,
                0 AS ssh_mins,
                0 AS sftp_mins,
                0 AS reconnecting_pty_mins,
                0 AS vscode_mins,
                0 AS jetbrains_mins,
                app_name,
                app_minutes,
                minute_buckets
            FROM
                workspace_app_stat_buckets

            UNION ALL

            SELECT
                time_bucket,
                template_id,
                user_id,
                ssh_mins,
                -- TODO(mafredri): Enable when we have the column.
                0 AS sftp_mins,
                reconnecting_pty_mins,
                vscode_mins,
                jetbrains_mins,
                NULL AS app_name,
                NULL AS app_minutes,
                minute_buckets
            FROM
                agent_stats_buckets
            WHERE
                -- See note in the agent_stats_buckets CTE.
                has_connection
        ) AS stats, unnest(minute_buckets) AS minute_bucket
        GROUP BY
            stats.time_bucket, stats.template_id, stats.user_id
    ),
    minute_buckets AS (
        -- Create distinct minute buckets for user-activity, so we can filter
        -- out irrelevant latencies.
        SELECT DISTINCT ON (stats.start_time, stats.template_id, stats.user_id, minute_bucket)
            stats.start_time,
            stats.template_id,
            stats.user_id,
            minute_bucket
        FROM
            stats, unnest(minute_buckets) AS minute_bucket
    ),
    latencies AS (
        -- Select all non-zero latencies for all the minutes that a user used
        -- the workspace in some way.
        SELECT
            mb.start_time,
            mb.template_id,
            mb.user_id,
            -- TODO(mafredri): We're doing medians on medians here, we may want to
            -- improve upon this at some point.
            PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY was.connection_median_latency_ms)::real AS median_latency_ms
        FROM
            minute_buckets AS mb
        JOIN
            workspace_agent_stats AS was
        ON
            was.created_at >= (SELECT t FROM latest_start)
            AND was.created_at < NOW()
            AND date_trunc('minute', was.created_at) = mb.minute_bucket
            AND was.template_id = mb.template_id
            AND was.user_id = mb.user_id
            AND was.connection_median_latency_ms > 0
        GROUP BY
            mb.start_time, mb.template_id, mb.user_id
    )
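-- Illustration only: PERCENTILE_CONT(0.5) interpolates the median, so the
-- result need not be an observed value:
--
--   SELECT PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY v)
--   FROM (VALUES (10.0), (20.0), (100.0), (200.0)) AS t(v);
--   -- => 60 (midpoint of 20 and 100).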
INSERT INTO template_usage_stats AS tus (
    start_time,
    end_time,
    template_id,
    user_id,
    usage_mins,
    median_latency_ms,
    ssh_mins,
    sftp_mins,
    reconnecting_pty_mins,
    vscode_mins,
    jetbrains_mins,
    app_usage_mins
) (
    SELECT
        stats.start_time,
        stats.end_time,
        stats.template_id,
        stats.user_id,
        stats.usage_mins,
        latencies.median_latency_ms,
        stats.ssh_mins,
        stats.sftp_mins,
        stats.reconnecting_pty_mins,
        stats.vscode_mins,
        stats.jetbrains_mins,
        stats.app_usage_mins
    FROM
        stats
    LEFT JOIN
        latencies
    ON
        -- The latencies GROUP BY ensures there is at most one row.
        latencies.start_time = stats.start_time
        AND latencies.template_id = stats.template_id
        AND latencies.user_id = stats.user_id
)
ON CONFLICT
    (start_time, template_id, user_id)
DO UPDATE
SET
    usage_mins = EXCLUDED.usage_mins,
    median_latency_ms = EXCLUDED.median_latency_ms,
    ssh_mins = EXCLUDED.ssh_mins,
    sftp_mins = EXCLUDED.sftp_mins,
    reconnecting_pty_mins = EXCLUDED.reconnecting_pty_mins,
    vscode_mins = EXCLUDED.vscode_mins,
    jetbrains_mins = EXCLUDED.jetbrains_mins,
    app_usage_mins = EXCLUDED.app_usage_mins
WHERE
    (tus.*) IS DISTINCT FROM (EXCLUDED.*);

-- name: GetTemplateParameterInsights :many
-- GetTemplateParameterInsights looks, for each template in a given timeframe,
-- for the latest workspace build (for every workspace) created in the
-- timeframe, and returns the aggregate usage counts of parameter values.
WITH latest_workspace_builds AS (
    SELECT
        wb.id,
        wbmax.template_id,
        wb.template_version_id
    FROM (
        SELECT
            tv.template_id,
            wbmax.workspace_id,
            MAX(wbmax.build_number) AS max_build_number
        FROM
            workspace_builds wbmax
        JOIN
            template_versions tv
        ON
            tv.id = wbmax.template_version_id
        WHERE
            wbmax.created_at >= @start_time::timestamptz
            AND wbmax.created_at < @end_time::timestamptz
            AND CASE WHEN COALESCE(array_length(@template_ids::uuid[], 1), 0) > 0 THEN tv.template_id = ANY(@template_ids::uuid[]) ELSE TRUE END
        GROUP BY
            tv.template_id, wbmax.workspace_id
    ) wbmax
    JOIN
        workspace_builds wb
    ON
        wb.workspace_id = wbmax.workspace_id
        AND wb.build_number = wbmax.max_build_number
), unique_template_params AS (
    SELECT
        ROW_NUMBER() OVER () AS num,
        array_agg(DISTINCT wb.template_id)::uuid[] AS template_ids,
        array_agg(wb.id)::uuid[] AS workspace_build_ids,
        tvp.name,
        tvp.type,
        tvp.display_name,
        tvp.description,
        tvp.options
    FROM
        latest_workspace_builds wb
    JOIN
        template_version_parameters tvp
    ON
        tvp.template_version_id = wb.template_version_id
    GROUP BY
        tvp.name, tvp.type, tvp.display_name, tvp.description, tvp.options
)

SELECT
    utp.num,
    utp.template_ids,
    utp.name,
    utp.type,
    utp.display_name,
    utp.description,
    utp.options,
    wbp.value,
    COUNT(wbp.value) AS count
FROM
    unique_template_params utp
JOIN
    workspace_build_parameters wbp
ON
    utp.workspace_build_ids @> ARRAY[wbp.workspace_build_id]
    AND utp.name = wbp.name
GROUP BY
    utp.num, utp.template_ids, utp.name, utp.type, utp.display_name, utp.description, utp.options, wbp.value;
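-- Illustration only: the @> operator above performs array containment,
-- matching a build parameter row to the set of builds for a unique parameter:
--
--   SELECT ARRAY[1, 2, 3] @> ARRAY[2]; -- => true
--   SELECT ARRAY[1, 2, 3] @> ARRAY[4]; -- => false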
-- name: GetUserStatusCounts :many
-- GetUserStatusCounts returns the count of users in each status over time.
-- The time range is inclusively defined by the start_time and end_time parameters.
--
-- Bucketing:
-- Between the start_time and end_time, we include each timestamp where a user's
-- status changed or they were deleted. We do not bucket these results by day or
-- some other time unit. This is because such bucketing would hide potentially
-- important patterns. If a user was active for 23 hours and 59 minutes, and
-- then suspended, a daily bucket would hide this. A daily bucket would also
-- have required us to carefully manage the timezone of the bucket based on the
-- timezone of the user.
--
-- Accumulation:
-- We do not start counting from 0 at the start_time. We check the last status
-- change before the start_time for each user. As such, the result shows the
-- total number of users in each status on any particular day.
WITH
    -- dates_of_interest defines all points in time that are relevant to the query.
    -- It includes the start_time, all status changes, all deletions, and the end_time.
    dates_of_interest AS (
        SELECT date FROM generate_series(
            @start_time::timestamptz,
            @end_time::timestamptz,
            (CASE WHEN @interval::int <= 0 THEN 3600 * 24 ELSE @interval::int END || ' seconds')::interval
        ) AS date
    ),
    -- latest_status_before_range defines the status of each user before the start_time.
    -- We do not include users who were deleted before the start_time. We use
    -- this to ensure that we correctly count users prior to the start_time for
    -- a complete graph.
    latest_status_before_range AS (
        SELECT DISTINCT ON (usc.user_id)
            usc.user_id,
            usc.new_status,
            usc.changed_at,
            ud.deleted
        FROM user_status_changes usc
        LEFT JOIN LATERAL (
            SELECT COUNT(*) > 0 AS deleted
            FROM user_deleted ud
            WHERE ud.user_id = usc.user_id AND (ud.deleted_at < usc.changed_at OR ud.deleted_at < @start_time)
        ) AS ud ON true
        WHERE usc.changed_at < @start_time::timestamptz
        ORDER BY usc.user_id, usc.changed_at DESC
    ),
    -- status_changes_during_range defines the status of each user during the
    -- start_time and end_time. If a user is deleted during the time range, we
    -- count status changes between the start_time and the deletion date.
    -- Theoretically, it should probably not be possible to update the status
    -- of a deleted user, but we need to ensure that this is enforced, so that
    -- a change in business logic later does not break this graph.
    status_changes_during_range AS (
        SELECT
            usc.user_id,
            usc.new_status,
            usc.changed_at,
            ud.deleted
        FROM user_status_changes usc
        LEFT JOIN LATERAL (
            SELECT COUNT(*) > 0 AS deleted
            FROM user_deleted ud
            WHERE ud.user_id = usc.user_id AND ud.deleted_at < usc.changed_at
        ) AS ud ON true
        WHERE usc.changed_at >= @start_time::timestamptz
            AND usc.changed_at <= @end_time::timestamptz
    ),
    -- relevant_status_changes defines the status of each user at any point in
    -- time. It includes the status of each user before the start_time, and the
    -- status of each user during the start_time and end_time.
    relevant_status_changes AS (
        SELECT
            user_id,
            new_status,
            changed_at
        FROM latest_status_before_range
        WHERE NOT deleted

        UNION ALL

        SELECT
            user_id,
            new_status,
            changed_at
        FROM status_changes_during_range
        WHERE NOT deleted
    ),
    -- statuses defines all the distinct statuses that were present just before
    -- and during the time range. This is used to ensure that we have a series
    -- for every relevant status.
    statuses AS (
        SELECT DISTINCT new_status FROM relevant_status_changes
    ),
    -- We only want to count the latest status change for each user on each
    -- date and then filter them by the relevant status. We use the row_number
    -- function to ensure that we only count the latest status change for each
    -- user on each date. We then filter the status changes by the relevant
    -- status in the final select statement below.
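    -- Illustration only: rn = 1 marks the most recent row per partition:
    --
    --   SELECT v, ROW_NUMBER() OVER (ORDER BY v DESC) AS rn
    --   FROM (VALUES (1), (2), (3)) AS t(v);
    --   -- => (3,1), (2,2), (1,3); filtering on rn = 1 keeps only the row
    --   -- with the greatest v, analogous to the latest changed_at below.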
    ranked_status_change_per_user_per_date AS (
        SELECT
            d.date,
            rsc1.user_id,
            ROW_NUMBER() OVER (PARTITION BY d.date, rsc1.user_id ORDER BY rsc1.changed_at DESC) AS rn,
            rsc1.new_status
        FROM dates_of_interest d
        LEFT JOIN relevant_status_changes rsc1 ON rsc1.changed_at <= d.date
    )
SELECT
    rscpupd.date::timestamptz AS date,
    statuses.new_status AS status,
    COUNT(rscpupd.user_id) FILTER (
        WHERE rscpupd.rn = 1
        AND (
            rscpupd.new_status = statuses.new_status
            AND (
                -- Include users who haven't been deleted.
                NOT EXISTS (SELECT 1 FROM user_deleted WHERE user_id = rscpupd.user_id)
                OR
                -- Or users whose deletion date is after the current date we're looking at.
                rscpupd.date < (SELECT deleted_at FROM user_deleted WHERE user_id = rscpupd.user_id)
            )
        )
    ) AS count
FROM ranked_status_change_per_user_per_date rscpupd
CROSS JOIN statuses
GROUP BY rscpupd.date, statuses.new_status
ORDER BY rscpupd.date;
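-- Illustration only: the CROSS JOIN above guarantees a (date, status) row
-- even when no user holds that status on that date:
--
--   SELECT d, s
--   FROM (VALUES (1), (2)) AS dates(d)
--   CROSS JOIN (VALUES ('active'), ('suspended')) AS statuses(s);
--   -- => 4 rows, one per combination, so the FILTERed COUNT reports 0
--   -- rather than omitting the point from the series.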