feat: add workspace agent stat reporting to telemetry (#6577)

This aggregates stats periodically and sends them by agent ID to
our telemetry server. It should help us identify which editors are
primarily in use.
This commit is contained in:
Kyle Carberry
2023-03-13 14:16:54 -05:00
committed by GitHub
parent 813b54942f
commit 35df1b10d0
7 changed files with 237 additions and 0 deletions

View File

@ -284,6 +284,10 @@ func (q *querier) GetDeploymentWorkspaceAgentStats(ctx context.Context, createdA
return q.db.GetDeploymentWorkspaceAgentStats(ctx, createdAfter) return q.db.GetDeploymentWorkspaceAgentStats(ctx, createdAfter)
} }
// GetWorkspaceAgentStats forwards the call to the wrapped database and
// returns its rows and error unchanged.
func (q *querier) GetWorkspaceAgentStats(ctx context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
	rows, err := q.db.GetWorkspaceAgentStats(ctx, createdAfter)
	return rows, err
}
func (q *querier) GetDeploymentWorkspaceStats(ctx context.Context) (database.GetDeploymentWorkspaceStatsRow, error) { func (q *querier) GetDeploymentWorkspaceStats(ctx context.Context) (database.GetDeploymentWorkspaceStatsRow, error) {
return q.db.GetDeploymentWorkspaceStats(ctx) return q.db.GetDeploymentWorkspaceStats(ctx)
} }

View File

@ -3707,6 +3707,79 @@ func (q *fakeQuerier) GetDeploymentWorkspaceStats(ctx context.Context) (database
return stat, nil return stat, nil
} }
// GetWorkspaceAgentStats aggregates the in-memory workspace agent stats that
// were created strictly after createdAfter into one row per agent.
//
// For every agent the returned row carries:
//   - the identifying IDs and session counts of the agent's most recent stat,
//   - the summed RxBytes/TxBytes, the earliest CreatedAt and the 50th/95th
//     latency percentiles of the stats that report a positive
//     ConnectionMedianLatencyMS (stats without one are ignored for these
//     fields; the latency percentiles then stay at their zero value).
//
// The order of the returned rows is unspecified. The error is always nil.
func (q *fakeQuerier) GetWorkspaceAgentStats(_ context.Context, createdAfter time.Time) ([]database.GetWorkspaceAgentStatsRow, error) {
	q.mutex.RLock()
	defer q.mutex.RUnlock()

	// Single pass: collect every stat in the window and remember the most
	// recent one per agent.
	agentStatsCreatedAfter := make([]database.WorkspaceAgentStat, 0)
	latestAgentStats := map[uuid.UUID]database.WorkspaceAgentStat{}
	for _, agentStat := range q.workspaceAgentStats {
		if !agentStat.CreatedAt.After(createdAfter) {
			continue
		}
		agentStatsCreatedAfter = append(agentStatsCreatedAfter, agentStat)

		// Compare timestamps instead of relying on slice order. On a tie the
		// later slice entry wins, which matches the previous behavior for
		// chronologically appended stats.
		latest, seen := latestAgentStats[agentStat.AgentID]
		if !seen || !agentStat.CreatedAt.Before(latest.CreatedAt) {
			latestAgentStats[agentStat.AgentID] = agentStat
		}
	}

	// Seed one row per agent, keyed by the agent's own ID. The row must carry
	// the identifying IDs; otherwise every agent collapses into a single row
	// keyed by the zero UUID.
	statByAgent := make(map[uuid.UUID]database.GetWorkspaceAgentStatsRow, len(latestAgentStats))
	for agentID, agentStat := range latestAgentStats {
		statByAgent[agentID] = database.GetWorkspaceAgentStatsRow{
			UserID:                      agentStat.UserID,
			AgentID:                     agentID,
			WorkspaceID:                 agentStat.WorkspaceID,
			TemplateID:                  agentStat.TemplateID,
			SessionCountVSCode:          agentStat.SessionCountVSCode,
			SessionCountJetBrains:       agentStat.SessionCountJetBrains,
			SessionCountReconnectingPTY: agentStat.SessionCountReconnectingPTY,
			SessionCountSSH:             agentStat.SessionCountSSH,
		}
	}

	latenciesByAgent := map[uuid.UUID][]float64{}
	minimumDateByAgent := map[uuid.UUID]time.Time{}
	for _, agentStat := range agentStatsCreatedAfter {
		// Stats without a positive latency are skipped, like the
		// "connection_median_latency_ms > 0" filter of the SQL query.
		if agentStat.ConnectionMedianLatencyMS <= 0 {
			continue
		}
		stat := statByAgent[agentStat.AgentID]
		minimumDate := minimumDateByAgent[agentStat.AgentID]
		if agentStat.CreatedAt.Before(minimumDate) || minimumDate.IsZero() {
			minimumDateByAgent[agentStat.AgentID] = agentStat.CreatedAt
		}
		stat.WorkspaceRxBytes += agentStat.RxBytes
		stat.WorkspaceTxBytes += agentStat.TxBytes
		statByAgent[agentStat.AgentID] = stat
		latenciesByAgent[agentStat.AgentID] = append(latenciesByAgent[agentStat.AgentID], agentStat.ConnectionMedianLatencyMS)
	}

	// tryPercentile sorts fs in place and returns the element at the p-th
	// percentile position, or -1 when fs is empty.
	tryPercentile := func(fs []float64, p float64) float64 {
		if len(fs) == 0 {
			return -1
		}
		sort.Float64s(fs)
		idx := int(float64(len(fs)) * p / 100)
		if idx >= len(fs) {
			// Guards p == 100, which would otherwise index past the end.
			idx = len(fs) - 1
		}
		return fs[idx]
	}

	// Only existing keys are reassigned while ranging, which is safe in Go.
	for agentID, stat := range statByAgent {
		stat.AggregatedFrom = minimumDateByAgent[agentID]
		if latencies, ok := latenciesByAgent[agentID]; ok {
			stat.WorkspaceConnectionLatency50 = tryPercentile(latencies, 50)
			stat.WorkspaceConnectionLatency95 = tryPercentile(latencies, 95)
		}
		statByAgent[agentID] = stat
	}

	stats := make([]database.GetWorkspaceAgentStatsRow, 0, len(statByAgent))
	for _, stat := range statByAgent {
		stats = append(stats, stat)
	}
	return stats, nil
}
func (q *fakeQuerier) UpdateWorkspaceTTLToBeWithinTemplateMax(_ context.Context, arg database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams) error { func (q *fakeQuerier) UpdateWorkspaceTTLToBeWithinTemplateMax(_ context.Context, arg database.UpdateWorkspaceTTLToBeWithinTemplateMaxParams) error {
if err := validateDatabaseType(arg); err != nil { if err := validateDatabaseType(arg); err != nil {
return err return err

View File

@ -121,6 +121,7 @@ type sqlcQuerier interface {
GetWorkspaceAgentByAuthToken(ctx context.Context, authToken uuid.UUID) (WorkspaceAgent, error) GetWorkspaceAgentByAuthToken(ctx context.Context, authToken uuid.UUID) (WorkspaceAgent, error)
GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (WorkspaceAgent, error) GetWorkspaceAgentByID(ctx context.Context, id uuid.UUID) (WorkspaceAgent, error)
GetWorkspaceAgentByInstanceID(ctx context.Context, authInstanceID string) (WorkspaceAgent, error) GetWorkspaceAgentByInstanceID(ctx context.Context, authInstanceID string) (WorkspaceAgent, error)
GetWorkspaceAgentStats(ctx context.Context, createdAt time.Time) ([]GetWorkspaceAgentStatsRow, error)
GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceAgent, error) GetWorkspaceAgentsByResourceIDs(ctx context.Context, ids []uuid.UUID) ([]WorkspaceAgent, error)
GetWorkspaceAgentsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceAgent, error) GetWorkspaceAgentsCreatedAfter(ctx context.Context, createdAt time.Time) ([]WorkspaceAgent, error)
GetWorkspaceAppByAgentIDAndSlug(ctx context.Context, arg GetWorkspaceAppByAgentIDAndSlugParams) (WorkspaceApp, error) GetWorkspaceAppByAgentIDAndSlug(ctx context.Context, arg GetWorkspaceAppByAgentIDAndSlugParams) (WorkspaceApp, error)

View File

@ -5632,6 +5632,88 @@ func (q *sqlQuerier) GetTemplateDAUs(ctx context.Context, templateID uuid.UUID)
return items, nil return items, nil
} }
// getWorkspaceAgentStats returns one row per agent for stats created after $1.
//
// agent_stats aggregates traffic and latency over the rows that report a
// positive connection_median_latency_ms; latest_agent_stats takes the session
// counts from the most recent row of each agent. The two are joined on
// agent_id so that an agent's traffic is only ever paired with its own
// session counts (a plain "FROM agent_stats, latest_agent_stats" would yield
// the cross product of all agents). The column order of the final SELECT must
// match the Scan order in GetWorkspaceAgentStats.
const getWorkspaceAgentStats = `-- name: GetWorkspaceAgentStats :many
WITH agent_stats AS (
	SELECT
		user_id,
		agent_id,
		workspace_id,
		template_id,
		MIN(created_at)::timestamptz AS aggregated_from,
		coalesce(SUM(rx_bytes), 0)::bigint AS workspace_rx_bytes,
		coalesce(SUM(tx_bytes), 0)::bigint AS workspace_tx_bytes,
		coalesce((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_50,
		coalesce((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_95
	FROM workspace_agent_stats
		-- The greater than 0 is to support legacy agents that don't report connection_median_latency_ms.
		WHERE workspace_agent_stats.created_at > $1 AND connection_median_latency_ms > 0 GROUP BY user_id, agent_id, workspace_id, template_id
), latest_agent_stats AS (
	SELECT
		a.agent_id,
		coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
		coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
		coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
		coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
	FROM (
		SELECT id, created_at, user_id, agent_id, workspace_id, template_id, connections_by_proto, connection_count, rx_packets, rx_bytes, tx_packets, tx_bytes, connection_median_latency_ms, session_count_vscode, session_count_jetbrains, session_count_reconnecting_pty, session_count_ssh, ROW_NUMBER() OVER(PARTITION BY agent_id ORDER BY created_at DESC) AS rn
		FROM workspace_agent_stats WHERE created_at > $1
	) AS a WHERE a.rn = 1 GROUP BY a.user_id, a.agent_id, a.workspace_id, a.template_id
)
SELECT agent_stats.user_id, agent_stats.agent_id, agent_stats.workspace_id, agent_stats.template_id, agent_stats.aggregated_from, agent_stats.workspace_rx_bytes, agent_stats.workspace_tx_bytes, agent_stats.workspace_connection_latency_50, agent_stats.workspace_connection_latency_95, latest_agent_stats.session_count_vscode, latest_agent_stats.session_count_ssh, latest_agent_stats.session_count_jetbrains, latest_agent_stats.session_count_reconnecting_pty FROM agent_stats JOIN latest_agent_stats ON agent_stats.agent_id = latest_agent_stats.agent_id
`
// GetWorkspaceAgentStatsRow is a single result row of the
// getWorkspaceAgentStats query, as scanned by GetWorkspaceAgentStats.
type GetWorkspaceAgentStatsRow struct {
UserID uuid.UUID `db:"user_id" json:"user_id"`
AgentID uuid.UUID `db:"agent_id" json:"agent_id"`
WorkspaceID uuid.UUID `db:"workspace_id" json:"workspace_id"`
TemplateID uuid.UUID `db:"template_id" json:"template_id"`
// AggregatedFrom is MIN(created_at) over the aggregated stat rows.
AggregatedFrom time.Time `db:"aggregated_from" json:"aggregated_from"`
// WorkspaceRxBytes and WorkspaceTxBytes are SUM(rx_bytes) and
// SUM(tx_bytes) over the aggregated stat rows (0 when the sum is NULL).
WorkspaceRxBytes int64 `db:"workspace_rx_bytes" json:"workspace_rx_bytes"`
WorkspaceTxBytes int64 `db:"workspace_tx_bytes" json:"workspace_tx_bytes"`
// WorkspaceConnectionLatency50/95 are the 0.5 and 0.95 PERCENTILE_CONT of
// connection_median_latency_ms; the query coalesces a NULL result to -1.
WorkspaceConnectionLatency50 float64 `db:"workspace_connection_latency_50" json:"workspace_connection_latency_50"`
WorkspaceConnectionLatency95 float64 `db:"workspace_connection_latency_95" json:"workspace_connection_latency_95"`
// The SessionCount* fields are taken from the most recent stat row
// (ROW_NUMBER() ... ORDER BY created_at DESC, rn = 1) per agent.
SessionCountVSCode int64 `db:"session_count_vscode" json:"session_count_vscode"`
SessionCountSSH int64 `db:"session_count_ssh" json:"session_count_ssh"`
SessionCountJetBrains int64 `db:"session_count_jetbrains" json:"session_count_jetbrains"`
SessionCountReconnectingPTY int64 `db:"session_count_reconnecting_pty" json:"session_count_reconnecting_pty"`
}
// GetWorkspaceAgentStats executes the getWorkspaceAgentStats query with
// createdAt bound to $1 and scans every result row into a
// GetWorkspaceAgentStatsRow. It returns a nil slice when the query yields no
// rows, and a nil slice plus the error if querying, scanning, closing or
// iterating fails.
func (q *sqlQuerier) GetWorkspaceAgentStats(ctx context.Context, createdAt time.Time) ([]GetWorkspaceAgentStatsRow, error) {
	rows, err := q.db.QueryContext(ctx, getWorkspaceAgentStats, createdAt)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var result []GetWorkspaceAgentStatsRow
	for rows.Next() {
		var row GetWorkspaceAgentStatsRow
		// Destination order must match the column order of the query.
		dest := []interface{}{
			&row.UserID,
			&row.AgentID,
			&row.WorkspaceID,
			&row.TemplateID,
			&row.AggregatedFrom,
			&row.WorkspaceRxBytes,
			&row.WorkspaceTxBytes,
			&row.WorkspaceConnectionLatency50,
			&row.WorkspaceConnectionLatency95,
			&row.SessionCountVSCode,
			&row.SessionCountSSH,
			&row.SessionCountJetBrains,
			&row.SessionCountReconnectingPTY,
		}
		if scanErr := rows.Scan(dest...); scanErr != nil {
			return nil, scanErr
		}
		result = append(result, row)
	}
	// Close explicitly so a close error is reported; the deferred Close then
	// becomes a no-op.
	if closeErr := rows.Close(); closeErr != nil {
		return nil, closeErr
	}
	if iterErr := rows.Err(); iterErr != nil {
		return nil, iterErr
	}
	return result, nil
}
const insertWorkspaceAgentStat = `-- name: InsertWorkspaceAgentStat :one const insertWorkspaceAgentStat = `-- name: InsertWorkspaceAgentStat :one
INSERT INTO INSERT INTO
workspace_agent_stats ( workspace_agent_stats (

View File

@ -74,3 +74,31 @@ WITH agent_stats AS (
) AS a WHERE a.rn = 1 ) AS a WHERE a.rn = 1
) )
SELECT * FROM agent_stats, latest_agent_stats; SELECT * FROM agent_stats, latest_agent_stats;
-- name: GetWorkspaceAgentStats :many
WITH agent_stats AS (
	-- Traffic and latency aggregates per agent over the requested window.
	SELECT
		user_id,
		agent_id,
		workspace_id,
		template_id,
		MIN(created_at)::timestamptz AS aggregated_from,
		coalesce(SUM(rx_bytes), 0)::bigint AS workspace_rx_bytes,
		coalesce(SUM(tx_bytes), 0)::bigint AS workspace_tx_bytes,
		coalesce((PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_50,
		coalesce((PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY connection_median_latency_ms)), -1)::FLOAT AS workspace_connection_latency_95
	FROM workspace_agent_stats
		-- The greater than 0 is to support legacy agents that don't report connection_median_latency_ms.
		WHERE workspace_agent_stats.created_at > $1 AND connection_median_latency_ms > 0 GROUP BY user_id, agent_id, workspace_id, template_id
), latest_agent_stats AS (
	-- Session counts from the most recent stat row of each agent.
	SELECT
		a.agent_id,
		coalesce(SUM(session_count_vscode), 0)::bigint AS session_count_vscode,
		coalesce(SUM(session_count_ssh), 0)::bigint AS session_count_ssh,
		coalesce(SUM(session_count_jetbrains), 0)::bigint AS session_count_jetbrains,
		coalesce(SUM(session_count_reconnecting_pty), 0)::bigint AS session_count_reconnecting_pty
	FROM (
		SELECT *, ROW_NUMBER() OVER(PARTITION BY agent_id ORDER BY created_at DESC) AS rn
		FROM workspace_agent_stats WHERE created_at > $1
	) AS a WHERE a.rn = 1 GROUP BY a.user_id, a.agent_id, a.workspace_id, a.template_id
)
-- Join on agent_id so each agent's aggregates are paired only with its own
-- session counts; an unconstrained "FROM agent_stats, latest_agent_stats"
-- would return the cross product of all agents. The columns are listed
-- explicitly so the duplicated agent_id does not appear in the result.
SELECT
	agent_stats.user_id,
	agent_stats.agent_id,
	agent_stats.workspace_id,
	agent_stats.template_id,
	agent_stats.aggregated_from,
	agent_stats.workspace_rx_bytes,
	agent_stats.workspace_tx_bytes,
	agent_stats.workspace_connection_latency_50,
	agent_stats.workspace_connection_latency_95,
	latest_agent_stats.session_count_vscode,
	latest_agent_stats.session_count_ssh,
	latest_agent_stats.session_count_jetbrains,
	latest_agent_stats.session_count_reconnecting_pty
FROM agent_stats JOIN latest_agent_stats ON agent_stats.agent_id = latest_agent_stats.agent_id;

View File

@ -465,6 +465,17 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
} }
return nil return nil
}) })
// Collect the per-agent stats aggregated since createdAfter and store their
// telemetry representation on the snapshot.
eg.Go(func() error {
	rows, err := r.options.Database.GetWorkspaceAgentStats(ctx, createdAfter)
	if err != nil {
		return xerrors.Errorf("get workspace agent stats: %w", err)
	}
	converted := make([]WorkspaceAgentStat, 0, len(rows))
	for i := range rows {
		converted = append(converted, ConvertWorkspaceAgentStat(rows[i]))
	}
	snapshot.WorkspaceAgentStats = converted
	return nil
})
err := eg.Wait() err := eg.Wait()
if err != nil { if err != nil {
@ -564,6 +575,25 @@ func ConvertWorkspaceAgent(agent database.WorkspaceAgent) WorkspaceAgent {
return snapAgent return snapAgent
} }
// ConvertWorkspaceAgentStat maps a database workspace agent stats row onto
// the telemetry WorkspaceAgentStat type. Every field is copied as-is.
func ConvertWorkspaceAgentStat(stat database.GetWorkspaceAgentStatsRow) WorkspaceAgentStat {
	var out WorkspaceAgentStat

	// Identity of the reporting agent.
	out.UserID = stat.UserID
	out.TemplateID = stat.TemplateID
	out.WorkspaceID = stat.WorkspaceID
	out.AgentID = stat.AgentID

	// Aggregated traffic and latency.
	out.AggregatedFrom = stat.AggregatedFrom
	out.ConnectionLatency50 = stat.WorkspaceConnectionLatency50
	out.ConnectionLatency95 = stat.WorkspaceConnectionLatency95
	out.RxBytes = stat.WorkspaceRxBytes
	out.TxBytes = stat.WorkspaceTxBytes

	// Session counts per client type.
	out.SessionCountVSCode = stat.SessionCountVSCode
	out.SessionCountJetBrains = stat.SessionCountJetBrains
	out.SessionCountReconnectingPTY = stat.SessionCountReconnectingPTY
	out.SessionCountSSH = stat.SessionCountSSH

	return out
}
// ConvertWorkspaceApp anonymizes a workspace app. // ConvertWorkspaceApp anonymizes a workspace app.
func ConvertWorkspaceApp(app database.WorkspaceApp) WorkspaceApp { func ConvertWorkspaceApp(app database.WorkspaceApp) WorkspaceApp {
return WorkspaceApp{ return WorkspaceApp{
@ -666,6 +696,7 @@ type Snapshot struct {
Workspaces []Workspace `json:"workspaces"` Workspaces []Workspace `json:"workspaces"`
WorkspaceApps []WorkspaceApp `json:"workspace_apps"` WorkspaceApps []WorkspaceApp `json:"workspace_apps"`
WorkspaceAgents []WorkspaceAgent `json:"workspace_agents"` WorkspaceAgents []WorkspaceAgent `json:"workspace_agents"`
WorkspaceAgentStats []WorkspaceAgentStat `json:"workspace_agent_stats"`
WorkspaceBuilds []WorkspaceBuild `json:"workspace_build"` WorkspaceBuilds []WorkspaceBuild `json:"workspace_build"`
WorkspaceResources []WorkspaceResource `json:"workspace_resources"` WorkspaceResources []WorkspaceResource `json:"workspace_resources"`
WorkspaceResourceMetadata []WorkspaceResourceMetadata `json:"workspace_resource_metadata"` WorkspaceResourceMetadata []WorkspaceResourceMetadata `json:"workspace_resource_metadata"`
@ -754,6 +785,22 @@ type WorkspaceAgent struct {
ShutdownScript bool `json:"shutdown_script"` ShutdownScript bool `json:"shutdown_script"`
} }
// WorkspaceAgentStat is the telemetry representation of the aggregated stats
// of one workspace agent. It is built from a
// database.GetWorkspaceAgentStatsRow by ConvertWorkspaceAgentStat and is
// included in Snapshot under "workspace_agent_stats".
type WorkspaceAgentStat struct {
UserID uuid.UUID `json:"user_id"`
TemplateID uuid.UUID `json:"template_id"`
WorkspaceID uuid.UUID `json:"workspace_id"`
// AggregatedFrom is copied from the row's AggregatedFrom.
AggregatedFrom time.Time `json:"aggregated_from"`
AgentID uuid.UUID `json:"agent_id"`
// RxBytes and TxBytes are copied from the row's WorkspaceRxBytes and
// WorkspaceTxBytes.
RxBytes int64 `json:"rx_bytes"`
TxBytes int64 `json:"tx_bytes"`
// ConnectionLatency50/95 are copied from the row's
// WorkspaceConnectionLatency50/95.
ConnectionLatency50 float64 `json:"connection_latency_50"`
ConnectionLatency95 float64 `json:"connection_latency_95"`
// Session counts, one per client type, copied from the row.
SessionCountVSCode int64 `json:"session_count_vscode"`
SessionCountJetBrains int64 `json:"session_count_jetbrains"`
SessionCountReconnectingPTY int64 `json:"session_count_reconnecting_pty"`
SessionCountSSH int64 `json:"session_count_ssh"`
}
type WorkspaceApp struct { type WorkspaceApp struct {
ID uuid.UUID `json:"id"` ID uuid.UUID `json:"id"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`

View File

@ -67,6 +67,7 @@ func TestTelemetry(t *testing.T) {
_ = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{ _ = dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
Transition: database.WorkspaceTransitionStart, Transition: database.WorkspaceTransitionStart,
}) })
_ = dbgen.WorkspaceAgentStat(t, db, database.WorkspaceAgentStat{})
_, err = db.InsertLicense(ctx, database.InsertLicenseParams{ _, err = db.InsertLicense(ctx, database.InsertLicenseParams{
UploadedAt: database.Now(), UploadedAt: database.Now(),
JWT: "", JWT: "",
@ -86,6 +87,7 @@ func TestTelemetry(t *testing.T) {
require.Len(t, snapshot.WorkspaceAgents, 1) require.Len(t, snapshot.WorkspaceAgents, 1)
require.Len(t, snapshot.WorkspaceBuilds, 1) require.Len(t, snapshot.WorkspaceBuilds, 1)
require.Len(t, snapshot.WorkspaceResources, 1) require.Len(t, snapshot.WorkspaceResources, 1)
require.Len(t, snapshot.WorkspaceAgentStats, 1)
}) })
t.Run("HashedEmail", func(t *testing.T) { t.Run("HashedEmail", func(t *testing.T) {
t.Parallel() t.Parallel()