chore: track workspace resource monitors in telemetry (#16776)

Addresses https://github.com/coder/nexus/issues/195. Specifically, just
the "tracking templates" requirement:

> ## Tracking in templates
> To enable resource alerts, a user must add the resource_monitoring
block to a template's coder_agent resource. We'd like to track if
customers have any resource monitoring enabled on a per-deployment
basis. Even better, we could identify which templates are using resource
monitoring.
This commit is contained in:
Hugo Dutka
2025-03-03 18:41:01 +01:00
committed by GitHub
parent dfcd93b26e
commit 24f3445e00
10 changed files with 285 additions and 22 deletions

View File

@@ -1438,6 +1438,17 @@ func (q *querier) FetchMemoryResourceMonitorsByAgentID(ctx context.Context, agen
return q.db.FetchMemoryResourceMonitorsByAgentID(ctx, agentID)
}
func (q *querier) FetchMemoryResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]database.WorkspaceAgentMemoryResourceMonitor, error) {
// Ideally, we would return a list of monitors that the user has access to. However, that check would need to
// be implemented similarly to GetWorkspaces, which is more complex than what we're doing here. Since this query
// was introduced for telemetry, we perform a simpler check.
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil {
return nil, err
}
return q.db.FetchMemoryResourceMonitorsUpdatedAfter(ctx, updatedAt)
}
func (q *querier) FetchNewMessageMetadata(ctx context.Context, arg database.FetchNewMessageMetadataParams) (database.FetchNewMessageMetadataRow, error) {
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceNotificationMessage); err != nil {
return database.FetchNewMessageMetadataRow{}, err
@@ -1459,6 +1470,17 @@ func (q *querier) FetchVolumesResourceMonitorsByAgentID(ctx context.Context, age
return q.db.FetchVolumesResourceMonitorsByAgentID(ctx, agentID)
}
func (q *querier) FetchVolumesResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]database.WorkspaceAgentVolumeResourceMonitor, error) {
// Ideally, we would return a list of monitors that the user has access to. However, that check would need to
// be implemented similarly to GetWorkspaces, which is more complex than what we're doing here. Since this query
// was introduced for telemetry, we perform a simpler check.
if err := q.authorizeContext(ctx, policy.ActionRead, rbac.ResourceWorkspaceAgentResourceMonitor); err != nil {
return nil, err
}
return q.db.FetchVolumesResourceMonitorsUpdatedAfter(ctx, updatedAt)
}
func (q *querier) GetAPIKeyByID(ctx context.Context, id string) (database.APIKey, error) {
return fetch(q.log, q.auth, q.db.GetAPIKeyByID)(ctx, id)
}

View File

@@ -4919,6 +4919,14 @@ func (s *MethodTestSuite) TestResourcesMonitor() {
}).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionUpdate)
}))
s.Run("FetchMemoryResourceMonitorsUpdatedAfter", s.Subtest(func(db database.Store, check *expects) {
check.Args(dbtime.Now()).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionRead)
}))
s.Run("FetchVolumesResourceMonitorsUpdatedAfter", s.Subtest(func(db database.Store, check *expects) {
check.Args(dbtime.Now()).Asserts(rbac.ResourceWorkspaceAgentResourceMonitor, policy.ActionRead)
}))
s.Run("FetchMemoryResourceMonitorsByAgentID", s.Subtest(func(db database.Store, check *expects) {
agt, w := createAgent(s.T(), db)

View File

@@ -2503,6 +2503,19 @@ func (q *FakeQuerier) FetchMemoryResourceMonitorsByAgentID(_ context.Context, ag
return database.WorkspaceAgentMemoryResourceMonitor{}, sql.ErrNoRows
}
func (q *FakeQuerier) FetchMemoryResourceMonitorsUpdatedAfter(_ context.Context, updatedAt time.Time) ([]database.WorkspaceAgentMemoryResourceMonitor, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
monitors := []database.WorkspaceAgentMemoryResourceMonitor{}
for _, monitor := range q.workspaceAgentMemoryResourceMonitors {
if monitor.UpdatedAt.After(updatedAt) {
monitors = append(monitors, monitor)
}
}
return monitors, nil
}
func (q *FakeQuerier) FetchNewMessageMetadata(_ context.Context, arg database.FetchNewMessageMetadataParams) (database.FetchNewMessageMetadataRow, error) {
err := validateDatabaseType(arg)
if err != nil {
@@ -2547,6 +2560,19 @@ func (q *FakeQuerier) FetchVolumesResourceMonitorsByAgentID(_ context.Context, a
return monitors, nil
}
func (q *FakeQuerier) FetchVolumesResourceMonitorsUpdatedAfter(_ context.Context, updatedAt time.Time) ([]database.WorkspaceAgentVolumeResourceMonitor, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()
monitors := []database.WorkspaceAgentVolumeResourceMonitor{}
for _, monitor := range q.workspaceAgentVolumeResourceMonitors {
if monitor.UpdatedAt.After(updatedAt) {
monitors = append(monitors, monitor)
}
}
return monitors, nil
}
func (q *FakeQuerier) GetAPIKeyByID(_ context.Context, id string) (database.APIKey, error) {
q.mutex.RLock()
defer q.mutex.RUnlock()

View File

@@ -451,6 +451,13 @@ func (m queryMetricsStore) FetchMemoryResourceMonitorsByAgentID(ctx context.Cont
return r0, r1
}
func (m queryMetricsStore) FetchMemoryResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]database.WorkspaceAgentMemoryResourceMonitor, error) {
start := time.Now()
r0, r1 := m.s.FetchMemoryResourceMonitorsUpdatedAfter(ctx, updatedAt)
m.queryLatencies.WithLabelValues("FetchMemoryResourceMonitorsUpdatedAfter").Observe(time.Since(start).Seconds())
return r0, r1
}
func (m queryMetricsStore) FetchNewMessageMetadata(ctx context.Context, arg database.FetchNewMessageMetadataParams) (database.FetchNewMessageMetadataRow, error) {
start := time.Now()
r0, r1 := m.s.FetchNewMessageMetadata(ctx, arg)
@@ -465,6 +472,13 @@ func (m queryMetricsStore) FetchVolumesResourceMonitorsByAgentID(ctx context.Con
return r0, r1
}
func (m queryMetricsStore) FetchVolumesResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]database.WorkspaceAgentVolumeResourceMonitor, error) {
start := time.Now()
r0, r1 := m.s.FetchVolumesResourceMonitorsUpdatedAfter(ctx, updatedAt)
m.queryLatencies.WithLabelValues("FetchVolumesResourceMonitorsUpdatedAfter").Observe(time.Since(start).Seconds())
return r0, r1
}
func (m queryMetricsStore) GetAPIKeyByID(ctx context.Context, id string) (database.APIKey, error) {
start := time.Now()
apiKey, err := m.s.GetAPIKeyByID(ctx, id)

View File

@@ -787,6 +787,21 @@ func (mr *MockStoreMockRecorder) FetchMemoryResourceMonitorsByAgentID(ctx, agent
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchMemoryResourceMonitorsByAgentID", reflect.TypeOf((*MockStore)(nil).FetchMemoryResourceMonitorsByAgentID), ctx, agentID)
}
// FetchMemoryResourceMonitorsUpdatedAfter mocks base method.
func (m *MockStore) FetchMemoryResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]database.WorkspaceAgentMemoryResourceMonitor, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "FetchMemoryResourceMonitorsUpdatedAfter", ctx, updatedAt)
ret0, _ := ret[0].([]database.WorkspaceAgentMemoryResourceMonitor)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// FetchMemoryResourceMonitorsUpdatedAfter indicates an expected call of FetchMemoryResourceMonitorsUpdatedAfter.
func (mr *MockStoreMockRecorder) FetchMemoryResourceMonitorsUpdatedAfter(ctx, updatedAt any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchMemoryResourceMonitorsUpdatedAfter", reflect.TypeOf((*MockStore)(nil).FetchMemoryResourceMonitorsUpdatedAfter), ctx, updatedAt)
}
// FetchNewMessageMetadata mocks base method.
func (m *MockStore) FetchNewMessageMetadata(ctx context.Context, arg database.FetchNewMessageMetadataParams) (database.FetchNewMessageMetadataRow, error) {
m.ctrl.T.Helper()
@@ -817,6 +832,21 @@ func (mr *MockStoreMockRecorder) FetchVolumesResourceMonitorsByAgentID(ctx, agen
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchVolumesResourceMonitorsByAgentID", reflect.TypeOf((*MockStore)(nil).FetchVolumesResourceMonitorsByAgentID), ctx, agentID)
}
// FetchVolumesResourceMonitorsUpdatedAfter mocks base method.
func (m *MockStore) FetchVolumesResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]database.WorkspaceAgentVolumeResourceMonitor, error) {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "FetchVolumesResourceMonitorsUpdatedAfter", ctx, updatedAt)
ret0, _ := ret[0].([]database.WorkspaceAgentVolumeResourceMonitor)
ret1, _ := ret[1].(error)
return ret0, ret1
}
// FetchVolumesResourceMonitorsUpdatedAfter indicates an expected call of FetchVolumesResourceMonitorsUpdatedAfter.
func (mr *MockStoreMockRecorder) FetchVolumesResourceMonitorsUpdatedAfter(ctx, updatedAt any) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "FetchVolumesResourceMonitorsUpdatedAfter", reflect.TypeOf((*MockStore)(nil).FetchVolumesResourceMonitorsUpdatedAfter), ctx, updatedAt)
}
// GetAPIKeyByID mocks base method.
func (m *MockStore) GetAPIKeyByID(ctx context.Context, id string) (database.APIKey, error) {
m.ctrl.T.Helper()

View File

@@ -113,9 +113,11 @@ type sqlcQuerier interface {
EnqueueNotificationMessage(ctx context.Context, arg EnqueueNotificationMessageParams) error
FavoriteWorkspace(ctx context.Context, id uuid.UUID) error
FetchMemoryResourceMonitorsByAgentID(ctx context.Context, agentID uuid.UUID) (WorkspaceAgentMemoryResourceMonitor, error)
FetchMemoryResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]WorkspaceAgentMemoryResourceMonitor, error)
// This is used to build up the notification_message's JSON payload.
FetchNewMessageMetadata(ctx context.Context, arg FetchNewMessageMetadataParams) (FetchNewMessageMetadataRow, error)
FetchVolumesResourceMonitorsByAgentID(ctx context.Context, agentID uuid.UUID) ([]WorkspaceAgentVolumeResourceMonitor, error)
FetchVolumesResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]WorkspaceAgentVolumeResourceMonitor, error)
GetAPIKeyByID(ctx context.Context, id string) (APIKey, error)
// there is no unique constraint on empty token names
GetAPIKeyByName(ctx context.Context, arg GetAPIKeyByNameParams) (APIKey, error)

View File

@@ -12398,6 +12398,46 @@ func (q *sqlQuerier) FetchMemoryResourceMonitorsByAgentID(ctx context.Context, a
return i, err
}
const fetchMemoryResourceMonitorsUpdatedAfter = `-- name: FetchMemoryResourceMonitorsUpdatedAfter :many
SELECT
agent_id, enabled, threshold, created_at, updated_at, state, debounced_until
FROM
workspace_agent_memory_resource_monitors
WHERE
updated_at > $1
`
func (q *sqlQuerier) FetchMemoryResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]WorkspaceAgentMemoryResourceMonitor, error) {
rows, err := q.db.QueryContext(ctx, fetchMemoryResourceMonitorsUpdatedAfter, updatedAt)
if err != nil {
return nil, err
}
defer rows.Close()
var items []WorkspaceAgentMemoryResourceMonitor
for rows.Next() {
var i WorkspaceAgentMemoryResourceMonitor
if err := rows.Scan(
&i.AgentID,
&i.Enabled,
&i.Threshold,
&i.CreatedAt,
&i.UpdatedAt,
&i.State,
&i.DebouncedUntil,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Close(); err != nil {
return nil, err
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const fetchVolumesResourceMonitorsByAgentID = `-- name: FetchVolumesResourceMonitorsByAgentID :many
SELECT
agent_id, enabled, threshold, path, created_at, updated_at, state, debounced_until
@@ -12439,6 +12479,47 @@ func (q *sqlQuerier) FetchVolumesResourceMonitorsByAgentID(ctx context.Context,
return items, nil
}
const fetchVolumesResourceMonitorsUpdatedAfter = `-- name: FetchVolumesResourceMonitorsUpdatedAfter :many
SELECT
agent_id, enabled, threshold, path, created_at, updated_at, state, debounced_until
FROM
workspace_agent_volume_resource_monitors
WHERE
updated_at > $1
`
func (q *sqlQuerier) FetchVolumesResourceMonitorsUpdatedAfter(ctx context.Context, updatedAt time.Time) ([]WorkspaceAgentVolumeResourceMonitor, error) {
rows, err := q.db.QueryContext(ctx, fetchVolumesResourceMonitorsUpdatedAfter, updatedAt)
if err != nil {
return nil, err
}
defer rows.Close()
var items []WorkspaceAgentVolumeResourceMonitor
for rows.Next() {
var i WorkspaceAgentVolumeResourceMonitor
if err := rows.Scan(
&i.AgentID,
&i.Enabled,
&i.Threshold,
&i.Path,
&i.CreatedAt,
&i.UpdatedAt,
&i.State,
&i.DebouncedUntil,
); err != nil {
return nil, err
}
items = append(items, i)
}
if err := rows.Close(); err != nil {
return nil, err
}
if err := rows.Err(); err != nil {
return nil, err
}
return items, nil
}
const insertMemoryResourceMonitor = `-- name: InsertMemoryResourceMonitor :one
INSERT INTO
workspace_agent_memory_resource_monitors (

View File

@@ -1,3 +1,19 @@
-- name: FetchVolumesResourceMonitorsUpdatedAfter :many
SELECT
*
FROM
workspace_agent_volume_resource_monitors
WHERE
updated_at > $1;
-- name: FetchMemoryResourceMonitorsUpdatedAfter :many
SELECT
*
FROM
workspace_agent_memory_resource_monitors
WHERE
updated_at > $1;
-- name: FetchMemoryResourceMonitorsByAgentID :one
SELECT
*

View File

@@ -624,6 +624,28 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
}
return nil
})
eg.Go(func() error {
memoryMonitors, err := r.options.Database.FetchMemoryResourceMonitorsUpdatedAfter(ctx, createdAfter)
if err != nil {
return xerrors.Errorf("get memory resource monitors: %w", err)
}
snapshot.WorkspaceAgentMemoryResourceMonitors = make([]WorkspaceAgentMemoryResourceMonitor, 0, len(memoryMonitors))
for _, monitor := range memoryMonitors {
snapshot.WorkspaceAgentMemoryResourceMonitors = append(snapshot.WorkspaceAgentMemoryResourceMonitors, ConvertWorkspaceAgentMemoryResourceMonitor(monitor))
}
return nil
})
eg.Go(func() error {
volumeMonitors, err := r.options.Database.FetchVolumesResourceMonitorsUpdatedAfter(ctx, createdAfter)
if err != nil {
return xerrors.Errorf("get volume resource monitors: %w", err)
}
snapshot.WorkspaceAgentVolumeResourceMonitors = make([]WorkspaceAgentVolumeResourceMonitor, 0, len(volumeMonitors))
for _, monitor := range volumeMonitors {
snapshot.WorkspaceAgentVolumeResourceMonitors = append(snapshot.WorkspaceAgentVolumeResourceMonitors, ConvertWorkspaceAgentVolumeResourceMonitor(monitor))
}
return nil
})
eg.Go(func() error {
proxies, err := r.options.Database.GetWorkspaceProxies(ctx)
if err != nil {
@@ -765,6 +787,26 @@ func ConvertWorkspaceAgent(agent database.WorkspaceAgent) WorkspaceAgent {
return snapAgent
}
func ConvertWorkspaceAgentMemoryResourceMonitor(monitor database.WorkspaceAgentMemoryResourceMonitor) WorkspaceAgentMemoryResourceMonitor {
return WorkspaceAgentMemoryResourceMonitor{
AgentID: monitor.AgentID,
Enabled: monitor.Enabled,
Threshold: monitor.Threshold,
CreatedAt: monitor.CreatedAt,
UpdatedAt: monitor.UpdatedAt,
}
}
func ConvertWorkspaceAgentVolumeResourceMonitor(monitor database.WorkspaceAgentVolumeResourceMonitor) WorkspaceAgentVolumeResourceMonitor {
return WorkspaceAgentVolumeResourceMonitor{
AgentID: monitor.AgentID,
Enabled: monitor.Enabled,
Threshold: monitor.Threshold,
CreatedAt: monitor.CreatedAt,
UpdatedAt: monitor.UpdatedAt,
}
}
// ConvertWorkspaceAgentStat anonymizes a workspace agent stat.
func ConvertWorkspaceAgentStat(stat database.GetWorkspaceAgentStatsRow) WorkspaceAgentStat {
return WorkspaceAgentStat{
@@ -1083,28 +1125,30 @@ func ConvertTelemetryItem(item database.TelemetryItem) TelemetryItem {
type Snapshot struct {
DeploymentID string `json:"deployment_id"`
APIKeys []APIKey `json:"api_keys"`
CLIInvocations []clitelemetry.Invocation `json:"cli_invocations"`
ExternalProvisioners []ExternalProvisioner `json:"external_provisioners"`
Licenses []License `json:"licenses"`
ProvisionerJobs []ProvisionerJob `json:"provisioner_jobs"`
TemplateVersions []TemplateVersion `json:"template_versions"`
Templates []Template `json:"templates"`
Users []User `json:"users"`
Groups []Group `json:"groups"`
GroupMembers []GroupMember `json:"group_members"`
WorkspaceAgentStats []WorkspaceAgentStat `json:"workspace_agent_stats"`
WorkspaceAgents []WorkspaceAgent `json:"workspace_agents"`
WorkspaceApps []WorkspaceApp `json:"workspace_apps"`
WorkspaceBuilds []WorkspaceBuild `json:"workspace_build"`
WorkspaceProxies []WorkspaceProxy `json:"workspace_proxies"`
WorkspaceResourceMetadata []WorkspaceResourceMetadata `json:"workspace_resource_metadata"`
WorkspaceResources []WorkspaceResource `json:"workspace_resources"`
WorkspaceModules []WorkspaceModule `json:"workspace_modules"`
Workspaces []Workspace `json:"workspaces"`
NetworkEvents []NetworkEvent `json:"network_events"`
Organizations []Organization `json:"organizations"`
TelemetryItems []TelemetryItem `json:"telemetry_items"`
APIKeys []APIKey `json:"api_keys"`
CLIInvocations []clitelemetry.Invocation `json:"cli_invocations"`
ExternalProvisioners []ExternalProvisioner `json:"external_provisioners"`
Licenses []License `json:"licenses"`
ProvisionerJobs []ProvisionerJob `json:"provisioner_jobs"`
TemplateVersions []TemplateVersion `json:"template_versions"`
Templates []Template `json:"templates"`
Users []User `json:"users"`
Groups []Group `json:"groups"`
GroupMembers []GroupMember `json:"group_members"`
WorkspaceAgentStats []WorkspaceAgentStat `json:"workspace_agent_stats"`
WorkspaceAgents []WorkspaceAgent `json:"workspace_agents"`
WorkspaceApps []WorkspaceApp `json:"workspace_apps"`
WorkspaceBuilds []WorkspaceBuild `json:"workspace_build"`
WorkspaceProxies []WorkspaceProxy `json:"workspace_proxies"`
WorkspaceResourceMetadata []WorkspaceResourceMetadata `json:"workspace_resource_metadata"`
WorkspaceResources []WorkspaceResource `json:"workspace_resources"`
WorkspaceAgentMemoryResourceMonitors []WorkspaceAgentMemoryResourceMonitor `json:"workspace_agent_memory_resource_monitors"`
WorkspaceAgentVolumeResourceMonitors []WorkspaceAgentVolumeResourceMonitor `json:"workspace_agent_volume_resource_monitors"`
WorkspaceModules []WorkspaceModule `json:"workspace_modules"`
Workspaces []Workspace `json:"workspaces"`
NetworkEvents []NetworkEvent `json:"network_events"`
Organizations []Organization `json:"organizations"`
TelemetryItems []TelemetryItem `json:"telemetry_items"`
}
// Deployment contains information about the host running Coder.
@@ -1232,6 +1276,22 @@ type WorkspaceAgentStat struct {
SessionCountSSH int64 `json:"session_count_ssh"`
}
type WorkspaceAgentMemoryResourceMonitor struct {
AgentID uuid.UUID `json:"agent_id"`
Enabled bool `json:"enabled"`
Threshold int32 `json:"threshold"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type WorkspaceAgentVolumeResourceMonitor struct {
AgentID uuid.UUID `json:"agent_id"`
Enabled bool `json:"enabled"`
Threshold int32 `json:"threshold"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type WorkspaceApp struct {
ID uuid.UUID `json:"id"`
CreatedAt time.Time `json:"created_at"`

View File

@@ -112,6 +112,8 @@ func TestTelemetry(t *testing.T) {
_, _ = dbgen.WorkspaceProxy(t, db, database.WorkspaceProxy{})
_ = dbgen.WorkspaceModule(t, db, database.WorkspaceModule{})
_ = dbgen.WorkspaceAgentMemoryResourceMonitor(t, db, database.WorkspaceAgentMemoryResourceMonitor{})
_ = dbgen.WorkspaceAgentVolumeResourceMonitor(t, db, database.WorkspaceAgentVolumeResourceMonitor{})
_, snapshot := collectSnapshot(t, db, nil)
require.Len(t, snapshot.ProvisionerJobs, 1)
@@ -133,6 +135,8 @@ func TestTelemetry(t *testing.T) {
require.Len(t, snapshot.Organizations, 1)
// We create one item manually above. The other is TelemetryEnabled, created by the snapshotter.
require.Len(t, snapshot.TelemetryItems, 2)
require.Len(t, snapshot.WorkspaceAgentMemoryResourceMonitors, 1)
require.Len(t, snapshot.WorkspaceAgentVolumeResourceMonitors, 1)
wsa := snapshot.WorkspaceAgents[0]
require.Len(t, wsa.Subsystems, 2)
require.Equal(t, string(database.WorkspaceAgentSubsystemEnvbox), wsa.Subsystems[0])