feat: expose agent metrics via Prometheus endpoint (#7011)

* WIP

* WIP

* WIP

* Agents

* fix

* 1min

* fix

* WIP

* Test

* docs

* fmt

* Add timer to measure the metrics collection

* Use CachedGaugeVec

* Unit tests

* Address PR comments
This commit is contained in:
Marcin Tojek
2023-04-07 17:48:52 +02:00
committed by GitHub
parent dd85ea8977
commit 0347231bb8
7 changed files with 629 additions and 48 deletions

View File

@ -1,3 +1,23 @@
# HELP coderd_agents_apps Agent applications with statuses.
# TYPE coderd_agents_apps gauge
coderd_agents_apps{agent_name="main",app_name="code-server",health="healthy",username="admin",workspace_name="workspace-1"} 1
coderd_agents_apps{agent_name="main",app_name="code-server",health="healthy",username="admin",workspace_name="workspace-2"} 1
coderd_agents_apps{agent_name="main",app_name="code-server",health="healthy",username="admin",workspace_name="workspace-3"} 1
# HELP coderd_agents_connection_latencies_seconds Agent connection latencies in seconds.
# TYPE coderd_agents_connection_latencies_seconds gauge
coderd_agents_connection_latencies_seconds{agent_id="main",derp_region="Coder Embedded Relay",preferred="true",username="admin",workspace_name="workspace-1"} 0.03018125
coderd_agents_connection_latencies_seconds{agent_id="main",derp_region="Coder Embedded Relay",preferred="true",username="admin",workspace_name="workspace-2"} 0.028658416
coderd_agents_connection_latencies_seconds{agent_id="main",derp_region="Coder Embedded Relay",preferred="true",username="admin",workspace_name="workspace-3"} 0.028041416
# HELP coderd_agents_connections Agent connections with statuses.
# TYPE coderd_agents_connections gauge
coderd_agents_connections{agent_name="main",lifecycle_state="ready",status="connected",tailnet_node="nodeid:16966f7df70d8cc5",username="admin",workspace_name="workspace-3"} 1
coderd_agents_connections{agent_name="main",lifecycle_state="start_timeout",status="connected",tailnet_node="nodeid:3237d00938be23e3",username="admin",workspace_name="workspace-2"} 1
coderd_agents_connections{agent_name="main",lifecycle_state="start_timeout",status="connected",tailnet_node="nodeid:3779bd45d00be0eb",username="admin",workspace_name="workspace-1"} 1
# HELP coderd_agents_up The number of active agents per workspace.
# TYPE coderd_agents_up gauge
coderd_agents_up{username="admin",workspace_name="workspace-1"} 1
coderd_agents_up{username="admin",workspace_name="workspace-2"} 1
coderd_agents_up{username="admin",workspace_name="workspace-3"} 1
# HELP coderd_api_websocket_durations_seconds Websocket duration distribution of requests in seconds.
# TYPE coderd_api_websocket_durations_seconds histogram
coderd_api_websocket_durations_seconds_bucket{path="/api/v2/workspaceagents/me/coordinate",le="0.001"} 0
@ -568,6 +588,22 @@ coderd_api_requests_processed_total{code="401",method="POST",path="/api/v2/files
# HELP coderd_api_workspace_latest_build_total The latest workspace builds with a status.
# TYPE coderd_api_workspace_latest_build_total gauge
coderd_api_workspace_latest_build_total{status="succeeded"} 1
# HELP coderd_metrics_collector_agents_execution_seconds Histogram for duration of agents metrics collection in seconds.
# TYPE coderd_metrics_collector_agents_execution_seconds histogram
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.001"} 0
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.005"} 0
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.01"} 0
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.025"} 0
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.05"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.1"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="0.5"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="1"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="5"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="10"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="30"} 2
coderd_metrics_collector_agents_execution_seconds_bucket{le="+Inf"} 2
coderd_metrics_collector_agents_execution_seconds_sum 0.0592915
coderd_metrics_collector_agents_execution_seconds_count 2
# HELP coderd_provisionerd_job_timings_seconds The provisioner job time duration in seconds.
# TYPE coderd_provisionerd_job_timings_seconds histogram
coderd_provisionerd_job_timings_seconds_bucket{provisioner="terraform",status="success",le="1"} 0