mirror of
https://github.com/coder/coder.git
synced 2025-07-15 22:20:27 +00:00
fix: fix MetricsAggregator check for metric sameness (#11508)
Fixes #11451 A refactor of the Agent API passes metrics as protobufs, which include pointers to label name/value pairs. The aggregator tested for sameness by doing a shallow compare of label values, which for different stats reports would compare unequal because the pointers would be different. This fix does a deep compare. While testing I also noted that we neglect to compare template names. This is unlikely to have caused any issue in practice, since the combination of username/workspace is unique, but in the context of comparing metric labels we should do the comparison. If a user creates a workspace, deletes it, then recreates from a different template, we could in principle have reported incorrect stats for the old template.
This commit is contained in:
26
agent/proto/compare.go
Normal file
26
agent/proto/compare.go
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
package proto
|
||||||
|
|
||||||
|
func LabelsEqual(a, b []*Stats_Metric_Label) bool {
|
||||||
|
am := make(map[string]string, len(a))
|
||||||
|
for _, lbl := range a {
|
||||||
|
v := lbl.GetValue()
|
||||||
|
if v == "" {
|
||||||
|
// Prometheus considers empty labels as equivalent to being absent
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
am[lbl.GetName()] = lbl.GetValue()
|
||||||
|
}
|
||||||
|
lenB := 0
|
||||||
|
for _, lbl := range b {
|
||||||
|
v := lbl.GetValue()
|
||||||
|
if v == "" {
|
||||||
|
// Prometheus considers empty labels as equivalent to being absent
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
lenB++
|
||||||
|
if am[lbl.GetName()] != v {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return len(am) == lenB
|
||||||
|
}
|
77
agent/proto/compare_test.go
Normal file
77
agent/proto/compare_test.go
Normal file
@ -0,0 +1,77 @@
|
|||||||
|
package proto_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/coder/coder/v2/agent/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLabelsEqual(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
a []*proto.Stats_Metric_Label
|
||||||
|
b []*proto.Stats_Metric_Label
|
||||||
|
eq bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "mainlineEq",
|
||||||
|
a: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
},
|
||||||
|
b: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
},
|
||||||
|
eq: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "emptyValue",
|
||||||
|
a: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
{Name: "singularity", Value: ""},
|
||||||
|
},
|
||||||
|
b: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
},
|
||||||
|
eq: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "extra",
|
||||||
|
a: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
{Name: "opacity", Value: "seyshells"},
|
||||||
|
},
|
||||||
|
b: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
},
|
||||||
|
eq: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "different",
|
||||||
|
a: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "sus"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
},
|
||||||
|
b: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "credulity", Value: "legit"},
|
||||||
|
{Name: "color", Value: "aquamarine"},
|
||||||
|
},
|
||||||
|
eq: false,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
require.Equal(t, tc.eq, proto.LabelsEqual(tc.a, tc.b))
|
||||||
|
require.Equal(t, tc.eq, proto.LabelsEqual(tc.b, tc.a))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -5,7 +5,6 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/prometheus/client_golang/prometheus"
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
"golang.org/x/exp/slices"
|
|
||||||
"golang.org/x/xerrors"
|
"golang.org/x/xerrors"
|
||||||
|
|
||||||
"cdr.dev/slog"
|
"cdr.dev/slog"
|
||||||
@ -68,7 +67,12 @@ type annotatedMetric struct {
|
|||||||
var _ prometheus.Collector = new(MetricsAggregator)
|
var _ prometheus.Collector = new(MetricsAggregator)
|
||||||
|
|
||||||
func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool {
|
func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool {
|
||||||
return am.username == req.username && am.workspaceName == req.workspaceName && am.agentName == req.agentName && am.Name == m.Name && slices.Equal(am.Labels, m.Labels)
|
return am.username == req.username &&
|
||||||
|
am.workspaceName == req.workspaceName &&
|
||||||
|
am.agentName == req.agentName &&
|
||||||
|
am.templateName == req.templateName &&
|
||||||
|
am.Name == m.Name &&
|
||||||
|
agentproto.LabelsEqual(am.Labels, m.Labels)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) {
|
func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) {
|
||||||
|
210
coderd/prometheusmetrics/aggregator_internal_test.go
Normal file
210
coderd/prometheusmetrics/aggregator_internal_test.go
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
package prometheusmetrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/coder/coder/v2/agent/proto"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAnnotatedMetric_Is(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
am1 := &annotatedMetric{
|
||||||
|
Stats_Metric: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 1,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
expiryDate: time.Now(),
|
||||||
|
}
|
||||||
|
for _, tc := range []struct {
|
||||||
|
name string
|
||||||
|
req updateRequest
|
||||||
|
m *proto.Stats_Metric
|
||||||
|
is bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "OK",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "missingLabel",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrongLabelValue",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "inshallah"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrongMetricName",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "cub",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrongUsername",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "steve",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrongWorkspaceName",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "play",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrongAgentName",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "bond",
|
||||||
|
templateName: "tempe",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "wrongTemplateName",
|
||||||
|
req: updateRequest{
|
||||||
|
username: "spike",
|
||||||
|
workspaceName: "work",
|
||||||
|
agentName: "janus",
|
||||||
|
templateName: "phoenix",
|
||||||
|
metrics: nil,
|
||||||
|
timestamp: time.Now().Add(-5 * time.Second),
|
||||||
|
},
|
||||||
|
m: &proto.Stats_Metric{
|
||||||
|
Name: "met",
|
||||||
|
Type: proto.Stats_Metric_COUNTER,
|
||||||
|
Value: 2,
|
||||||
|
Labels: []*proto.Stats_Metric_Label{
|
||||||
|
{Name: "rarity", Value: "blue moon"},
|
||||||
|
{Name: "certainty", Value: "yes"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
is: false,
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
tc := tc
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
require.Equal(t, tc.is, am1.is(tc.req, tc.m))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
@ -51,11 +51,19 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
|
|||||||
given1 := []*agentproto.Stats_Metric{
|
given1 := []*agentproto.Stats_Metric{
|
||||||
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
|
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
|
||||||
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 2},
|
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 2},
|
||||||
|
// Tests that we update labels correctly when they have extra labels
|
||||||
|
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 27, Labels: []*agentproto.Stats_Metric_Label{
|
||||||
|
{Name: "lizz", Value: "rizz"},
|
||||||
|
}},
|
||||||
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 3},
|
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 3},
|
||||||
}
|
}
|
||||||
|
|
||||||
given2 := []*agentproto.Stats_Metric{
|
given2 := []*agentproto.Stats_Metric{
|
||||||
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4},
|
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4},
|
||||||
|
// Tests that we update labels correctly when they have extra labels
|
||||||
|
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
|
||||||
|
{Name: "lizz", Value: "rizz"},
|
||||||
|
}},
|
||||||
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5},
|
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5},
|
||||||
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
|
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
|
||||||
{Name: "foobar", Value: "Foobaz"},
|
{Name: "foobar", Value: "Foobaz"},
|
||||||
@ -73,6 +81,13 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
|
|||||||
expected := []*agentproto.Stats_Metric{
|
expected := []*agentproto.Stats_Metric{
|
||||||
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
|
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
|
||||||
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels},
|
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels},
|
||||||
|
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
|
||||||
|
{Name: "agent_name", Value: testAgentName},
|
||||||
|
{Name: "lizz", Value: "rizz"},
|
||||||
|
{Name: "username", Value: testUsername},
|
||||||
|
{Name: "workspace_name", Value: testWorkspaceName},
|
||||||
|
{Name: "template_name", Value: testTemplateName},
|
||||||
|
}},
|
||||||
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels},
|
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels},
|
||||||
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
|
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
|
||||||
{Name: "agent_name", Value: testAgentName},
|
{Name: "agent_name", Value: testAgentName},
|
||||||
@ -111,6 +126,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
|
|||||||
|
|
||||||
func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, actual []prometheus.Metric) bool {
|
func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, actual []prometheus.Metric) bool {
|
||||||
if len(expected) != len(actual) {
|
if len(expected) != len(actual) {
|
||||||
|
t.Logf("expected %d metrics, got %d", len(expected), len(actual))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user