fix: fix MetricsAggregator check for metric sameness (#11508)

Fixes #11451

A refactor of the Agent API passes metrics as protobufs, which include pointers to label name/value pairs.  The aggregator tested for sameness by doing a shallow compare of label values, which for different stats reports would compare unequal because the pointers would be different.

This fix does a deep compare.

While testing I also noted that we neglect to compare template names. This is unlikely to have caused any issue in practice, since the combination of username/workspace is unique, but in the context of comparing metric labels we should do the comparison.

If a user creates a workspace, deletes it, then recreates from a different template, we could in principle have reported incorrect stats for the old template.
This commit is contained in:
Spike Curtis
2024-01-09 15:21:30 +04:00
committed by GitHub
parent 21093c00f0
commit fdd60d316e
5 changed files with 335 additions and 2 deletions

26
agent/proto/compare.go Normal file
View File

@ -0,0 +1,26 @@
package proto
func LabelsEqual(a, b []*Stats_Metric_Label) bool {
am := make(map[string]string, len(a))
for _, lbl := range a {
v := lbl.GetValue()
if v == "" {
// Prometheus considers empty labels as equivalent to being absent
continue
}
am[lbl.GetName()] = lbl.GetValue()
}
lenB := 0
for _, lbl := range b {
v := lbl.GetValue()
if v == "" {
// Prometheus considers empty labels as equivalent to being absent
continue
}
lenB++
if am[lbl.GetName()] != v {
return false
}
}
return len(am) == lenB
}

View File

@ -0,0 +1,77 @@
package proto_test
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/agent/proto"
)
func TestLabelsEqual(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
name string
a []*proto.Stats_Metric_Label
b []*proto.Stats_Metric_Label
eq bool
}{
{
name: "mainlineEq",
a: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
},
b: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
},
eq: true,
},
{
name: "emptyValue",
a: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
{Name: "singularity", Value: ""},
},
b: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
},
eq: true,
},
{
name: "extra",
a: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
{Name: "opacity", Value: "seyshells"},
},
b: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
},
eq: false,
},
{
name: "different",
a: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "sus"},
{Name: "color", Value: "aquamarine"},
},
b: []*proto.Stats_Metric_Label{
{Name: "credulity", Value: "legit"},
{Name: "color", Value: "aquamarine"},
},
eq: false,
},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
require.Equal(t, tc.eq, proto.LabelsEqual(tc.a, tc.b))
require.Equal(t, tc.eq, proto.LabelsEqual(tc.b, tc.a))
})
}
}

View File

@ -5,7 +5,6 @@ import (
"time" "time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"golang.org/x/exp/slices"
"golang.org/x/xerrors" "golang.org/x/xerrors"
"cdr.dev/slog" "cdr.dev/slog"
@ -68,7 +67,12 @@ type annotatedMetric struct {
var _ prometheus.Collector = new(MetricsAggregator) var _ prometheus.Collector = new(MetricsAggregator)
func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool { func (am *annotatedMetric) is(req updateRequest, m *agentproto.Stats_Metric) bool {
return am.username == req.username && am.workspaceName == req.workspaceName && am.agentName == req.agentName && am.Name == m.Name && slices.Equal(am.Labels, m.Labels) return am.username == req.username &&
am.workspaceName == req.workspaceName &&
am.agentName == req.agentName &&
am.templateName == req.templateName &&
am.Name == m.Name &&
agentproto.LabelsEqual(am.Labels, m.Labels)
} }
func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) { func (am *annotatedMetric) asPrometheus() (prometheus.Metric, error) {

View File

@ -0,0 +1,210 @@
package prometheusmetrics
import (
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/coder/coder/v2/agent/proto"
)
func TestAnnotatedMetric_Is(t *testing.T) {
t.Parallel()
am1 := &annotatedMetric{
Stats_Metric: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 1,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
username: "spike",
workspaceName: "work",
agentName: "janus",
templateName: "tempe",
expiryDate: time.Now(),
}
for _, tc := range []struct {
name string
req updateRequest
m *proto.Stats_Metric
is bool
}{
{
name: "OK",
req: updateRequest{
username: "spike",
workspaceName: "work",
agentName: "janus",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
is: true,
},
{
name: "missingLabel",
req: updateRequest{
username: "spike",
workspaceName: "work",
agentName: "janus",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "certainty", Value: "yes"},
},
},
is: false,
},
{
name: "wrongLabelValue",
req: updateRequest{
username: "spike",
workspaceName: "work",
agentName: "janus",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "inshallah"},
},
},
is: false,
},
{
name: "wrongMetricName",
req: updateRequest{
username: "spike",
workspaceName: "work",
agentName: "janus",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "cub",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
is: false,
},
{
name: "wrongUsername",
req: updateRequest{
username: "steve",
workspaceName: "work",
agentName: "janus",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
is: false,
},
{
name: "wrongWorkspaceName",
req: updateRequest{
username: "spike",
workspaceName: "play",
agentName: "janus",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
is: false,
},
{
name: "wrongAgentName",
req: updateRequest{
username: "spike",
workspaceName: "work",
agentName: "bond",
templateName: "tempe",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
is: false,
},
{
name: "wrongTemplateName",
req: updateRequest{
username: "spike",
workspaceName: "work",
agentName: "janus",
templateName: "phoenix",
metrics: nil,
timestamp: time.Now().Add(-5 * time.Second),
},
m: &proto.Stats_Metric{
Name: "met",
Type: proto.Stats_Metric_COUNTER,
Value: 2,
Labels: []*proto.Stats_Metric_Label{
{Name: "rarity", Value: "blue moon"},
{Name: "certainty", Value: "yes"},
},
},
is: false,
},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
require.Equal(t, tc.is, am1.is(tc.req, tc.m))
})
}
}

View File

@ -51,11 +51,19 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
given1 := []*agentproto.Stats_Metric{ given1 := []*agentproto.Stats_Metric{
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1}, {Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1},
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 2}, {Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 2},
// Tests that we update labels correctly when they have extra labels
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 27, Labels: []*agentproto.Stats_Metric_Label{
{Name: "lizz", Value: "rizz"},
}},
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 3}, {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 3},
} }
given2 := []*agentproto.Stats_Metric{ given2 := []*agentproto.Stats_Metric{
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4}, {Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4},
// Tests that we update labels correctly when they have extra labels
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
{Name: "lizz", Value: "rizz"},
}},
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5}, {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5},
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{ {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
{Name: "foobar", Value: "Foobaz"}, {Name: "foobar", Value: "Foobaz"},
@ -73,6 +81,13 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
expected := []*agentproto.Stats_Metric{ expected := []*agentproto.Stats_Metric{
{Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels}, {Name: "a_counter_one", Type: agentproto.Stats_Metric_COUNTER, Value: 1, Labels: commonLabels},
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels}, {Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: 4, Labels: commonLabels},
{Name: "b_counter_two", Type: agentproto.Stats_Metric_COUNTER, Value: -9, Labels: []*agentproto.Stats_Metric_Label{
{Name: "agent_name", Value: testAgentName},
{Name: "lizz", Value: "rizz"},
{Name: "username", Value: testUsername},
{Name: "workspace_name", Value: testWorkspaceName},
{Name: "template_name", Value: testTemplateName},
}},
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels}, {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 5, Labels: commonLabels},
{Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{ {Name: "c_gauge_three", Type: agentproto.Stats_Metric_GAUGE, Value: 2, Labels: []*agentproto.Stats_Metric_Label{
{Name: "agent_name", Value: testAgentName}, {Name: "agent_name", Value: testAgentName},
@ -111,6 +126,7 @@ func TestUpdateMetrics_MetricsDoNotExpire(t *testing.T) {
func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, actual []prometheus.Metric) bool { func verifyCollectedMetrics(t *testing.T, expected []*agentproto.Stats_Metric, actual []prometheus.Metric) bool {
if len(expected) != len(actual) { if len(expected) != len(actual) {
t.Logf("expected %d metrics, got %d", len(expected), len(actual))
return false return false
} }