Files
coder/coderd/agentapi/resources_monitoring.go
Vincent Vielle bc609d0056 feat: integrate agentAPI with resources monitoring logic (#16438)
As part of the new resources monitoring logic - more specifically for
OOM & OOD Notifications , we need to update the AgentAPI , and the
agents logic.

This PR aims to do it, and more specifically :  
We are updating the AgentAPI & TailnetAPI to version 24 to add two new
methods in the AgentAPI :
- One method to fetch the resources monitoring configuration
- One method to push the datapoints for the resources monitoring.

Also, this PR adds a new logic on the agent side, with a routine running
and ticking - fetching the resources usage each time , but also storing
it in a FIFO like queue.

Finally, this PR fixes a problem we had with RBAC logic on the resources
monitoring model, applying the same logic than we have for similar
entities.
2025-02-14 10:28:15 +01:00

68 lines
2.1 KiB
Go

package agentapi
import (
"context"
"database/sql"
"errors"
"golang.org/x/xerrors"
"github.com/google/uuid"
"cdr.dev/slog"
"github.com/coder/coder/v2/agent/proto"
"github.com/coder/coder/v2/coderd/database"
)
type ResourcesMonitoringAPI struct {
AgentID uuid.UUID
Database database.Store
Log slog.Logger
}
func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context.Context, _ *proto.GetResourcesMonitoringConfigurationRequest) (*proto.GetResourcesMonitoringConfigurationResponse, error) {
memoryMonitor, memoryErr := a.Database.FetchMemoryResourceMonitorsByAgentID(ctx, a.AgentID)
if memoryErr != nil && !errors.Is(memoryErr, sql.ErrNoRows) {
return nil, xerrors.Errorf("failed to fetch memory resource monitor: %w", memoryErr)
}
volumeMonitors, err := a.Database.FetchVolumesResourceMonitorsByAgentID(ctx, a.AgentID)
if err != nil {
return nil, xerrors.Errorf("failed to fetch volume resource monitors: %w", err)
}
return &proto.GetResourcesMonitoringConfigurationResponse{
Config: &proto.GetResourcesMonitoringConfigurationResponse_Config{
CollectionIntervalSeconds: 10,
NumDatapoints: 20,
},
Memory: func() *proto.GetResourcesMonitoringConfigurationResponse_Memory {
if memoryErr != nil {
return nil
}
return &proto.GetResourcesMonitoringConfigurationResponse_Memory{
Enabled: memoryMonitor.Enabled,
}
}(),
Volumes: func() []*proto.GetResourcesMonitoringConfigurationResponse_Volume {
volumes := make([]*proto.GetResourcesMonitoringConfigurationResponse_Volume, 0, len(volumeMonitors))
for _, monitor := range volumeMonitors {
volumes = append(volumes, &proto.GetResourcesMonitoringConfigurationResponse_Volume{
Enabled: monitor.Enabled,
Path: monitor.Path,
})
}
return volumes
}(),
}, nil
}
func (a *ResourcesMonitoringAPI) PushResourcesMonitoringUsage(ctx context.Context, req *proto.PushResourcesMonitoringUsageRequest) (*proto.PushResourcesMonitoringUsageResponse, error) {
a.Log.Info(ctx, "resources monitoring usage received",
slog.F("request", req))
return &proto.PushResourcesMonitoringUsageResponse{}, nil
}