Mirror of https://github.com/coder/coder.git (synced 2025-07-13 21:36:50 +00:00)

Merge branch 'main' of github.com:/coder/coder into dk/prebuilds

Signed-off-by: Danny Kopping <danny@coder.com>
@@ -17,10 +17,12 @@ import (

    "cdr.dev/slog"
    agentproto "github.com/coder/coder/v2/agent/proto"
    "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
    "github.com/coder/coder/v2/coderd/appearance"
    "github.com/coder/coder/v2/coderd/database"
    "github.com/coder/coder/v2/coderd/database/pubsub"
    "github.com/coder/coder/v2/coderd/externalauth"
    "github.com/coder/coder/v2/coderd/notifications"
    "github.com/coder/coder/v2/coderd/prometheusmetrics"
    "github.com/coder/coder/v2/coderd/tracing"
    "github.com/coder/coder/v2/coderd/workspacestats"

@@ -29,6 +31,7 @@ import (
    "github.com/coder/coder/v2/codersdk/agentsdk"
    "github.com/coder/coder/v2/tailnet"
    tailnetproto "github.com/coder/coder/v2/tailnet/proto"
    "github.com/coder/quartz"
)

// API implements the DRPC agent API interface from agent/proto. This struct is

@@ -59,7 +62,9 @@ type Options struct {

    Ctx context.Context
    Log slog.Logger
    Clock quartz.Clock
    Database database.Store
    NotificationsEnqueuer notifications.Enqueuer
    Pubsub pubsub.Pubsub
    DerpMapFn func() *tailcfg.DERPMap
    TailnetCoordinator *atomic.Pointer[tailnet.Coordinator]

@@ -82,6 +87,10 @@ type Options struct {
}

func New(opts Options) *API {
    if opts.Clock == nil {
        opts.Clock = quartz.NewReal()
    }

    api := &API{
        opts: opts,
        mu: sync.Mutex{},

@@ -106,9 +115,22 @@ func New(opts Options) *API {
    }

    api.ResourcesMonitoringAPI = &ResourcesMonitoringAPI{
        Log: opts.Log,
        AgentID: opts.AgentID,
        Database: opts.Database,
        AgentID: opts.AgentID,
        WorkspaceID: opts.WorkspaceID,
        Clock: opts.Clock,
        Database: opts.Database,
        NotificationsEnqueuer: opts.NotificationsEnqueuer,
        Debounce: 5 * time.Minute,

        Config: resourcesmonitor.Config{
            NumDatapoints: 20,
            CollectionInterval: 10 * time.Second,

            Alert: resourcesmonitor.AlertConfig{
                MinimumNOKsPercent: 20,
                ConsecutiveNOKsPercent: 50,
            },
        },
    }

    api.StatsAPI = &StatsAPI{
@@ -4,20 +4,35 @@ import (
    "context"
    "database/sql"
    "errors"
    "fmt"
    "time"

    "golang.org/x/xerrors"

    "cdr.dev/slog"

    "github.com/google/uuid"

    "cdr.dev/slog"
    "github.com/coder/coder/v2/agent/proto"
    "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
    "github.com/coder/coder/v2/coderd/database"
    "github.com/coder/coder/v2/coderd/database/dbauthz"
    "github.com/coder/coder/v2/coderd/database/dbtime"
    "github.com/coder/coder/v2/coderd/notifications"
    "github.com/coder/quartz"
)

type ResourcesMonitoringAPI struct {
    AgentID uuid.UUID
    Database database.Store
    Log slog.Logger
    AgentID uuid.UUID
    WorkspaceID uuid.UUID

    Log slog.Logger
    Clock quartz.Clock
    Database database.Store
    NotificationsEnqueuer notifications.Enqueuer

    Debounce time.Duration
    Config resourcesmonitor.Config
}

func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context.Context, _ *proto.GetResourcesMonitoringConfigurationRequest) (*proto.GetResourcesMonitoringConfigurationResponse, error) {

@@ -33,8 +48,8 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context

    return &proto.GetResourcesMonitoringConfigurationResponse{
        Config: &proto.GetResourcesMonitoringConfigurationResponse_Config{
            CollectionIntervalSeconds: 10,
            NumDatapoints: 20,
            CollectionIntervalSeconds: int32(a.Config.CollectionInterval.Seconds()),
            NumDatapoints: a.Config.NumDatapoints,
        },
        Memory: func() *proto.GetResourcesMonitoringConfigurationResponse_Memory {
            if memoryErr != nil {

@@ -60,8 +75,182 @@ func (a *ResourcesMonitoringAPI) GetResourcesMonitoringConfiguration(ctx context
}

func (a *ResourcesMonitoringAPI) PushResourcesMonitoringUsage(ctx context.Context, req *proto.PushResourcesMonitoringUsageRequest) (*proto.PushResourcesMonitoringUsageResponse, error) {
    a.Log.Info(ctx, "resources monitoring usage received",
        slog.F("request", req))
    var err error

    return &proto.PushResourcesMonitoringUsageResponse{}, nil
    if memoryErr := a.monitorMemory(ctx, req.Datapoints); memoryErr != nil {
        err = errors.Join(err, xerrors.Errorf("monitor memory: %w", memoryErr))
    }

    if volumeErr := a.monitorVolumes(ctx, req.Datapoints); volumeErr != nil {
        err = errors.Join(err, xerrors.Errorf("monitor volume: %w", volumeErr))
    }

    return &proto.PushResourcesMonitoringUsageResponse{}, err
}

func (a *ResourcesMonitoringAPI) monitorMemory(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error {
    monitor, err := a.Database.FetchMemoryResourceMonitorsByAgentID(ctx, a.AgentID)
    if err != nil {
        // It is valid for an agent to not have a memory monitor, so we
        // do not want to treat it as an error.
        if errors.Is(err, sql.ErrNoRows) {
            return nil
        }

        return xerrors.Errorf("fetch memory resource monitor: %w", err)
    }

    if !monitor.Enabled {
        return nil
    }

    usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage, 0, len(datapoints))
    for _, datapoint := range datapoints {
        usageDatapoints = append(usageDatapoints, datapoint.Memory)
    }

    usageStates := resourcesmonitor.CalculateMemoryUsageStates(monitor, usageDatapoints)

    oldState := monitor.State
    newState := resourcesmonitor.NextState(a.Config, oldState, usageStates)

    debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState)

    //nolint:gocritic // We need to be able to update the resource monitor here.
    err = a.Database.UpdateMemoryResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateMemoryResourceMonitorParams{
        AgentID: a.AgentID,
        State: newState,
        UpdatedAt: dbtime.Time(a.Clock.Now()),
        DebouncedUntil: dbtime.Time(debouncedUntil),
    })
    if err != nil {
        return xerrors.Errorf("update workspace monitor: %w", err)
    }

    if !shouldNotify {
        return nil
    }

    workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID)
    if err != nil {
        return xerrors.Errorf("get workspace by id: %w", err)
    }

    _, err = a.NotificationsEnqueuer.EnqueueWithData(
        // nolint:gocritic // We need to be able to send the notification.
        dbauthz.AsNotifier(ctx),
        workspace.OwnerID,
        notifications.TemplateWorkspaceOutOfMemory,
        map[string]string{
            "workspace": workspace.Name,
            "threshold": fmt.Sprintf("%d%%", monitor.Threshold),
        },
        map[string]any{
            // NOTE(DanielleMaywood):
            // When notifications are enqueued, they are checked to be
            // unique within a single day. This means that if we attempt
            // to send two OOM notifications for the same workspace on
            // the same day, the enqueuer will prevent us from sending
            // a second one. We are inject a timestamp to make the
            // notifications appear different enough to circumvent this
            // deduplication logic.
            "timestamp": a.Clock.Now(),
        },
        "workspace-monitor-memory",
    )
    if err != nil {
        return xerrors.Errorf("notify workspace OOM: %w", err)
    }

    return nil
}

func (a *ResourcesMonitoringAPI) monitorVolumes(ctx context.Context, datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint) error {
    volumeMonitors, err := a.Database.FetchVolumesResourceMonitorsByAgentID(ctx, a.AgentID)
    if err != nil {
        return xerrors.Errorf("get or insert volume monitor: %w", err)
    }

    outOfDiskVolumes := make([]map[string]any, 0)

    for _, monitor := range volumeMonitors {
        if !monitor.Enabled {
            continue
        }

        usageDatapoints := make([]*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage, 0, len(datapoints))
        for _, datapoint := range datapoints {
            var usage *proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage

            for _, volume := range datapoint.Volumes {
                if volume.Volume == monitor.Path {
                    usage = volume
                    break
                }
            }

            usageDatapoints = append(usageDatapoints, usage)
        }

        usageStates := resourcesmonitor.CalculateVolumeUsageStates(monitor, usageDatapoints)

        oldState := monitor.State
        newState := resourcesmonitor.NextState(a.Config, oldState, usageStates)

        debouncedUntil, shouldNotify := monitor.Debounce(a.Debounce, a.Clock.Now(), oldState, newState)

        if shouldNotify {
            outOfDiskVolumes = append(outOfDiskVolumes, map[string]any{
                "path": monitor.Path,
                "threshold": fmt.Sprintf("%d%%", monitor.Threshold),
            })
        }

        //nolint:gocritic // We need to be able to update the resource monitor here.
        if err := a.Database.UpdateVolumeResourceMonitor(dbauthz.AsResourceMonitor(ctx), database.UpdateVolumeResourceMonitorParams{
            AgentID: a.AgentID,
            Path: monitor.Path,
            State: newState,
            UpdatedAt: dbtime.Time(a.Clock.Now()),
            DebouncedUntil: dbtime.Time(debouncedUntil),
        }); err != nil {
            return xerrors.Errorf("update workspace monitor: %w", err)
        }
    }

    if len(outOfDiskVolumes) == 0 {
        return nil
    }

    workspace, err := a.Database.GetWorkspaceByID(ctx, a.WorkspaceID)
    if err != nil {
        return xerrors.Errorf("get workspace by id: %w", err)
    }

    if _, err := a.NotificationsEnqueuer.EnqueueWithData(
        // nolint:gocritic // We need to be able to send the notification.
        dbauthz.AsNotifier(ctx),
        workspace.OwnerID,
        notifications.TemplateWorkspaceOutOfDisk,
        map[string]string{
            "workspace": workspace.Name,
        },
        map[string]any{
            "volumes": outOfDiskVolumes,
            // NOTE(DanielleMaywood):
            // When notifications are enqueued, they are checked to be
            // unique within a single day. This means that if we attempt
            // to send two OOM notifications for the same workspace on
            // the same day, the enqueuer will prevent us from sending
            // a second one. We are inject a timestamp to make the
            // notifications appear different enough to circumvent this
            // deduplication logic.
            "timestamp": a.Clock.Now(),
        },
        "workspace-monitor-volumes",
    ); err != nil {
        return xerrors.Errorf("notify workspace OOD: %w", err)
    }

    return nil
}
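Note: the monitor.Debounce helper called from both monitorMemory and monitorVolumes is a method on the monitor rows returned by the database layer, and its implementation is not part of this diff. Based only on how it is called above and on the expectations in the tests below, a rough sketch of the behaviour it has to provide could look like this (a hypothetical illustration; the real method presumably reads the stored DebouncedUntil value from the monitor row rather than taking it as a parameter):

// debounceSketch is an illustration only, not the coderd implementation.
// It reports the next time an alert may fire and whether this particular
// state transition should send a notification now.
func debounceSketch(
    after time.Duration,
    now time.Time,
    debouncedUntil time.Time, // previously stored alongside the monitor
    oldState, newState database.WorkspaceAgentMonitorState,
) (time.Time, bool) {
    // Only a transition into the alerting state is a candidate for notifying.
    if oldState != database.WorkspaceAgentMonitorStateOK ||
        newState != database.WorkspaceAgentMonitorStateNOK {
        return debouncedUntil, false
    }
    // Still inside the debounce window opened by a previous alert: stay quiet.
    if now.Before(debouncedUntil) {
        return debouncedUntil, false
    }
    // Notify and open a new debounce window.
    return now.Add(after), true
}

That shape matches the scenarios exercised in the tests below: OK -> NOK notifies, NOK -> OK never notifies, and a second OK -> NOK inside the window stays silent until the debounce period has elapsed.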
coderd/agentapi/resources_monitoring_test.go (new file, 944 lines)

@@ -0,0 +1,944 @@
package agentapi_test

import (
    "context"
    "testing"
    "time"

    "github.com/google/uuid"
    "github.com/stretchr/testify/require"
    "google.golang.org/protobuf/types/known/timestamppb"

    agentproto "github.com/coder/coder/v2/agent/proto"
    "github.com/coder/coder/v2/coderd/agentapi"
    "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
    "github.com/coder/coder/v2/coderd/database"
    "github.com/coder/coder/v2/coderd/database/dbgen"
    "github.com/coder/coder/v2/coderd/database/dbtestutil"
    "github.com/coder/coder/v2/coderd/notifications"
    "github.com/coder/coder/v2/coderd/notifications/notificationstest"
    "github.com/coder/quartz"
)

func resourceMonitorAPI(t *testing.T) (*agentapi.ResourcesMonitoringAPI, database.User, *quartz.Mock, *notificationstest.FakeEnqueuer) {
    t.Helper()

    db, _ := dbtestutil.NewDB(t)
    user := dbgen.User(t, db, database.User{})
    org := dbgen.Organization(t, db, database.Organization{})
    template := dbgen.Template(t, db, database.Template{
        OrganizationID: org.ID,
        CreatedBy: user.ID,
    })
    templateVersion := dbgen.TemplateVersion(t, db, database.TemplateVersion{
        TemplateID: uuid.NullUUID{Valid: true, UUID: template.ID},
        OrganizationID: org.ID,
        CreatedBy: user.ID,
    })
    workspace := dbgen.Workspace(t, db, database.WorkspaceTable{
        OrganizationID: org.ID,
        TemplateID: template.ID,
        OwnerID: user.ID,
    })
    job := dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
        Type: database.ProvisionerJobTypeWorkspaceBuild,
    })
    build := dbgen.WorkspaceBuild(t, db, database.WorkspaceBuild{
        JobID: job.ID,
        WorkspaceID: workspace.ID,
        TemplateVersionID: templateVersion.ID,
    })
    resource := dbgen.WorkspaceResource(t, db, database.WorkspaceResource{
        JobID: build.JobID,
    })
    agent := dbgen.WorkspaceAgent(t, db, database.WorkspaceAgent{
        ResourceID: resource.ID,
    })

    notifyEnq := &notificationstest.FakeEnqueuer{}
    clock := quartz.NewMock(t)

    return &agentapi.ResourcesMonitoringAPI{
        AgentID: agent.ID,
        WorkspaceID: workspace.ID,
        Clock: clock,
        Database: db,
        NotificationsEnqueuer: notifyEnq,
        Config: resourcesmonitor.Config{
            NumDatapoints: 20,
            CollectionInterval: 10 * time.Second,

            Alert: resourcesmonitor.AlertConfig{
                MinimumNOKsPercent: 20,
                ConsecutiveNOKsPercent: 50,
            },
        },
        Debounce: 1 * time.Minute,
    }, user, clock, notifyEnq
}

func TestMemoryResourceMonitorDebounce(t *testing.T) {
    t.Parallel()

    // This test is a bit of a long one. We're testing that
    // when a monitor goes into an alert state, it doesn't
    // allow another notification to occur until after the
    // debounce period.
    //
    // 1. OK -> NOK |> sends a notification
    // 2. NOK -> OK |> does nothing
    // 3. OK -> NOK |> does nothing due to debounce period
    // 4. NOK -> OK |> does nothing
    // 5. OK -> NOK |> sends a notification as debounce period exceeded

    api, user, clock, notifyEnq := resourceMonitorAPI(t)
    api.Config.Alert.ConsecutiveNOKsPercent = 100

    // Given: A monitor in an OK state
    dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
        AgentID: api.AgentID,
        State: database.WorkspaceAgentMonitorStateOK,
        Threshold: 80,
    })

    // When: The monitor is given a state that will trigger NOK
    _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                    Used: 10,
                    Total: 10,
                },
            },
        },
    })
    require.NoError(t, err)

    // Then: We expect there to be a notification sent
    sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
    require.Len(t, sent, 1)
    require.Equal(t, user.ID, sent[0].UserID)
    notifyEnq.Clear()

    // When: The monitor moves to an OK state from NOK
    clock.Advance(api.Debounce / 4)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                    Used: 1,
                    Total: 10,
                },
            },
        },
    })
    require.NoError(t, err)

    // Then: We expect no new notifications
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
    require.Len(t, sent, 0)
    notifyEnq.Clear()

    // When: The monitor moves back to a NOK state before the debounced time.
    clock.Advance(api.Debounce / 4)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                    Used: 10,
                    Total: 10,
                },
            },
        },
    })
    require.NoError(t, err)

    // Then: We expect no new notifications (showing the debouncer working)
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
    require.Len(t, sent, 0)
    notifyEnq.Clear()

    // When: The monitor moves back to an OK state from NOK
    clock.Advance(api.Debounce / 4)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                    Used: 1,
                    Total: 10,
                },
            },
        },
    })
    require.NoError(t, err)

    // Then: We still expect no new notifications
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
    require.Len(t, sent, 0)
    notifyEnq.Clear()

    // When: The monitor moves back to a NOK state after the debounce period.
    clock.Advance(api.Debounce/4 + 1*time.Second)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                    Used: 10,
                    Total: 10,
                },
            },
        },
    })
    require.NoError(t, err)

    // Then: We expect a notification
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
    require.Len(t, sent, 1)
    require.Equal(t, user.ID, sent[0].UserID)
}

func TestMemoryResourceMonitor(t *testing.T) {
    t.Parallel()

    tests := []struct {
        name string
        memoryUsage []int64
        memoryTotal int64
        previousState database.WorkspaceAgentMonitorState
        expectState database.WorkspaceAgentMonitorState
        shouldNotify bool
    }{
        {
            name: "WhenOK/NeverExceedsThreshold",
            memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateOK,
            shouldNotify: false,
        },
        {
            name: "WhenOK/ShouldStayInOK",
            memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateOK,
            shouldNotify: false,
        },
        {
            name: "WhenOK/ConsecutiveExceedsThreshold",
            memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: true,
        },
        {
            name: "WhenOK/MinimumExceedsThreshold",
            memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: true,
        },
        {
            name: "WhenNOK/NeverExceedsThreshold",
            memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateOK,
            shouldNotify: false,
        },
        {
            name: "WhenNOK/ShouldStayInNOK",
            memoryUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: false,
        },
        {
            name: "WhenNOK/ConsecutiveExceedsThreshold",
            memoryUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: false,
        },
        {
            name: "WhenNOK/MinimumExceedsThreshold",
            memoryUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
            memoryTotal: 10,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: false,
        },
    }

    for _, tt := range tests {
        tt := tt

        t.Run(tt.name, func(t *testing.T) {
            t.Parallel()

            api, user, clock, notifyEnq := resourceMonitorAPI(t)

            datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.memoryUsage))
            collectedAt := clock.Now()
            for _, usage := range tt.memoryUsage {
                collectedAt = collectedAt.Add(15 * time.Second)
                datapoints = append(datapoints, &agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
                    CollectedAt: timestamppb.New(collectedAt),
                    Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                        Used: usage,
                        Total: tt.memoryTotal,
                    },
                })
            }

            dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
                AgentID: api.AgentID,
                State: tt.previousState,
                Threshold: 80,
            })

            clock.Set(collectedAt)
            _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
                Datapoints: datapoints,
            })
            require.NoError(t, err)

            sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
            if tt.shouldNotify {
                require.Len(t, sent, 1)
                require.Equal(t, user.ID, sent[0].UserID)
            } else {
                require.Len(t, sent, 0)
            }
        })
    }
}

func TestMemoryResourceMonitorMissingData(t *testing.T) {
    t.Parallel()

    t.Run("UnknownPreventsMovingIntoAlertState", func(t *testing.T) {
        t.Parallel()

        api, _, clock, notifyEnq := resourceMonitorAPI(t)
        api.Config.Alert.ConsecutiveNOKsPercent = 50
        api.Config.Alert.MinimumNOKsPercent = 100

        // Given: A monitor in an OK state.
        dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
            AgentID: api.AgentID,
            State: database.WorkspaceAgentMonitorStateOK,
            Threshold: 80,
        })

        // When: A datapoint is missing, surrounded by two NOK datapoints.
        _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
            Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
                {
                    CollectedAt: timestamppb.New(clock.Now()),
                    Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                        Used: 10,
                        Total: 10,
                    },
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)),
                    Memory: nil,
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)),
                    Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                        Used: 10,
                        Total: 10,
                    },
                },
            },
        })
        require.NoError(t, err)

        // Then: We expect no notifications, as this unknown prevents us knowing we should alert.
        sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfMemory))
        require.Len(t, sent, 0)

        // Then: We expect the monitor to still be in an OK state.
        monitor, err := api.Database.FetchMemoryResourceMonitorsByAgentID(context.Background(), api.AgentID)
        require.NoError(t, err)
        require.Equal(t, database.WorkspaceAgentMonitorStateOK, monitor.State)
    })

    t.Run("UnknownPreventsMovingOutOfAlertState", func(t *testing.T) {
        t.Parallel()

        api, _, clock, _ := resourceMonitorAPI(t)
        api.Config.Alert.ConsecutiveNOKsPercent = 50
        api.Config.Alert.MinimumNOKsPercent = 100

        // Given: A monitor in a NOK state.
        dbgen.WorkspaceAgentMemoryResourceMonitor(t, api.Database, database.WorkspaceAgentMemoryResourceMonitor{
            AgentID: api.AgentID,
            State: database.WorkspaceAgentMonitorStateNOK,
            Threshold: 80,
        })

        // When: A datapoint is missing, surrounded by two OK datapoints.
        _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
            Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
                {
                    CollectedAt: timestamppb.New(clock.Now()),
                    Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                        Used: 1,
                        Total: 10,
                    },
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)),
                    Memory: nil,
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)),
                    Memory: &agentproto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage{
                        Used: 1,
                        Total: 10,
                    },
                },
            },
        })
        require.NoError(t, err)

        // Then: We expect the monitor to still be in a NOK state.
        monitor, err := api.Database.FetchMemoryResourceMonitorsByAgentID(context.Background(), api.AgentID)
        require.NoError(t, err)
        require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitor.State)
    })
}

func TestVolumeResourceMonitorDebounce(t *testing.T) {
    t.Parallel()

    // This test is an even longer one. We're testing
    // that the debounce logic is independent per
    // volume monitor. We interleave the triggering
    // of each monitor to ensure the debounce logic
    // is monitor independent.
    //
    // First Monitor:
    // 1. OK -> NOK |> sends a notification
    // 2. NOK -> OK |> does nothing
    // 3. OK -> NOK |> does nothing due to debounce period
    // 4. NOK -> OK |> does nothing
    // 5. OK -> NOK |> sends a notification as debounce period exceeded
    // 6. NOK -> OK |> does nothing
    //
    // Second Monitor:
    // 1. OK -> OK  |> does nothing
    // 2. OK -> NOK |> sends a notification
    // 3. NOK -> OK |> does nothing
    // 4. OK -> NOK |> does nothing due to debounce period
    // 5. NOK -> OK |> does nothing
    // 6. OK -> NOK |> sends a notification as debounce period exceeded
    //

    firstVolumePath := "/home/coder"
    secondVolumePath := "/dev/coder"

    api, _, clock, notifyEnq := resourceMonitorAPI(t)

    // Given:
    // - First monitor in an OK state
    // - Second monitor in an OK state
    dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
        AgentID: api.AgentID,
        Path: firstVolumePath,
        State: database.WorkspaceAgentMonitorStateOK,
        Threshold: 80,
    })
    dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
        AgentID: api.AgentID,
        Path: secondVolumePath,
        State: database.WorkspaceAgentMonitorStateNOK,
        Threshold: 80,
    })

    // When:
    // - First monitor is in a NOK state
    // - Second monitor is in an OK state
    _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {Volume: firstVolumePath, Used: 10, Total: 10},
                    {Volume: secondVolumePath, Used: 1, Total: 10},
                },
            },
        },
    })
    require.NoError(t, err)

    // Then:
    // - We expect a notification from only the first monitor
    sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 1)
    volumes := requireVolumeData(t, sent[0])
    require.Len(t, volumes, 1)
    require.Equal(t, firstVolumePath, volumes[0]["path"])
    notifyEnq.Clear()

    // When:
    // - First monitor moves back to OK
    // - Second monitor moves to NOK
    clock.Advance(api.Debounce / 4)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {Volume: firstVolumePath, Used: 1, Total: 10},
                    {Volume: secondVolumePath, Used: 10, Total: 10},
                },
            },
        },
    })
    require.NoError(t, err)

    // Then:
    // - We expect a notification from only the second monitor
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 1)
    volumes = requireVolumeData(t, sent[0])
    require.Len(t, volumes, 1)
    require.Equal(t, secondVolumePath, volumes[0]["path"])
    notifyEnq.Clear()

    // When:
    // - First monitor moves back to NOK before debounce period has ended
    // - Second monitor moves back to OK
    clock.Advance(api.Debounce / 4)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {Volume: firstVolumePath, Used: 10, Total: 10},
                    {Volume: secondVolumePath, Used: 1, Total: 10},
                },
            },
        },
    })
    require.NoError(t, err)

    // Then:
    // - We expect no new notifications
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 0)
    notifyEnq.Clear()

    // When:
    // - First monitor moves back to OK
    // - Second monitor moves back to NOK
    clock.Advance(api.Debounce / 4)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {Volume: firstVolumePath, Used: 1, Total: 10},
                    {Volume: secondVolumePath, Used: 10, Total: 10},
                },
            },
        },
    })
    require.NoError(t, err)

    // Then:
    // - We expect no new notifications.
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 0)
    notifyEnq.Clear()

    // When:
    // - First monitor moves back to a NOK state after the debounce period
    // - Second monitor moves back to OK
    clock.Advance(api.Debounce/4 + 1*time.Second)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {Volume: firstVolumePath, Used: 10, Total: 10},
                    {Volume: secondVolumePath, Used: 1, Total: 10},
                },
            },
        },
    })
    require.NoError(t, err)

    // Then:
    // - We expect a notification from only the first monitor
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 1)
    volumes = requireVolumeData(t, sent[0])
    require.Len(t, volumes, 1)
    require.Equal(t, firstVolumePath, volumes[0]["path"])
    notifyEnq.Clear()

    // When:
    // - First montior moves back to OK
    // - Second monitor moves back to NOK after the debounce period
    clock.Advance(api.Debounce/4 + 1*time.Second)
    _, err = api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {Volume: firstVolumePath, Used: 1, Total: 10},
                    {Volume: secondVolumePath, Used: 10, Total: 10},
                },
            },
        },
    })
    require.NoError(t, err)

    // Then:
    // - We expect a notification from only the second monitor
    sent = notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 1)
    volumes = requireVolumeData(t, sent[0])
    require.Len(t, volumes, 1)
    require.Equal(t, secondVolumePath, volumes[0]["path"])
}

func TestVolumeResourceMonitor(t *testing.T) {
    t.Parallel()

    tests := []struct {
        name string
        volumePath string
        volumeUsage []int64
        volumeTotal int64
        thresholdPercent int32
        previousState database.WorkspaceAgentMonitorState
        expectState database.WorkspaceAgentMonitorState
        shouldNotify bool
    }{
        {
            name: "WhenOK/NeverExceedsThreshold",
            volumePath: "/home/coder",
            volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateOK,
            shouldNotify: false,
        },
        {
            name: "WhenOK/ShouldStayInOK",
            volumePath: "/home/coder",
            volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateOK,
            shouldNotify: false,
        },
        {
            name: "WhenOK/ConsecutiveExceedsThreshold",
            volumePath: "/home/coder",
            volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: true,
        },
        {
            name: "WhenOK/MinimumExceedsThreshold",
            volumePath: "/home/coder",
            volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: true,
        },
        {
            name: "WhenNOK/NeverExceedsThreshold",
            volumePath: "/home/coder",
            volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateOK,
            shouldNotify: false,
        },
        {
            name: "WhenNOK/ShouldStayInNOK",
            volumePath: "/home/coder",
            volumeUsage: []int64{9, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 2, 3, 1, 2},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: false,
        },
        {
            name: "WhenNOK/ConsecutiveExceedsThreshold",
            volumePath: "/home/coder",
            volumeUsage: []int64{2, 3, 2, 4, 2, 3, 2, 1, 2, 3, 4, 4, 1, 8, 9, 8, 9},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: false,
        },
        {
            name: "WhenNOK/MinimumExceedsThreshold",
            volumePath: "/home/coder",
            volumeUsage: []int64{2, 8, 2, 9, 2, 8, 2, 9, 2, 8, 4, 9, 1, 8, 2, 8, 9},
            volumeTotal: 10,
            thresholdPercent: 80,
            previousState: database.WorkspaceAgentMonitorStateNOK,
            expectState: database.WorkspaceAgentMonitorStateNOK,
            shouldNotify: false,
        },
    }

    for _, tt := range tests {
        tt := tt

        t.Run(tt.name, func(t *testing.T) {
            t.Parallel()

            api, user, clock, notifyEnq := resourceMonitorAPI(t)

            datapoints := make([]*agentproto.PushResourcesMonitoringUsageRequest_Datapoint, 0, len(tt.volumeUsage))
            collectedAt := clock.Now()
            for _, volumeUsage := range tt.volumeUsage {
                collectedAt = collectedAt.Add(15 * time.Second)

                volumeDatapoints := []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {
                        Volume: tt.volumePath,
                        Used: volumeUsage,
                        Total: tt.volumeTotal,
                    },
                }

                datapoints = append(datapoints, &agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
                    CollectedAt: timestamppb.New(collectedAt),
                    Volumes: volumeDatapoints,
                })
            }

            dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
                AgentID: api.AgentID,
                Path: tt.volumePath,
                State: tt.previousState,
                Threshold: tt.thresholdPercent,
            })

            clock.Set(collectedAt)
            _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
                Datapoints: datapoints,
            })
            require.NoError(t, err)

            sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
            if tt.shouldNotify {
                require.Len(t, sent, 1)
                require.Equal(t, user.ID, sent[0].UserID)
            } else {
                require.Len(t, sent, 0)
            }
        })
    }
}

func TestVolumeResourceMonitorMultiple(t *testing.T) {
    t.Parallel()

    api, _, clock, notifyEnq := resourceMonitorAPI(t)
    api.Config.Alert.ConsecutiveNOKsPercent = 100

    // Given: two different volume resource monitors
    dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
        AgentID: api.AgentID,
        Path: "/home/coder",
        State: database.WorkspaceAgentMonitorStateOK,
        Threshold: 80,
    })

    dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
        AgentID: api.AgentID,
        Path: "/dev/coder",
        State: database.WorkspaceAgentMonitorStateOK,
        Threshold: 80,
    })

    // When: both of them move to a NOK state
    _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
        Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
            {
                CollectedAt: timestamppb.New(clock.Now()),
                Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                    {
                        Volume: "/home/coder",
                        Used: 10,
                        Total: 10,
                    },
                    {
                        Volume: "/dev/coder",
                        Used: 10,
                        Total: 10,
                    },
                },
            },
        },
    })
    require.NoError(t, err)

    // Then: We expect a notification to alert with information about both
    sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
    require.Len(t, sent, 1)

    volumes := requireVolumeData(t, sent[0])
    require.Len(t, volumes, 2)
    require.Equal(t, "/home/coder", volumes[0]["path"])
    require.Equal(t, "/dev/coder", volumes[1]["path"])
}

func TestVolumeResourceMonitorMissingData(t *testing.T) {
    t.Parallel()

    t.Run("UnknownPreventsMovingIntoAlertState", func(t *testing.T) {
        t.Parallel()

        volumePath := "/home/coder"

        api, _, clock, notifyEnq := resourceMonitorAPI(t)
        api.Config.Alert.ConsecutiveNOKsPercent = 50
        api.Config.Alert.MinimumNOKsPercent = 100

        // Given: A monitor in an OK state.
        dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
            AgentID: api.AgentID,
            Path: volumePath,
            State: database.WorkspaceAgentMonitorStateOK,
            Threshold: 80,
        })

        // When: A datapoint is missing, surrounded by two NOK datapoints.
        _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
            Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
                {
                    CollectedAt: timestamppb.New(clock.Now()),
                    Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                        {
                            Volume: volumePath,
                            Used: 10,
                            Total: 10,
                        },
                    },
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)),
                    Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{},
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)),
                    Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                        {
                            Volume: volumePath,
                            Used: 10,
                            Total: 10,
                        },
                    },
                },
            },
        })
        require.NoError(t, err)

        // Then: We expect no notifications, as this unknown prevents us knowing we should alert.
        sent := notifyEnq.Sent(notificationstest.WithTemplateID(notifications.TemplateWorkspaceOutOfDisk))
        require.Len(t, sent, 0)

        // Then: We expect the monitor to still be in an OK state.
        monitors, err := api.Database.FetchVolumesResourceMonitorsByAgentID(context.Background(), api.AgentID)
        require.NoError(t, err)
        require.Len(t, monitors, 1)
        require.Equal(t, database.WorkspaceAgentMonitorStateOK, monitors[0].State)
    })

    t.Run("UnknownPreventsMovingOutOfAlertState", func(t *testing.T) {
        t.Parallel()

        volumePath := "/home/coder"

        api, _, clock, _ := resourceMonitorAPI(t)
        api.Config.Alert.ConsecutiveNOKsPercent = 50
        api.Config.Alert.MinimumNOKsPercent = 100

        // Given: A monitor in a NOK state.
        dbgen.WorkspaceAgentVolumeResourceMonitor(t, api.Database, database.WorkspaceAgentVolumeResourceMonitor{
            AgentID: api.AgentID,
            Path: volumePath,
            State: database.WorkspaceAgentMonitorStateNOK,
            Threshold: 80,
        })

        // When: A datapoint is missing, surrounded by two OK datapoints.
        _, err := api.PushResourcesMonitoringUsage(context.Background(), &agentproto.PushResourcesMonitoringUsageRequest{
            Datapoints: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint{
                {
                    CollectedAt: timestamppb.New(clock.Now()),
                    Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                        {
                            Volume: volumePath,
                            Used: 1,
                            Total: 10,
                        },
                    },
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(10 * time.Second)),
                    Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{},
                },
                {
                    CollectedAt: timestamppb.New(clock.Now().Add(20 * time.Second)),
                    Volumes: []*agentproto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage{
                        {
                            Volume: volumePath,
                            Used: 1,
                            Total: 10,
                        },
                    },
                },
            },
        })
        require.NoError(t, err)

        // Then: We expect the monitor to still be in a NOK state.
        monitors, err := api.Database.FetchVolumesResourceMonitorsByAgentID(context.Background(), api.AgentID)
        require.NoError(t, err)
        require.Len(t, monitors, 1)
        require.Equal(t, database.WorkspaceAgentMonitorStateNOK, monitors[0].State)
    })
}

func requireVolumeData(t *testing.T, notif *notificationstest.FakeNotification) []map[string]any {
    t.Helper()

    volumesData := notif.Data["volumes"]
    require.IsType(t, []map[string]any{}, volumesData)

    return volumesData.([]map[string]any)
}
coderd/agentapi/resourcesmonitor/resources_monitor.go (new file, 129 lines)

@@ -0,0 +1,129 @@
package resourcesmonitor

import (
    "math"
    "time"

    "github.com/coder/coder/v2/agent/proto"
    "github.com/coder/coder/v2/coderd/database"
    "github.com/coder/coder/v2/coderd/util/slice"
)

type State int

const (
    StateOK State = iota
    StateNOK
    StateUnknown
)

type AlertConfig struct {
    // What percentage of datapoints in a row are
    // required to put the monitor in an alert state.
    ConsecutiveNOKsPercent int

    // What percentage of datapoints in a window are
    // required to put the monitor in an alert state.
    MinimumNOKsPercent int
}

type Config struct {
    // How many datapoints should the agent send
    NumDatapoints int32

    // How long between each datapoint should
    // collection occur.
    CollectionInterval time.Duration

    Alert AlertConfig
}

func CalculateMemoryUsageStates(
    monitor database.WorkspaceAgentMemoryResourceMonitor,
    datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_MemoryUsage,
) []State {
    states := make([]State, 0, len(datapoints))

    for _, datapoint := range datapoints {
        state := StateUnknown

        if datapoint != nil {
            percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100)

            if percent < monitor.Threshold {
                state = StateOK
            } else {
                state = StateNOK
            }
        }

        states = append(states, state)
    }

    return states
}

func CalculateVolumeUsageStates(
    monitor database.WorkspaceAgentVolumeResourceMonitor,
    datapoints []*proto.PushResourcesMonitoringUsageRequest_Datapoint_VolumeUsage,
) []State {
    states := make([]State, 0, len(datapoints))

    for _, datapoint := range datapoints {
        state := StateUnknown

        if datapoint != nil {
            percent := int32(float64(datapoint.Used) / float64(datapoint.Total) * 100)

            if percent < monitor.Threshold {
                state = StateOK
            } else {
                state = StateNOK
            }
        }

        states = append(states, state)
    }

    return states
}

func NextState(c Config, oldState database.WorkspaceAgentMonitorState, states []State) database.WorkspaceAgentMonitorState {
    // If there are enough consecutive NOK states, we should be in an
    // alert state.
    consecutiveNOKs := slice.CountConsecutive(StateNOK, states...)
    if percent(consecutiveNOKs, len(states)) >= c.Alert.ConsecutiveNOKsPercent {
        return database.WorkspaceAgentMonitorStateNOK
    }

    // We do not explicitly handle StateUnknown because it could have
    // been either StateOK or StateNOK if collection didn't fail. As
    // it could be either, our best bet is to ignore it.
    nokCount, okCount := 0, 0
    for _, state := range states {
        switch state {
        case StateOK:
            okCount++
        case StateNOK:
            nokCount++
        }
    }

    // If there are enough NOK datapoints, we should be in an alert state.
    if percent(nokCount, len(states)) >= c.Alert.MinimumNOKsPercent {
        return database.WorkspaceAgentMonitorStateNOK
    }

    // If all datapoints are OK, we should be in an OK state
    if okCount == len(states) {
        return database.WorkspaceAgentMonitorStateOK
    }

    // Otherwise we stay in the same state as last.
    return oldState
}

func percent[T int](numerator, denominator T) int {
    percent := float64(numerator*100) / float64(denominator)
    return int(math.Round(percent))
}
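To make the two alert thresholds concrete, here is a small, self-contained usage sketch of the package introduced above. The values are illustrative only and mirror the defaults wired into agentapi.New earlier in this diff:

package main

import (
    "fmt"
    "time"

    "github.com/coder/coder/v2/coderd/agentapi/resourcesmonitor"
    "github.com/coder/coder/v2/coderd/database"
)

func main() {
    cfg := resourcesmonitor.Config{
        NumDatapoints:      20,
        CollectionInterval: 10 * time.Second,
        Alert: resourcesmonitor.AlertConfig{
            MinimumNOKsPercent:     20,
            ConsecutiveNOKsPercent: 50,
        },
    }

    // Half of this window breaches the threshold: 2 of 4 datapoints are NOK
    // (50% >= MinimumNOKsPercent) and the two NOKs are adjacent
    // (50% >= ConsecutiveNOKsPercent), so the monitor moves into the alert
    // state regardless of which rule is evaluated first.
    states := []resourcesmonitor.State{
        resourcesmonitor.StateOK,
        resourcesmonitor.StateOK,
        resourcesmonitor.StateNOK,
        resourcesmonitor.StateNOK,
    }

    next := resourcesmonitor.NextState(cfg, database.WorkspaceAgentMonitorStateOK, states)
    fmt.Println(next == database.WorkspaceAgentMonitorStateNOK) // true
}

A datapoint that arrives as nil is mapped to StateUnknown by the Calculate*UsageStates helpers and then ignored by NextState, which is what keeps a single missing sample from flipping the monitor in either direction (see the MissingData tests above).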