chore: use container memory if containerised for oom notifications (#17062)

Currently we query only the underlying host's memory usage for our
memory resource monitor. This PR changes that to check if the workspace
is in a container, and if so it queries the container's memory usage,
falling back to the host's memory usage if not.
This commit is contained in:
Danielle Maywood
2025-03-24 11:14:21 +00:00
committed by GitHub
parent 674f60fc5b
commit 765e7058e8
4 changed files with 156 additions and 8 deletions

View File

@ -965,7 +965,10 @@ func (a *agent) run() (retErr error) {
if err != nil { if err != nil {
return xerrors.Errorf("failed to create resources fetcher: %w", err) return xerrors.Errorf("failed to create resources fetcher: %w", err)
} }
resourcesFetcher := resourcesmonitor.NewFetcher(statfetcher) resourcesFetcher, err := resourcesmonitor.NewFetcher(statfetcher)
if err != nil {
return xerrors.Errorf("new resource fetcher: %w", err)
}
resourcesmonitor := resourcesmonitor.NewResourcesMonitor(logger, clk, config, resourcesFetcher, aAPI) resourcesmonitor := resourcesmonitor.NewResourcesMonitor(logger, clk, config, resourcesFetcher, aAPI)
return resourcesmonitor.Start(ctx) return resourcesmonitor.Start(ctx)

View File

@ -6,26 +6,58 @@ import (
"github.com/coder/coder/v2/cli/clistat" "github.com/coder/coder/v2/cli/clistat"
) )
type Statter interface {
IsContainerized() (bool, error)
ContainerMemory(p clistat.Prefix) (*clistat.Result, error)
HostMemory(p clistat.Prefix) (*clistat.Result, error)
Disk(p clistat.Prefix, path string) (*clistat.Result, error)
}
type Fetcher interface { type Fetcher interface {
FetchMemory() (total int64, used int64, err error) FetchMemory() (total int64, used int64, err error)
FetchVolume(volume string) (total int64, used int64, err error) FetchVolume(volume string) (total int64, used int64, err error)
} }
type fetcher struct { type fetcher struct {
*clistat.Statter Statter
isContainerized bool
} }
//nolint:revive //nolint:revive
func NewFetcher(f *clistat.Statter) *fetcher { func NewFetcher(f Statter) (*fetcher, error) {
return &fetcher{ isContainerized, err := f.IsContainerized()
f, if err != nil {
return nil, xerrors.Errorf("check is containerized: %w", err)
} }
return &fetcher{f, isContainerized}, nil
} }
func (f *fetcher) FetchMemory() (total int64, used int64, err error) { func (f *fetcher) FetchMemory() (total int64, used int64, err error) {
mem, err := f.HostMemory(clistat.PrefixDefault) var mem *clistat.Result
if err != nil {
return 0, 0, xerrors.Errorf("failed to fetch memory: %w", err) if f.isContainerized {
mem, err = f.ContainerMemory(clistat.PrefixDefault)
if err != nil {
return 0, 0, xerrors.Errorf("fetch container memory: %w", err)
}
// A container might not have a memory limit set. If this
// happens we want to fallback to querying the host's memory
// to know what the total memory is on the host.
if mem.Total == nil {
hostMem, err := f.HostMemory(clistat.PrefixDefault)
if err != nil {
return 0, 0, xerrors.Errorf("fetch host memory: %w", err)
}
mem.Total = hostMem.Total
}
} else {
mem, err = f.HostMemory(clistat.PrefixDefault)
if err != nil {
return 0, 0, xerrors.Errorf("fetch host memory: %w", err)
}
} }
if mem.Total == nil { if mem.Total == nil {

View File

@ -0,0 +1,109 @@
package resourcesmonitor_test
import (
"testing"
"github.com/stretchr/testify/require"
"golang.org/x/xerrors"
"github.com/coder/coder/v2/agent/proto/resourcesmonitor"
"github.com/coder/coder/v2/cli/clistat"
"github.com/coder/coder/v2/coderd/util/ptr"
)
type mockStatter struct {
isContainerized bool
containerMemory clistat.Result
hostMemory clistat.Result
disk map[string]clistat.Result
}
func (s *mockStatter) IsContainerized() (bool, error) {
return s.isContainerized, nil
}
func (s *mockStatter) ContainerMemory(_ clistat.Prefix) (*clistat.Result, error) {
return &s.containerMemory, nil
}
func (s *mockStatter) HostMemory(_ clistat.Prefix) (*clistat.Result, error) {
return &s.hostMemory, nil
}
func (s *mockStatter) Disk(_ clistat.Prefix, path string) (*clistat.Result, error) {
disk, ok := s.disk[path]
if !ok {
return nil, xerrors.New("path not found")
}
return &disk, nil
}
func TestFetchMemory(t *testing.T) {
t.Parallel()
t.Run("IsContainerized", func(t *testing.T) {
t.Parallel()
t.Run("WithMemoryLimit", func(t *testing.T) {
t.Parallel()
fetcher, err := resourcesmonitor.NewFetcher(&mockStatter{
isContainerized: true,
containerMemory: clistat.Result{
Used: 10.0,
Total: ptr.Ref(20.0),
},
hostMemory: clistat.Result{
Used: 20.0,
Total: ptr.Ref(30.0),
},
})
require.NoError(t, err)
total, used, err := fetcher.FetchMemory()
require.NoError(t, err)
require.Equal(t, int64(10), used)
require.Equal(t, int64(20), total)
})
t.Run("WithoutMemoryLimit", func(t *testing.T) {
t.Parallel()
fetcher, err := resourcesmonitor.NewFetcher(&mockStatter{
isContainerized: true,
containerMemory: clistat.Result{
Used: 10.0,
Total: nil,
},
hostMemory: clistat.Result{
Used: 20.0,
Total: ptr.Ref(30.0),
},
})
require.NoError(t, err)
total, used, err := fetcher.FetchMemory()
require.NoError(t, err)
require.Equal(t, int64(10), used)
require.Equal(t, int64(30), total)
})
})
t.Run("IsHost", func(t *testing.T) {
t.Parallel()
fetcher, err := resourcesmonitor.NewFetcher(&mockStatter{
isContainerized: false,
hostMemory: clistat.Result{
Used: 20.0,
Total: ptr.Ref(30.0),
},
})
require.NoError(t, err)
total, used, err := fetcher.FetchMemory()
require.NoError(t, err)
require.Equal(t, int64(20), used)
require.Equal(t, int64(30), total)
})
}

View File

@ -16,6 +16,10 @@ const (
kubernetesDefaultServiceAccountToken = "/var/run/secrets/kubernetes.io/serviceaccount/token" //nolint:gosec kubernetesDefaultServiceAccountToken = "/var/run/secrets/kubernetes.io/serviceaccount/token" //nolint:gosec
) )
func (s *Statter) IsContainerized() (ok bool, err error) {
return IsContainerized(s.fs)
}
// IsContainerized returns whether the host is containerized. // IsContainerized returns whether the host is containerized.
// This is adapted from https://github.com/elastic/go-sysinfo/tree/main/providers/linux/container.go#L31 // This is adapted from https://github.com/elastic/go-sysinfo/tree/main/providers/linux/container.go#L31
// with modifications to support Sysbox containers. // with modifications to support Sysbox containers.