chore: add DRPC server implementation for network telemetry (#13675)

This commit is contained in:
Dean Sheather
2024-07-02 01:50:52 +10:00
committed by GitHub
parent 2fde054e10
commit 6c94dd4f23
14 changed files with 1192 additions and 557 deletions

View File

@ -39,6 +39,7 @@ import (
"cdr.dev/slog"
agentproto "github.com/coder/coder/v2/agent/proto"
"github.com/coder/coder/v2/buildinfo"
"github.com/coder/coder/v2/clock"
_ "github.com/coder/coder/v2/coderd/apidoc" // Used for swagger docs.
"github.com/coder/coder/v2/coderd/appearance"
"github.com/coder/coder/v2/coderd/audit"
@ -142,14 +143,16 @@ type Options struct {
DERPServer *derp.Server
// BaseDERPMap is used as the base DERP map for all clients and agents.
// Proxies are added to this list.
BaseDERPMap *tailcfg.DERPMap
DERPMapUpdateFrequency time.Duration
SwaggerEndpoint bool
SetUserGroups func(ctx context.Context, logger slog.Logger, tx database.Store, userID uuid.UUID, orgGroupNames map[uuid.UUID][]string, createMissingGroups bool) error
SetUserSiteRoles func(ctx context.Context, logger slog.Logger, tx database.Store, userID uuid.UUID, roles []string) error
TemplateScheduleStore *atomic.Pointer[schedule.TemplateScheduleStore]
UserQuietHoursScheduleStore *atomic.Pointer[schedule.UserQuietHoursScheduleStore]
AccessControlStore *atomic.Pointer[dbauthz.AccessControlStore]
BaseDERPMap *tailcfg.DERPMap
DERPMapUpdateFrequency time.Duration
NetworkTelemetryBatchFrequency time.Duration
NetworkTelemetryBatchMaxSize int
SwaggerEndpoint bool
SetUserGroups func(ctx context.Context, logger slog.Logger, tx database.Store, userID uuid.UUID, orgGroupNames map[uuid.UUID][]string, createMissingGroups bool) error
SetUserSiteRoles func(ctx context.Context, logger slog.Logger, tx database.Store, userID uuid.UUID, roles []string) error
TemplateScheduleStore *atomic.Pointer[schedule.TemplateScheduleStore]
UserQuietHoursScheduleStore *atomic.Pointer[schedule.UserQuietHoursScheduleStore]
AccessControlStore *atomic.Pointer[dbauthz.AccessControlStore]
// AppSecurityKey is the crypto key used to sign and encrypt tokens related to
// workspace applications. It consists of both a signing and encryption key.
AppSecurityKey workspaceapps.SecurityKey
@ -305,6 +308,12 @@ func New(options *Options) *API {
if options.DERPMapUpdateFrequency == 0 {
options.DERPMapUpdateFrequency = 5 * time.Second
}
if options.NetworkTelemetryBatchFrequency == 0 {
options.NetworkTelemetryBatchFrequency = 1 * time.Minute
}
if options.NetworkTelemetryBatchMaxSize == 0 {
options.NetworkTelemetryBatchMaxSize = 1_000
}
if options.TailnetCoordinator == nil {
options.TailnetCoordinator = tailnet.NewCoordinator(options.Logger)
}
@ -539,12 +548,19 @@ func New(options *Options) *API {
if options.DeploymentValues.Prometheus.Enable {
options.PrometheusRegistry.MustRegister(stn)
}
api.TailnetClientService, err = tailnet.NewClientService(
api.Logger.Named("tailnetclient"),
&api.TailnetCoordinator,
api.Options.DERPMapUpdateFrequency,
api.DERPMap,
api.NetworkTelemetryBatcher = tailnet.NewNetworkTelemetryBatcher(
clock.NewReal(),
api.Options.NetworkTelemetryBatchFrequency,
api.Options.NetworkTelemetryBatchMaxSize,
api.handleNetworkTelemetry,
)
api.TailnetClientService, err = tailnet.NewClientService(tailnet.ClientServiceOptions{
Logger: api.Logger.Named("tailnetclient"),
CoordPtr: &api.TailnetCoordinator,
DERPMapUpdateFrequency: api.Options.DERPMapUpdateFrequency,
DERPMapFn: api.DERPMap,
NetworkTelemetryHandler: api.NetworkTelemetryBatcher.Handler,
})
if err != nil {
api.Logger.Fatal(api.ctx, "failed to initialize tailnet client service", slog.Error(err))
}
@ -1255,6 +1271,7 @@ type API struct {
Auditor atomic.Pointer[audit.Auditor]
WorkspaceClientCoordinateOverride atomic.Pointer[func(rw http.ResponseWriter) bool]
TailnetCoordinator atomic.Pointer[tailnet.Coordinator]
NetworkTelemetryBatcher *tailnet.NetworkTelemetryBatcher
TailnetClientService *tailnet.ClientService
QuotaCommitter atomic.Pointer[proto.QuotaCommitter]
AppearanceFetcher atomic.Pointer[appearance.Fetcher]
@ -1313,7 +1330,12 @@ type API struct {
// Close waits for all WebSocket connections to drain before returning.
func (api *API) Close() error {
api.cancel()
select {
case <-api.ctx.Done():
return xerrors.New("API already closed")
default:
api.cancel()
}
if api.derpCloseFunc != nil {
api.derpCloseFunc()
}
@ -1348,6 +1370,7 @@ func (api *API) Close() error {
}
_ = api.agentProvider.Close()
_ = api.statsReporter.Close()
_ = api.NetworkTelemetryBatcher.Close()
return nil
}