chore: track usage of organizations in telemetry (#16323)

Addresses https://github.com/coder/internal/issues/317.

## Changes

Requirements are quoted below:

> how many orgs does deployment have

Adds the Organization entity to telemetry.

> ensuring resources are associated with orgs

All resources that reference an org already report the org id to
telemetry. Adds a test to check that.

> whether org sync is configured

Adds the `IDPOrgSync` boolean field to the Deployment entity.

## Implementation of the org sync check

While there's an `OrganizationSyncEnabled` method on the IDPSync
interface, I decided not to use it directly and implemented a
counterpart just for telemetry purposes. It's a compromise I'm not happy
about, but I found that it's a simpler approach than the alternative.
There are multiple reasons:

1. The telemetry package cannot statically access the IDPSync interface
due to a circular import.
2. We can't dynamically pass a reference to the
`OrganizationSyncEnabled` function at the time of instantiating the
telemetry object, because our server initialization logic depends on the
telemetry object being created before the IDPSync object.
3. If we circumvent that problem by passing the reference as an
initially empty pointer, initializing telemetry, then IDPSync, then
updating the pointer to point to `OrganizationSyncEnabled`, we have to
refactor the initialization logic of the telemetry object itself to
avoid a race condition where the first telemetry report is performed
without a valid reference.

I actually implemented that approach in
https://github.com/coder/coder/pull/16307, but realized I'm unable to
fully test it. It changed the initialization order in the server
command, and I wanted to test our CLI with Org Sync configured with a
premium license. As far as I'm aware, we don't have the tooling to do
that. I couldn't figure out a way to start the CLI with a mock license,
and I didn't want to go down further into the refactoring rabbit hole.

So I decided that reimplementing the org sync checking logic is simpler.
This commit is contained in:
Hugo Dutka
2025-01-29 15:54:31 +01:00
committed by GitHub
parent b77b5432c6
commit 92d22e296b
4 changed files with 152 additions and 6 deletions

View File

@ -45,6 +45,8 @@ func (s AGPLIDPSync) UpdateOrganizationSettings(ctx context.Context, db database
}
func (s AGPLIDPSync) OrganizationSyncSettings(ctx context.Context, db database.Store) (*OrganizationSyncSettings, error) {
// If this logic is ever updated, make sure to update the corresponding
// checkIDPOrgSync in coderd/telemetry/telemetry.go.
rlv := s.Manager.Resolver(db)
orgSettings, err := s.SyncSettings.Organization.Resolve(ctx, rlv)
if err != nil {

View File

@ -4,6 +4,7 @@ import (
"bytes"
"context"
"crypto/sha256"
"database/sql"
"encoding/json"
"errors"
"fmt"
@ -244,6 +245,11 @@ func (r *remoteReporter) deployment() error {
return xerrors.Errorf("install source must be <=64 chars: %s", installSource)
}
idpOrgSync, err := checkIDPOrgSync(r.ctx, r.options.Database, r.options.DeploymentConfig)
if err != nil {
r.options.Logger.Debug(r.ctx, "check IDP org sync", slog.Error(err))
}
data, err := json.Marshal(&Deployment{
ID: r.options.DeploymentID,
Architecture: sysInfo.Architecture,
@ -263,6 +269,7 @@ func (r *remoteReporter) deployment() error {
MachineID: sysInfo.UniqueID,
StartedAt: r.startedAt,
ShutdownAt: r.shutdownAt,
IDPOrgSync: &idpOrgSync,
})
if err != nil {
return xerrors.Errorf("marshal deployment: %w", err)
@ -284,6 +291,45 @@ func (r *remoteReporter) deployment() error {
return nil
}
// idpOrgSyncConfig is a subset of
// https://github.com/coder/coder/blob/5c6578d84e2940b9cfd04798c45e7c8042c3fe0e/coderd/idpsync/organization.go#L148
type idpOrgSyncConfig struct {
Field string `json:"field"`
}
// checkIDPOrgSync inspects the server flags and the runtime config. It's based on
// the OrganizationSyncEnabled function from enterprise/coderd/enidpsync/organizations.go.
// It has one distinct difference: it doesn't check if the license entitles to the
// feature, it only checks if the feature is configured.
//
// The above function is not used because it's very hard to make it available in
// the telemetry package due to coder/coder package structure and initialization
// order of the coder server.
//
// We don't check license entitlements because it's also hard to do from the
// telemetry package, and the config check should be sufficient for telemetry purposes.
//
// While this approach duplicates code, it's simpler than the alternative.
//
// See https://github.com/coder/coder/pull/16323 for more details.
func checkIDPOrgSync(ctx context.Context, db database.Store, values *codersdk.DeploymentValues) (bool, error) {
// key based on https://github.com/coder/coder/blob/5c6578d84e2940b9cfd04798c45e7c8042c3fe0e/coderd/idpsync/idpsync.go#L168
syncConfigRaw, err := db.GetRuntimeConfig(ctx, "organization-sync-settings")
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
// If the runtime config is not set, we check if the deployment config
// has the organization field set.
return values != nil && values.OIDC.OrganizationField != "", nil
}
return false, xerrors.Errorf("get runtime config: %w", err)
}
syncConfig := idpOrgSyncConfig{}
if err := json.Unmarshal([]byte(syncConfigRaw), &syncConfig); err != nil {
return false, xerrors.Errorf("unmarshal runtime config: %w", err)
}
return syncConfig.Field != "", nil
}
// createSnapshot collects a full snapshot from the database.
func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
var (
@ -518,6 +564,21 @@ func (r *remoteReporter) createSnapshot() (*Snapshot, error) {
}
return nil
})
eg.Go(func() error {
// Warning: When an organization is deleted, it's completely removed from
// the database. It will no longer be reported, and there will be no other
// indicator that it was deleted. This requires special handling when
// interpreting the telemetry data later.
orgs, err := r.options.Database.GetOrganizations(r.ctx, database.GetOrganizationsParams{})
if err != nil {
return xerrors.Errorf("get organizations: %w", err)
}
snapshot.Organizations = make([]Organization, 0, len(orgs))
for _, org := range orgs {
snapshot.Organizations = append(snapshot.Organizations, ConvertOrganization(org))
}
return nil
})
err := eg.Wait()
if err != nil {
@ -916,6 +977,14 @@ func ConvertExternalProvisioner(id uuid.UUID, tags map[string]string, provisione
}
}
func ConvertOrganization(org database.Organization) Organization {
return Organization{
ID: org.ID,
CreatedAt: org.CreatedAt,
IsDefault: org.IsDefault,
}
}
// Snapshot represents a point-in-time anonymized database dump.
// Data is aggregated by latest on the server-side, so partial data
// can be sent without issue.
@ -942,6 +1011,7 @@ type Snapshot struct {
WorkspaceModules []WorkspaceModule `json:"workspace_modules"`
Workspaces []Workspace `json:"workspaces"`
NetworkEvents []NetworkEvent `json:"network_events"`
Organizations []Organization `json:"organizations"`
}
// Deployment contains information about the host running Coder.
@ -964,6 +1034,9 @@ type Deployment struct {
MachineID string `json:"machine_id"`
StartedAt time.Time `json:"started_at"`
ShutdownAt *time.Time `json:"shutdown_at"`
// While IDPOrgSync will always be set, it's nullable to make
// the struct backwards compatible with older coder versions.
IDPOrgSync *bool `json:"idp_org_sync"`
}
type APIKey struct {
@ -1457,6 +1530,12 @@ func NetworkEventFromProto(proto *tailnetproto.TelemetryEvent) (NetworkEvent, er
}, nil
}
type Organization struct {
ID uuid.UUID `json:"id"`
IsDefault bool `json:"is_default"`
CreatedAt time.Time `json:"created_at"`
}
type noopReporter struct{}
func (*noopReporter) Report(_ *Snapshot) {}

View File

@ -22,7 +22,10 @@ import (
"github.com/coder/coder/v2/coderd/database/dbmem"
"github.com/coder/coder/v2/coderd/database/dbtestutil"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/coderd/idpsync"
"github.com/coder/coder/v2/coderd/runtimeconfig"
"github.com/coder/coder/v2/coderd/telemetry"
"github.com/coder/coder/v2/codersdk"
"github.com/coder/coder/v2/testutil"
)
@ -40,22 +43,33 @@ func TestTelemetry(t *testing.T) {
db := dbmem.New()
ctx := testutil.Context(t, testutil.WaitMedium)
org, err := db.GetDefaultOrganization(ctx)
require.NoError(t, err)
_, _ = dbgen.APIKey(t, db, database.APIKey{})
_ = dbgen.ProvisionerJob(t, db, nil, database.ProvisionerJob{
Provisioner: database.ProvisionerTypeTerraform,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionDryRun,
Provisioner: database.ProvisionerTypeTerraform,
StorageMethod: database.ProvisionerStorageMethodFile,
Type: database.ProvisionerJobTypeTemplateVersionDryRun,
OrganizationID: org.ID,
})
_ = dbgen.Template(t, db, database.Template{
Provisioner: database.ProvisionerTypeTerraform,
Provisioner: database.ProvisionerTypeTerraform,
OrganizationID: org.ID,
})
sourceExampleID := uuid.NewString()
_ = dbgen.TemplateVersion(t, db, database.TemplateVersion{
SourceExampleID: sql.NullString{String: sourceExampleID, Valid: true},
OrganizationID: org.ID,
})
_ = dbgen.TemplateVersion(t, db, database.TemplateVersion{
OrganizationID: org.ID,
})
_ = dbgen.TemplateVersion(t, db, database.TemplateVersion{})
user := dbgen.User(t, db, database.User{})
_ = dbgen.Workspace(t, db, database.WorkspaceTable{})
_ = dbgen.Workspace(t, db, database.WorkspaceTable{
OrganizationID: org.ID,
})
_ = dbgen.WorkspaceApp(t, db, database.WorkspaceApp{
SharingLevel: database.AppSharingLevelOwner,
Health: database.WorkspaceAppHealthDisabled,
@ -112,6 +126,7 @@ func TestTelemetry(t *testing.T) {
require.Len(t, snapshot.WorkspaceAgentStats, 1)
require.Len(t, snapshot.WorkspaceProxies, 1)
require.Len(t, snapshot.WorkspaceModules, 1)
require.Len(t, snapshot.Organizations, 1)
wsa := snapshot.WorkspaceAgents[0]
require.Len(t, wsa.Subsystems, 2)
@ -128,6 +143,19 @@ func TestTelemetry(t *testing.T) {
})
require.Equal(t, tvs[0].SourceExampleID, &sourceExampleID)
require.Nil(t, tvs[1].SourceExampleID)
for _, entity := range snapshot.Workspaces {
require.Equal(t, entity.OrganizationID, org.ID)
}
for _, entity := range snapshot.ProvisionerJobs {
require.Equal(t, entity.OrganizationID, org.ID)
}
for _, entity := range snapshot.TemplateVersions {
require.Equal(t, entity.OrganizationID, org.ID)
}
for _, entity := range snapshot.Templates {
require.Equal(t, entity.OrganizationID, org.ID)
}
})
t.Run("HashedEmail", func(t *testing.T) {
t.Parallel()
@ -243,6 +271,41 @@ func TestTelemetry(t *testing.T) {
require.Equal(t, c.want, telemetry.GetModuleSourceType(c.source))
}
})
t.Run("IDPOrgSync", func(t *testing.T) {
t.Parallel()
ctx := testutil.Context(t, testutil.WaitMedium)
db, _ := dbtestutil.NewDB(t)
// 1. No org sync settings
deployment, _ := collectSnapshot(t, db, nil)
require.False(t, *deployment.IDPOrgSync)
// 2. Org sync settings set in server flags
deployment, _ = collectSnapshot(t, db, func(opts telemetry.Options) telemetry.Options {
opts.DeploymentConfig = &codersdk.DeploymentValues{
OIDC: codersdk.OIDCConfig{
OrganizationField: "organizations",
},
}
return opts
})
require.True(t, *deployment.IDPOrgSync)
// 3. Org sync settings set in runtime config
org, err := db.GetDefaultOrganization(ctx)
require.NoError(t, err)
sync := idpsync.NewAGPLSync(testutil.Logger(t), runtimeconfig.NewManager(), idpsync.DeploymentSyncSettings{})
err = sync.UpdateOrganizationSettings(ctx, db, idpsync.OrganizationSyncSettings{
Field: "organizations",
Mapping: map[string][]uuid.UUID{
"first": {org.ID},
},
AssignDefault: true,
})
require.NoError(t, err)
deployment, _ = collectSnapshot(t, db, nil)
require.True(t, *deployment.IDPOrgSync)
})
}
// nolint:paralleltest