chore: More complete tracing for RBAC functions (#5690)

* chore: More complete tracing for RBAC functions
* Add input.json as example rbac input for rego cli

The input.json is required to play with the rego cli and debug
the policy without golang. It is good to have an example to run
the commands in the readme.md

* Add span events to capture authorize and prepared results
* chore: Add prometheus metrics to rbac authorizer
This commit is contained in:
Steven Masley
2023-01-13 16:07:15 -06:00
committed by GitHub
parent e821b98918
commit eb48341696
12 changed files with 425 additions and 147 deletions

View File

@ -4,8 +4,11 @@ import (
"context"
_ "embed"
"sync"
"time"
"github.com/open-policy-agent/opa/rego"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"golang.org/x/xerrors"
@ -21,20 +24,16 @@ type Authorizer interface {
// PreparedAuthorized is the value handed back by PrepareByRoleName. It lets a
// caller reuse one prepared authorization for many objects, either by checking
// them one at a time or by compiling the check to SQL.
//
// NOTE(review): the two CompileToSQL lines below appear to be the before and
// after sides of this diff hunk; the ctx-taking signature looks like the one
// this change introduces — confirm against the final file.
type PreparedAuthorized interface {
// Authorize checks a single object. Presumably a nil error means the object
// is authorized, mirroring ByRoleName — confirm against the implementation.
Authorize(ctx context.Context, object Object) error
// CompileToSQL takes a conversion config and returns a string and an error;
// presumably the string is a SQL expression equivalent to the prepared
// check — confirm against the implementation.
CompileToSQL(cfg regosql.ConvertConfig) (string, error)
// CompileToSQL with a leading context. Filter's documentation recommends
// CompileToSQL over per-object filtering for large sets.
CompileToSQL(ctx context.Context, cfg regosql.ConvertConfig) (string, error)
}
// Filter takes in a list of objects, and will filter the list removing all
// the elements the subject does not have permission for. All objects must be
// of the same type.
//
// Ideally the 'CompileToSQL' is used instead for large sets. This cost scales
// linearly with the number of objects passed in.
func Filter[O Objecter](ctx context.Context, auth Authorizer, subjID string, subjRoles []string, scope Scope, groups []string, action Action, objects []O) ([]O, error) {
ctx, span := tracing.StartSpan(ctx, trace.WithAttributes(
attribute.String("subject_id", subjID),
attribute.StringSlice("subject_roles", subjRoles),
attribute.Int("num_objects", len(objects)),
))
defer span.End()
if len(objects) == 0 {
// Nothing to filter
return objects, nil
@ -42,6 +41,20 @@ func Filter[O Objecter](ctx context.Context, auth Authorizer, subjID string, sub
objectType := objects[0].RBACObject().Type
filtered := make([]O, 0)
// Start the span after the object type is detected. If we are filtering 0
// objects, then the span is not interesting. It would just add excessive
// 0 time spans that provide no insight.
ctx, span := tracing.StartSpan(ctx,
rbacTraceAttributes(subjRoles, len(groups), scope, action, objectType,
// For filtering, we are only measuring the total time for the entire
// set of objects. This and the 'PrepareByRoleName' span time
// is all that is required to measure the performance of this
// function on a per-object basis.
attribute.Int("num_objects", len(objects)),
),
)
defer span.End()
// Running benchmarks on this function, it is **always** faster to call
// auth.ByRoleName on <10 objects. This is because of the overhead of
// 'PrepareByRoleName'. Once we cross 10 objects, then it starts to become
@ -82,6 +95,9 @@ func Filter[O Objecter](ctx context.Context, auth Authorizer, subjID string, sub
// RegoAuthorizer will use a prepared rego query for performing authorize()
// and carries the prometheus histograms used to time authorization calls.
type RegoAuthorizer struct {
// query is the prepared rego evaluation query. NewAuthorizer copies it from
// the package-level query that is built inside queryOnce.Do.
query rego.PreparedEvalQuery
// authorizeHist records the duration, in seconds, of ByRoleName calls. Each
// observation is labeled "allowed": "true" when the call returned no error
// and "false" when it returned one.
authorizeHist *prometheus.HistogramVec
// prepareHist records the duration, in seconds, of PrepareByRoleName calls.
// It is only observed on the success path; early error returns skip it.
prepareHist prometheus.Histogram
}
var _ Authorizer = (*RegoAuthorizer)(nil)
@ -95,7 +111,7 @@ var (
query rego.PreparedEvalQuery
)
func NewAuthorizer() *RegoAuthorizer {
func NewAuthorizer(registry prometheus.Registerer) *RegoAuthorizer {
queryOnce.Do(func() {
var err error
query, err = rego.New(
@ -106,7 +122,51 @@ func NewAuthorizer() *RegoAuthorizer {
panic(xerrors.Errorf("compile rego: %w", err))
}
})
return &RegoAuthorizer{query: query}
// Register metrics to prometheus.
// These bucket values are based on the average time it takes to run authz
// being around 1ms. Anything under ~2ms is OK and does not need to be
// analyzed any further.
buckets := []float64{
0.0005, // 0.5ms
0.001, // 1ms
0.002, // 2ms
0.003,
0.005,
0.01, // 10ms
0.02,
0.035, // 35ms
0.05,
0.075,
0.1, // 100ms
0.25, // 250ms
0.75, // 750ms
1, // 1s
}
factory := promauto.With(registry)
authorizeHistogram := factory.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "authz",
Name: "authorize_duration_seconds",
Help: "Duration of the 'Authorize' call in seconds. Only counts calls that succeed.",
Buckets: buckets,
}, []string{"allowed"})
prepareHistogram := factory.NewHistogram(prometheus.HistogramOpts{
Namespace: "coderd",
Subsystem: "authz",
Name: "prepare_authorize_duration_seconds",
Help: "Duration of the 'PrepareAuthorize' call in seconds.",
Buckets: buckets,
})
return &RegoAuthorizer{
query: query,
authorizeHist: authorizeHistogram,
prepareHist: prepareHistogram,
}
}
type authSubject struct {
@ -120,6 +180,18 @@ type authSubject struct {
// This is the function intended to be used outside this package.
// The role is fetched from the builtin map located in memory.
func (a RegoAuthorizer) ByRoleName(ctx context.Context, subjectID string, roleNames []string, scope Scope, groups []string, action Action, object Object) error {
start := time.Now()
ctx, span := tracing.StartSpan(ctx,
trace.WithTimestamp(start), // Reuse the time.Now for metric and trace
rbacTraceAttributes(roleNames, len(groups), scope, action, object.Type,
// For authorizing a single object, this data is useful to know how
// complex our objects are getting.
attribute.Int("object_num_groups", len(object.ACLGroupList)),
attribute.Int("object_num_users", len(object.ACLUserList)),
),
)
defer span.End()
roles, err := RolesByNames(roleNames)
if err != nil {
return err
@ -131,19 +203,20 @@ func (a RegoAuthorizer) ByRoleName(ctx context.Context, subjectID string, roleNa
}
err = a.Authorize(ctx, subjectID, roles, scopeRole, groups, action, object)
span.AddEvent("authorized", trace.WithAttributes(attribute.Bool("authorized", err == nil)))
dur := time.Since(start)
if err != nil {
a.authorizeHist.WithLabelValues("false").Observe(dur.Seconds())
return err
}
a.authorizeHist.WithLabelValues("true").Observe(dur.Seconds())
return nil
}
// Authorize allows passing in custom Roles.
// This is really helpful for unit testing, as we can create custom roles to exercise edge cases.
func (a RegoAuthorizer) Authorize(ctx context.Context, subjectID string, roles []Role, scope Role, groups []string, action Action, object Object) error {
ctx, span := tracing.StartSpan(ctx)
defer span.End()
input := map[string]interface{}{
"subject": authSubject{
ID: subjectID,
@ -166,22 +239,12 @@ func (a RegoAuthorizer) Authorize(ctx context.Context, subjectID string, roles [
return nil
}
// Prepare partially evaluates the rego policy, leaving every object field
// unknown apart from the object type. The resulting PartialAuthorizer can be
// reused for many objects of that type, which vastly speeds up batch
// authorization compared to evaluating the full policy per object.
//
// Any failure from building the partial authorizer is returned wrapped with
// the "new partial authorizer" prefix, and the returned pointer is nil.
func (RegoAuthorizer) Prepare(ctx context.Context, subjectID string, roles []Role, scope Role, groups []string, action Action, objectType string) (*PartialAuthorizer, error) {
	// The whole preparation runs under its own tracing span.
	spanCtx, span := tracing.StartSpan(ctx)
	defer span.End()

	partial, prepErr := newPartialAuthorizer(spanCtx, subjectID, roles, scope, groups, action, objectType)
	if prepErr != nil {
		return nil, xerrors.Errorf("new partial authorizer: %w", prepErr)
	}

	return partial, nil
}
func (a RegoAuthorizer) PrepareByRoleName(ctx context.Context, subjectID string, roleNames []string, scope Scope, groups []string, action Action, objectType string) (PreparedAuthorized, error) {
ctx, span := tracing.StartSpan(ctx)
start := time.Now()
ctx, span := tracing.StartSpan(ctx,
trace.WithTimestamp(start),
rbacTraceAttributes(roleNames, len(groups), scope, action, objectType),
)
defer span.End()
roles, err := RolesByNames(roleNames)
@ -194,5 +257,29 @@ func (a RegoAuthorizer) PrepareByRoleName(ctx context.Context, subjectID string,
return nil, err
}
return a.Prepare(ctx, subjectID, roles, scopeRole, groups, action, objectType)
prepared, err := a.Prepare(ctx, subjectID, roles, scopeRole, groups, action, objectType)
if err != nil {
return nil, err
}
// Add attributes of the Prepare results. This will help understand the
// complexity of the roles and how it affects the time taken.
span.SetAttributes(
attribute.Int("num_queries", len(prepared.preparedQueries)),
attribute.Bool("always_true", prepared.alwaysTrue),
)
a.prepareHist.Observe(time.Since(start).Seconds())
return prepared, nil
}
// Prepare partially evaluates the rego policy, leaving every object field
// unknown apart from the object type. The resulting PartialAuthorizer can be
// reused for many objects of that type, which vastly speeds up batch
// authorization compared to evaluating the full policy per object.
//
// No span is started here; ctx is forwarded untouched to the partial
// authorizer. Any failure is returned wrapped with the
// "new partial authorizer" prefix, and the returned pointer is nil.
func (RegoAuthorizer) Prepare(ctx context.Context, subjectID string, roles []Role, scope Role, groups []string, action Action, objectType string) (*PartialAuthorizer, error) {
	partial, prepErr := newPartialAuthorizer(ctx, subjectID, roles, scope, groups, action, objectType)
	if prepErr != nil {
		return nil, xerrors.Errorf("new partial authorizer: %w", prepErr)
	}

	return partial, nil
}