Files
coder/provisioner/terraform/timings.go
Eng Zer Jun 92d02ad6cb chore: update github.com/cespare/xxhash to v2 (#16094)
`github.com/cespare/xxhash/v2` is the latest version with bug fixes and
improvements. No breaking changes, see
https://go.dev/play/p/4fisKvMhrGR.

Affected lines of code are: 

-
8b9763dd2c/provisioner/terraform/timings.go (L202-L205)

-
8b9763dd2c/provisioner/terraform/internal/timings_test_utils.go (L90-L91)

Signed-off-by: Eng Zer Jun <engzerjun@gmail.com>
2025-01-15 17:03:29 +02:00

241 lines
7.8 KiB
Go

package terraform
import (
"fmt"
"slices"
"sync"
"time"
"github.com/cespare/xxhash/v2"
"google.golang.org/protobuf/types/known/timestamppb"
"github.com/coder/coder/v2/coderd/database"
"github.com/coder/coder/v2/coderd/database/dbtime"
"github.com/coder/coder/v2/provisionersdk/proto"
)
type timingKind string
// Copied from https://github.com/hashicorp/terraform/blob/01c0480e77263933b2b086dc8d600a69f80fad2d/internal/command/jsonformat/renderer.go
// We cannot reference these because they're in an internal package.
const (
timingApplyStart timingKind = "apply_start"
timingApplyProgress timingKind = "apply_progress"
timingApplyComplete timingKind = "apply_complete"
timingApplyErrored timingKind = "apply_errored"
timingProvisionStart timingKind = "provision_start"
timingProvisionProgress timingKind = "provision_progress"
timingProvisionComplete timingKind = "provision_complete"
timingProvisionErrored timingKind = "provision_errored"
timingRefreshStart timingKind = "refresh_start"
timingRefreshComplete timingKind = "refresh_complete"
// Ignored.
timingChangeSummary timingKind = "change_summary"
timingDiagnostic timingKind = "diagnostic"
timingPlannedChange timingKind = "planned_change"
timingOutputs timingKind = "outputs"
timingResourceDrift timingKind = "resource_drift"
timingVersion timingKind = "version"
// These are not part of message_types, but we want to track init/graph timings as well.
timingInitStart timingKind = "init_start"
timingInitComplete timingKind = "init_complete"
timingInitErrored timingKind = "init_errored"
timingGraphStart timingKind = "graph_start"
timingGraphComplete timingKind = "graph_complete"
timingGraphErrored timingKind = "graph_errored"
// Other terraform log types which we ignore.
timingLog timingKind = "log"
timingInitOutput timingKind = "init_output"
)
type timingAggregator struct {
stage database.ProvisionerJobTimingStage
// Protects the stateLookup map.
lookupMu sync.Mutex
stateLookup map[uint64]*timingSpan
}
type timingSpan struct {
kind timingKind
start, end time.Time
stage database.ProvisionerJobTimingStage
action, provider, resource string
state proto.TimingState
}
// newTimingAggregator creates a new aggregator which measures the duration of resource init/plan/apply actions; stage
// represents the stage of provisioning the timings are occurring within.
func newTimingAggregator(stage database.ProvisionerJobTimingStage) *timingAggregator {
return &timingAggregator{
stage: stage,
stateLookup: make(map[uint64]*timingSpan),
}
}
// ingest accepts a timing span at a certain timestamp and assigns it a state according to the kind of timing event.
// We memoize start & completion events, and then calculate their total duration in aggregate.
// We ignore progress events because we only care about the full duration of the action (delta between *_start and *_complete events).
func (t *timingAggregator) ingest(ts time.Time, s *timingSpan) {
if s == nil {
return
}
s.stage = t.stage
ts = dbtime.Time(ts.UTC())
switch s.kind {
case timingApplyStart, timingProvisionStart, timingRefreshStart, timingInitStart, timingGraphStart:
s.start = ts
s.state = proto.TimingState_STARTED
case timingApplyComplete, timingProvisionComplete, timingRefreshComplete, timingInitComplete, timingGraphComplete:
s.end = ts
s.state = proto.TimingState_COMPLETED
case timingApplyErrored, timingProvisionErrored, timingInitErrored, timingGraphErrored:
s.end = ts
s.state = proto.TimingState_FAILED
default:
// We just want start/end timings, ignore all other events.
return
}
t.lookupMu.Lock()
// Memoize this span by its unique attributes and the determined state.
// This will be used in aggregate() to determine the duration of the resource action.
t.stateLookup[s.hashByState(s.state)] = s
t.lookupMu.Unlock()
}
// aggregate performs a pass through all memoized events to build up a slice of *proto.Timing instances which represent
// the total time taken to perform a certain action.
// The resulting slice of *proto.Timing is NOT sorted.
func (t *timingAggregator) aggregate() []*proto.Timing {
t.lookupMu.Lock()
defer t.lookupMu.Unlock()
// Pre-allocate len(measurements)/2 since each timing will have one STARTED and one FAILED/COMPLETED entry.
out := make([]*proto.Timing, 0, len(t.stateLookup)/2)
for _, s := range t.stateLookup {
// We are only concerned here with failed or completed events.
if s.state != proto.TimingState_FAILED && s.state != proto.TimingState_COMPLETED {
continue
}
// Look for a corresponding span for the STARTED state.
startSpan, ok := t.stateLookup[s.hashByState(proto.TimingState_STARTED)]
if !ok {
// Not found, we'll ignore this span.
continue
}
s.start = startSpan.start
// Until faster-than-light travel is a possibility, let's prevent this.
// Better to capture a zero delta than a negative one.
if s.start.After(s.end) {
s.start = s.end
}
// Let's only aggregate valid entries.
// Later we can add support for partial / failed applies, perhaps.
if s.start.IsZero() || s.end.IsZero() {
continue
}
out = append(out, s.toProto())
}
return out
}
func (l timingKind) Valid() bool {
return slices.Contains([]timingKind{
timingApplyStart,
timingApplyProgress,
timingApplyComplete,
timingApplyErrored,
timingProvisionStart,
timingProvisionProgress,
timingProvisionComplete,
timingProvisionErrored,
timingRefreshStart,
timingRefreshComplete,
timingChangeSummary,
timingDiagnostic,
timingPlannedChange,
timingOutputs,
timingResourceDrift,
timingVersion,
timingInitStart,
timingInitComplete,
timingInitErrored,
timingGraphStart,
timingGraphComplete,
timingGraphErrored,
timingLog,
timingInitOutput,
}, l)
}
// Category returns the category for a giving timing state so that timings can be aggregated by this category.
// We can't use the state itself because we need an `apply_start` and an `apply_complete` to both hash to the same entry
// if all other attributes are identical.
func (l timingKind) Category() string {
switch l {
case timingInitStart, timingInitComplete, timingInitErrored:
return "init"
case timingGraphStart, timingGraphComplete, timingGraphErrored:
return "graph"
case timingApplyStart, timingApplyProgress, timingApplyComplete, timingApplyErrored:
return "apply"
case timingProvisionStart, timingProvisionProgress, timingProvisionComplete, timingProvisionErrored:
return "provision"
case timingRefreshStart, timingRefreshComplete:
return "state refresh"
default:
return "?"
}
}
// hashState computes a hash based on a timingSpan's unique properties and state.
// The combination of resource and provider names MUST be unique across entries.
func (e *timingSpan) hashByState(state proto.TimingState) uint64 {
id := fmt.Sprintf("%s:%s:%s:%s:%s", e.kind.Category(), state.String(), e.action, e.resource, e.provider)
return xxhash.Sum64String(id)
}
func (e *timingSpan) toProto() *proto.Timing {
// Some log entries, like state refreshes, don't have any "action" logged.
if e.action == "" {
e.action = e.kind.Category()
}
return &proto.Timing{
Start: timestamppb.New(e.start),
End: timestamppb.New(e.end),
Action: e.action,
Stage: string(e.stage),
Source: e.provider,
Resource: e.resource,
State: e.state,
}
}
func createInitTimingsEvent(event timingKind) (time.Time, *timingSpan) {
return dbtime.Now(), &timingSpan{
kind: event,
action: "initializing terraform",
provider: "terraform",
resource: "state file",
}
}
func createGraphTimingsEvent(event timingKind) (time.Time, *timingSpan) {
return dbtime.Now(), &timingSpan{
kind: event,
action: "building terraform dependency graph",
provider: "terraform",
resource: "state file",
}
}