mirror of
https://github.com/coder/coder.git
synced 2025-03-14 10:09:57 +00:00
Fixes #13139. Using a bare channel to signal dependent goroutines means that we can only signal success, not failure, which leads to deadlock if we fail in a way that doesn't cause the whole `apiConnRoutineManager` to tear down routines. Instead, we use a new object called a `checkpoint` that signals success or failure, so that dependent routines get unblocked if the routine they depend on fails.
52 lines
1.0 KiB
Go
52 lines
1.0 KiB
Go
package agent
|
|
|
|
import (
|
|
"context"
|
|
"runtime"
|
|
"sync"
|
|
|
|
"cdr.dev/slog"
|
|
)
|
|
|
|
// checkpoint allows a goroutine to communicate when it is OK to proceed beyond some async condition
|
|
// to other dependent goroutines.
|
|
type checkpoint struct {
|
|
logger slog.Logger
|
|
mu sync.Mutex
|
|
called bool
|
|
done chan struct{}
|
|
err error
|
|
}
|
|
|
|
// complete the checkpoint. Pass nil to indicate the checkpoint was ok. It is an error to call this
|
|
// more than once.
|
|
func (c *checkpoint) complete(err error) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
if c.called {
|
|
b := make([]byte, 2048)
|
|
n := runtime.Stack(b, false)
|
|
c.logger.Critical(context.Background(), "checkpoint complete called more than once", slog.F("stacktrace", b[:n]))
|
|
return
|
|
}
|
|
c.called = true
|
|
c.err = err
|
|
close(c.done)
|
|
}
|
|
|
|
func (c *checkpoint) wait(ctx context.Context) error {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-c.done:
|
|
return c.err
|
|
}
|
|
}
|
|
|
|
func newCheckpoint(logger slog.Logger) *checkpoint {
|
|
return &checkpoint{
|
|
logger: logger,
|
|
done: make(chan struct{}),
|
|
}
|
|
}
|