mirror of
https://github.com/coder/coder.git
synced 2025-03-14 10:09:57 +00:00
fix(tailnet): prevent redial after Coord graceful restart (#15586)
fixes: https://github.com/coder/internal/issues/217 > There are a couple problems: > > One is that we assert the RPCs succeed, but if the pipeDialer context is canceled at the end of the test, then these assertions happen after the test is officially complete, which panics and affects other tests. This converts these to just return the error rather than assert. > The other is that the retrier is slightly bugged: if the current retry delay is 0 AND the ctx is done, (e.g. after successfully connecting and then gracefully disconnecting), then retrier.Wait(c.ctx) is racy and could return either true or false. Fixes the phantom redial by explicitly checking the context before dialing. Also, in the test, we assert that the controller is closed before completing the test.
This commit is contained in:
@ -1193,10 +1193,17 @@ func (c *Controller) Run(ctx context.Context) {
|
||||
// Sadly retry doesn't support quartz.Clock yet so this is not
|
||||
// influenced by the configured clock.
|
||||
for retrier := retry.New(50*time.Millisecond, 10*time.Second); retrier.Wait(c.ctx); {
|
||||
// Check the context again before dialing, since `retrier.Wait()` could return true
|
||||
// if the delay is 0, even if the context was canceled. This ensures we don't redial
|
||||
// after a graceful shutdown.
|
||||
if c.ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
|
||||
tailnetClients, err := c.Dialer.Dial(c.ctx, c.ResumeTokenCtrl)
|
||||
if err != nil {
|
||||
if xerrors.Is(err, context.Canceled) {
|
||||
continue
|
||||
if xerrors.Is(err, context.Canceled) || xerrors.Is(err, context.DeadlineExceeded) {
|
||||
return
|
||||
}
|
||||
errF := slog.Error(err)
|
||||
var sdkErr *codersdk.Error
|
||||
|
@ -14,7 +14,6 @@ import (
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/hashicorp/yamux"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/mock/gomock"
|
||||
"golang.org/x/xerrors"
|
||||
@ -1071,6 +1070,7 @@ func TestController_Disconnects(t *testing.T) {
|
||||
close(call.Resps)
|
||||
|
||||
_ = testutil.RequireRecvCtx(testCtx, t, peersLost)
|
||||
_ = testutil.RequireRecvCtx(testCtx, t, uut.Closed())
|
||||
}
|
||||
|
||||
func TestController_TelemetrySuccess(t *testing.T) {
|
||||
@ -1210,24 +1210,28 @@ type pipeDialer struct {
|
||||
|
||||
func (p *pipeDialer) Dial(_ context.Context, _ tailnet.ResumeTokenController) (tailnet.ControlProtocolClients, error) {
|
||||
s, c := net.Pipe()
|
||||
p.t.Cleanup(func() {
|
||||
_ = s.Close()
|
||||
_ = c.Close()
|
||||
})
|
||||
go func() {
|
||||
err := p.svc.ServeConnV2(p.ctx, s, p.streamID)
|
||||
p.logger.Debug(p.ctx, "piped tailnet service complete", slog.Error(err))
|
||||
}()
|
||||
client, err := tailnet.NewDRPCClient(c, p.logger)
|
||||
if !assert.NoError(p.t, err) {
|
||||
if err != nil {
|
||||
_ = c.Close()
|
||||
return tailnet.ControlProtocolClients{}, err
|
||||
}
|
||||
coord, err := client.Coordinate(context.Background())
|
||||
if !assert.NoError(p.t, err) {
|
||||
if err != nil {
|
||||
_ = c.Close()
|
||||
return tailnet.ControlProtocolClients{}, err
|
||||
}
|
||||
|
||||
derps := &tailnet.DERPFromDRPCWrapper{}
|
||||
derps.Client, err = client.StreamDERPMaps(context.Background(), &proto.StreamDERPMapsRequest{})
|
||||
if !assert.NoError(p.t, err) {
|
||||
if err != nil {
|
||||
_ = c.Close()
|
||||
return tailnet.ControlProtocolClients{}, err
|
||||
}
|
||||
|
Reference in New Issue
Block a user