fix: fix TestPGCoordinatorSingle_MissedHeartbeats flake (#13686)

This commit is contained in:
Spike Curtis
2024-06-27 19:17:24 +04:00
committed by GitHub
parent 089f06886b
commit ce7f13c6c3
2 changed files with 12 additions and 11 deletions

View File

@ -1613,7 +1613,7 @@ func (h *heartbeats) resetExpiryTimerWithLock() {
if d < 0 {
d = 0
}
h.timer.Reset(d)
h.timer.Reset(d, "heartbeats", "resetExpiryTimerWithLock")
}
func (h *heartbeats) checkExpiry() {

View File

@ -336,14 +336,14 @@ func TestPGCoordinatorSingle_MissedHeartbeats(t *testing.T) {
t.Skip("test only with postgres")
}
store, ps := dbtestutil.NewDB(t)
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitSuperLong)
ctx, cancel := context.WithTimeout(context.Background(), testutil.WaitShort)
defer cancel()
logger := slogtest.Make(t, nil).Leveled(slog.LevelDebug)
mClock := clock.NewMock(t)
nowTrap := mClock.Trap().Now("heartbeats", "recvBeat")
defer nowTrap.Close()
afTrap := mClock.Trap().AfterFunc("heartbeats", "recvBeat")
defer afTrap.Close()
rstTrap := mClock.Trap().TimerReset("heartbeats", "resetExpiryTimerWithLock")
defer rstTrap.Close()
coordinator, err := tailnet.NewTestPGCoord(ctx, logger, ps, store, mClock)
require.NoError(t, err)
@ -370,7 +370,6 @@ func TestPGCoordinatorSingle_MissedHeartbeats(t *testing.T) {
}
fCoord2.heartbeat()
nowTrap.MustWait(ctx).Release()
afTrap.MustWait(ctx).Release() // heartbeat timeout started
fCoord2.agentNode(agent.id, &agpl.Node{PreferredDERP: 12})
@ -383,30 +382,32 @@ func TestPGCoordinatorSingle_MissedHeartbeats(t *testing.T) {
id: uuid.New(),
}
fCoord3.heartbeat()
nowTrap.MustWait(ctx).Release()
rstTrap.MustWait(ctx).Release() // timeout gets reset
fCoord3.agentNode(agent.id, &agpl.Node{PreferredDERP: 13})
assertEventuallyHasDERPs(ctx, t, client, 13)
// fCoord2 sends in a second heartbeat, one period later (on time)
fCoord2.heartbeat()
c := nowTrap.MustWait(ctx)
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
c.Release()
fCoord2.heartbeat()
rstTrap.MustWait(ctx).Release() // timeout gets reset
// when the fCoord3 misses enough heartbeats, the real coordinator should send an update with the
// node from fCoord2 for the agent.
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
w := mClock.Advance(tailnet.HeartbeatPeriod)
rstTrap.MustWait(ctx).Release()
w.MustWait(ctx)
assertEventuallyHasDERPs(ctx, t, client, 12)
// one more heartbeat period will result in fCoord2 being expired, which should cause us to
// revert to the original agent mapping
mClock.Advance(tailnet.HeartbeatPeriod).MustWait(ctx)
// note that the timeout doesn't get reset because both fCoord2 and fCoord3 are expired
assertEventuallyHasDERPs(ctx, t, client, 10)
// send fCoord3 heartbeat, which should trigger us to consider that mapping valid again.
fCoord3.heartbeat()
nowTrap.MustWait(ctx).Release()
rstTrap.MustWait(ctx).Release() // timeout gets reset
assertEventuallyHasDERPs(ctx, t, client, 13)
err = agent.close()