Skip to content

Commit 74d22f3

Browse files
committed
park waiters in slice; revise closure logic.
1 parent bf4b91c commit 74d22f3

File tree

3 files changed

+49
-68
lines changed

3 files changed

+49
-68
lines changed

dial_queue.go

+44-38
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ type waitingCh struct {
6161
// Dialler throttling (e.g. FD limit exceeded) is a concern, as we can easily spin up more workers to compensate, and
6262
// end up adding fuel to the fire. Since we have no deterministic way to detect this for now, we hard-limit concurrency
6363
// to DialQueueMaxParallelism.
64-
func newDialQueue(ctx context.Context, target string, in *queue.ChanQueue, dialFn func(context.Context, peer.ID) error, nConsumers int) *dialQueue {
64+
func newDialQueue(ctx context.Context, target string, in *queue.ChanQueue, dialFn func(context.Context, peer.ID) error) *dialQueue {
6565
sq := &dialQueue{
6666
ctx: ctx,
6767
dialFn: dialFn,
@@ -71,9 +71,9 @@ func newDialQueue(ctx context.Context, target string, in *queue.ChanQueue, dialF
7171
in: in,
7272
out: queue.NewChanQueue(ctx, queue.NewXORDistancePQ(target)),
7373

74-
growCh: make(chan struct{}, nConsumers),
74+
growCh: make(chan struct{}, 1),
7575
shrinkCh: make(chan struct{}, 1),
76-
waitingCh: make(chan waitingCh, nConsumers),
76+
waitingCh: make(chan waitingCh),
7777
dieCh: make(chan struct{}, DialQueueMaxParallelism),
7878
}
7979
for i := 0; i < DialQueueMinParallelism; i++ {
@@ -85,22 +85,16 @@ func newDialQueue(ctx context.Context, target string, in *queue.ChanQueue, dialF
8585

8686
func (dq *dialQueue) control() {
8787
var (
88-
p peer.ID
89-
dialled = dq.out.DeqChan
90-
resp waitingCh
91-
waiting <-chan waitingCh
88+
dialled <-chan peer.ID
89+
waiting []waitingCh
9290
lastScalingEvt = time.Now()
9391
)
9492

9593
defer func() {
96-
// close channels.
97-
if resp.ch != nil {
98-
close(resp.ch)
99-
}
100-
close(dq.waitingCh)
101-
for w := range dq.waitingCh {
94+
for _, w := range waiting {
10295
close(w.ch)
10396
}
97+
waiting = nil
10498
}()
10599

106100
for {
@@ -109,16 +103,23 @@ func (dq *dialQueue) control() {
109103
select {
110104
case <-dq.ctx.Done():
111105
return
112-
case p = <-dialled:
113-
dialled, waiting = nil, dq.waitingCh
106+
case w := <-dq.waitingCh:
107+
waiting = append(waiting, w)
108+
dialled = dq.out.DeqChan
114109
continue // onto the top.
115-
case resp = <-waiting:
116-
// got a channel that's waiting for a peer.
117-
log.Debugf("delivering dialled peer to DHT; took %dms.", time.Now().Sub(resp.ts)/time.Millisecond)
118-
resp.ch <- p
119-
close(resp.ch)
120-
dialled, waiting = dq.out.DeqChan, nil // stop consuming waiting jobs until we've cleared a peer.
121-
resp.ch = nil
110+
case p, ok := <-dialled:
111+
if !ok {
112+
return // we're done if the ChanQueue is closed, which happens when the context is closed.
113+
}
114+
w := waiting[0]
115+
log.Debugf("delivering dialled peer to DHT; took %dms.", time.Since(w.ts)/time.Millisecond)
116+
w.ch <- p
117+
close(w.ch)
118+
waiting = waiting[1:]
119+
if len(waiting) == 0 {
120+
// no more waiters, so stop consuming dialled jobs.
121+
dialled = nil
122+
}
122123
continue // onto the top.
123124
default:
124125
// there's nothing to process, so proceed onto the main select block.
@@ -127,23 +128,30 @@ func (dq *dialQueue) control() {
127128
select {
128129
case <-dq.ctx.Done():
129130
return
130-
case p = <-dialled:
131-
dialled, waiting = nil, dq.waitingCh
132-
case resp = <-waiting:
133-
// got a channel that's waiting for a peer.
134-
log.Debugf("delivering dialled peer to DHT; took %dms.", time.Now().Sub(resp.ts)/time.Millisecond)
135-
resp.ch <- p
136-
close(resp.ch)
137-
dialled, waiting = dq.out.DeqChan, nil // stop consuming waiting jobs until we've cleared a peer.
138-
resp.ch = nil
131+
case w := <-dq.waitingCh:
132+
waiting = append(waiting, w)
133+
dialled = dq.out.DeqChan
134+
case p, ok := <-dialled:
135+
if !ok {
136+
return // we're done if the ChanQueue is closed, which happens when the context is closed.
137+
}
138+
w := waiting[0]
139+
log.Debugf("delivering dialled peer to DHT; took %dms.", time.Since(w.ts)/time.Millisecond)
140+
w.ch <- p
141+
close(w.ch)
142+
waiting = waiting[1:]
143+
if len(waiting) == 0 {
144+
// no more waiters, so stop consuming dialled jobs.
145+
dialled = nil
146+
}
139147
case <-dq.growCh:
140-
if time.Now().Sub(lastScalingEvt) < DialQueueScalingMutePeriod {
148+
if time.Since(lastScalingEvt) < DialQueueScalingMutePeriod {
141149
continue
142150
}
143151
dq.grow()
144152
lastScalingEvt = time.Now()
145153
case <-dq.shrinkCh:
146-
if time.Now().Sub(lastScalingEvt) < DialQueueScalingMutePeriod {
154+
if time.Since(lastScalingEvt) < DialQueueScalingMutePeriod {
147155
continue
148156
}
149157
dq.shrink()
@@ -176,13 +184,11 @@ func (dq *dialQueue) Consume() <-chan peer.ID {
176184

177185
// park the channel until a dialled peer becomes available.
178186
select {
187+
case dq.waitingCh <- waitingCh{ch, time.Now()}:
188+
// all good
179189
case <-dq.ctx.Done():
180190
// return a closed channel with no value if we're done.
181191
close(ch)
182-
return ch
183-
case dq.waitingCh <- waitingCh{ch, time.Now()}:
184-
default:
185-
panic("detected more consuming goroutines than declared upfront")
186192
}
187193
return ch
188194
}
@@ -268,7 +274,7 @@ func (dq *dialQueue) worker() {
268274
log.Debugf("discarding dialled peer because of error: %v", err)
269275
continue
270276
}
271-
log.Debugf("dialling %v took %dms (as observed by the dht subsystem).", p, time.Now().Sub(t)/time.Millisecond)
277+
log.Debugf("dialling %v took %dms (as observed by the dht subsystem).", p, time.Since(t)/time.Millisecond)
272278
waiting := len(dq.waitingCh)
273279
dq.out.EnqChan <- p
274280
if waiting > 0 {

dial_queue_test.go

+4-29
Original file line numberDiff line numberDiff line change
@@ -16,31 +16,6 @@ func init() {
1616
DialQueueScalingMutePeriod = 0
1717
}
1818

19-
func TestDialQueueErrorsWithTooManyConsumers(t *testing.T) {
20-
var calls int
21-
defer func() {
22-
if e := recover(); e == nil {
23-
t.Error("expected a panic, got none")
24-
} else if calls != 4 {
25-
t.Errorf("expected a panic on the 4th call to Consume(); got it on call number %d", calls)
26-
}
27-
}()
28-
29-
in := queue.NewChanQueue(context.Background(), queue.NewXORDistancePQ("test"))
30-
hang := make(chan struct{})
31-
dialFn := func(ctx context.Context, p peer.ID) error {
32-
<-hang
33-
return nil
34-
}
35-
36-
dq := newDialQueue(context.Background(), "test", in, dialFn, 3)
37-
for ; calls < 3; calls++ {
38-
dq.Consume()
39-
}
40-
calls++
41-
dq.Consume()
42-
}
43-
4419
func TestDialQueueGrowsOnSlowDials(t *testing.T) {
4520
DialQueueMaxIdle = 10 * time.Minute
4621

@@ -60,7 +35,7 @@ func TestDialQueueGrowsOnSlowDials(t *testing.T) {
6035
}
6136

6237
// remove the mute period to grow faster.
63-
dq := newDialQueue(context.Background(), "test", in, dialFn, 4)
38+
dq := newDialQueue(context.Background(), "test", in, dialFn)
6439

6540
for i := 0; i < 4; i++ {
6641
_ = dq.Consume()
@@ -93,7 +68,7 @@ func TestDialQueueShrinksWithNoConsumers(t *testing.T) {
9368
return nil
9469
}
9570

96-
dq := newDialQueue(context.Background(), "test", in, dialFn, 3)
71+
dq := newDialQueue(context.Background(), "test", in, dialFn)
9772

9873
defer func() {
9974
recover()
@@ -160,7 +135,7 @@ func TestDialQueueShrinksWithWhenIdle(t *testing.T) {
160135
in.EnqChan <- peer.ID(i)
161136
}
162137

163-
dq := newDialQueue(context.Background(), "test", in, dialFn, 3)
138+
dq := newDialQueue(context.Background(), "test", in, dialFn)
164139

165140
// keep up to speed with backlog by releasing the dial function every time we acquire a channel.
166141
for i := 0; i < 13; i++ {
@@ -203,7 +178,7 @@ func TestDialQueueMutePeriodHonored(t *testing.T) {
203178
in.EnqChan <- peer.ID(i)
204179
}
205180

206-
dq := newDialQueue(context.Background(), "test", in, dialFn, 3)
181+
dq := newDialQueue(context.Background(), "test", in, dialFn)
207182

208183
// pick up three consumers.
209184
for i := 0; i < 3; i++ {

query.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ func newQueryRunner(q *dhtQuery) *dhtQueryRunner {
103103
peersToQuery: peersToQuery,
104104
proc: proc,
105105
}
106-
r.peersDialed = newDialQueue(ctx, q.key, peersToQuery, r.dialPeer, AlphaValue)
106+
r.peersDialed = newDialQueue(ctx, q.key, peersToQuery, r.dialPeer)
107107
return r
108108
}
109109

0 commit comments

Comments
 (0)