@@ -58,7 +58,6 @@ use std::sync::Arc;
 use std::time::Duration;
 
 use anyhow::format_err;
-use crossbeam_channel::{unbounded, Receiver, Sender};
 use crossbeam_utils::thread::Scope;
 use jobserver::{Acquired, Client, HelperThread};
 use log::{debug, info, trace};
@@ -73,6 +72,7 @@ use super::{BuildContext, BuildPlan, CompileMode, Context, Unit};
 use crate::core::{PackageId, TargetKind};
 use crate::util;
 use crate::util::diagnostic_server::{self, DiagnosticPrinter};
+use crate::util::Queue;
 use crate::util::{internal, profile, CargoResult, CargoResultExt, ProcessBuilder};
 use crate::util::{Config, DependencyQueue};
 use crate::util::{Progress, ProgressStyle};
@@ -93,13 +93,34 @@ pub struct JobQueue<'a, 'cfg> {
 ///
 /// It is created from JobQueue when we have fully assembled the crate graph
 /// (i.e., all package dependencies are known).
+///
+/// # Message queue
+///
+/// Each thread running a process uses the message queue to send messages back
+/// to the main thread. The main thread coordinates everything, and handles
+/// printing output.
+///
+/// It is important to be careful which messages use `push` vs `push_bounded`.
+/// `push` is for priority messages (like tokens, or "finished") where the
+/// sender shouldn't block. We want to handle those so real work can proceed
+/// ASAP.
+///
+/// `push_bounded` is only for messages being printed to stdout/stderr. Being
+/// bounded prevents a flood of messages causing a large amount of memory
+/// being used.
+///
+/// `push` also avoids blocking which helps avoid deadlocks. For example, when
+/// the diagnostic server thread is dropped, it waits for the thread to exit.
+/// But if the thread is blocked on a full queue, and there is a critical
+/// error, the drop will deadlock. This should be fixed at some point in the
+/// future. The jobserver thread has a similar problem, though it will time
+/// out after 1 second.
 struct DrainState<'a, 'cfg> {
     // This is the length of the DependencyQueue when starting out
     total_units: usize,
 
     queue: DependencyQueue<Unit<'a>, Artifact, Job>,
-    tx: Sender<Message>,
-    rx: Receiver<Message>,
+    messages: Arc<Queue<Message>>,
     active: HashMap<JobId, Unit<'a>>,
     compiled: HashSet<PackageId>,
     documented: HashSet<PackageId>,
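
The `crate::util::Queue` type imported above is added elsewhere in this change; its implementation is not shown in this diff. As a rough, simplified sketch of the producer-side semantics the doc comment describes (an illustration under assumptions, not cargo's actual implementation):

```rust
use std::collections::VecDeque;
use std::sync::{Condvar, Mutex};

/// Hypothetical, simplified stand-in for cargo's `util::Queue`.
pub struct Queue<T> {
    items: Mutex<VecDeque<T>>,
    // Wakes the coordinator when a message arrives.
    popper_cv: Condvar,
    // Wakes blocked `push_bounded` callers when space frees up.
    bounded_cv: Condvar,
    bound: usize,
}

impl<T> Queue<T> {
    pub fn new(bound: usize) -> Queue<T> {
        Queue {
            items: Mutex::new(VecDeque::new()),
            popper_cv: Condvar::new(),
            bounded_cv: Condvar::new(),
            bound,
        }
    }

    /// Priority messages (tokens, `Finish`, ...): never blocks the sender,
    /// even if the queue is currently over capacity.
    pub fn push(&self, item: T) {
        self.items.lock().unwrap().push_back(item);
        self.popper_cv.notify_one();
    }

    /// stdout/stderr messages: blocks while the queue is at capacity so a
    /// flood of output cannot use an unbounded amount of memory.
    pub fn push_bounded(&self, item: T) {
        let mut items = self.items.lock().unwrap();
        while items.len() >= self.bound {
            items = self.bounded_cv.wait(items).unwrap();
        }
        items.push_back(item);
        self.popper_cv.notify_one();
    }
}
```

The point of the split is only that `push` never waits on capacity, while `push_bounded` parks the sender once the queue holds `bound` items.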
@@ -145,7 +166,7 @@ impl std::fmt::Display for JobId {
 
 pub struct JobState<'a> {
     /// Channel back to the main thread to coordinate messages and such.
-    tx: Sender<Message>,
+    messages: Arc<Queue<Message>>,
 
     /// The job id that this state is associated with, used when sending
     /// messages back to the main thread.
@@ -199,7 +220,7 @@ enum Message {
 
 impl<'a> JobState<'a> {
     pub fn running(&self, cmd: &ProcessBuilder) {
-        let _ = self.tx.send(Message::Run(self.id, cmd.to_string()));
+        self.messages.push(Message::Run(self.id, cmd.to_string()));
     }
 
     pub fn build_plan(
@@ -208,17 +229,16 @@ impl<'a> JobState<'a> {
         cmd: ProcessBuilder,
         filenames: Arc<Vec<OutputFile>>,
     ) {
-        let _ = self
-            .tx
-            .send(Message::BuildPlanMsg(module_name, cmd, filenames));
+        self.messages
+            .push(Message::BuildPlanMsg(module_name, cmd, filenames));
     }
 
     pub fn stdout(&self, stdout: String) {
-        drop(self.tx.send(Message::Stdout(stdout)));
+        self.messages.push_bounded(Message::Stdout(stdout));
     }
 
     pub fn stderr(&self, stderr: String) {
-        drop(self.tx.send(Message::Stderr(stderr)));
+        self.messages.push_bounded(Message::Stderr(stderr));
     }
 
     /// A method used to signal to the coordinator thread that the rmeta file
@@ -228,9 +248,8 @@ impl<'a> JobState<'a> {
     /// produced once!
     pub fn rmeta_produced(&self) {
         self.rmeta_required.set(false);
-        let _ = self
-            .tx
-            .send(Message::Finish(self.id, Artifact::Metadata, Ok(())));
+        self.messages
+            .push(Message::Finish(self.id, Artifact::Metadata, Ok(())));
     }
 
     /// The rustc underlying this Job is about to acquire a jobserver token (i.e., block)
@@ -239,14 +258,14 @@ impl<'a> JobState<'a> {
     /// This should arrange for the associated client to eventually get a token via
     /// `client.release_raw()`.
     pub fn will_acquire(&self) {
-        let _ = self.tx.send(Message::NeedsToken(self.id));
+        self.messages.push(Message::NeedsToken(self.id));
     }
 
     /// The rustc underlying this Job is informing us that it is done with a jobserver token.
     ///
     /// Note that it does *not* write that token back anywhere.
     pub fn release_token(&self) {
-        let _ = self.tx.send(Message::ReleaseToken(self.id));
+        self.messages.push(Message::ReleaseToken(self.id));
     }
 }
 
@@ -340,21 +359,22 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> {
         let _p = profile::start("executing the job graph");
         self.queue.queue_finished();
 
-        let (tx, rx) = unbounded();
         let progress = Progress::with_style("Building", ProgressStyle::Ratio, cx.bcx.config);
         let state = DrainState {
             total_units: self.queue.len(),
             queue: self.queue,
-            tx,
-            rx,
+            // 100 here is somewhat arbitrary. It is a few screenfulls of
+            // output, and hopefully at most a few megabytes of memory for
+            // typical messages. If you change this, please update the test
+            // caching_large_output, too.
+            messages: Arc::new(Queue::new(100)),
             active: HashMap::new(),
             compiled: HashSet::new(),
             documented: HashSet::new(),
             counts: self.counts,
             progress,
             next_id: 0,
             timings: self.timings,
-
             tokens: Vec::new(),
             rustc_tokens: HashMap::new(),
             to_send_clients: BTreeMap::new(),
@@ -364,25 +384,28 @@ impl<'a, 'cfg> JobQueue<'a, 'cfg> {
         };
 
         // Create a helper thread for acquiring jobserver tokens
-        let tx = state.tx.clone();
+        let messages = state.messages.clone();
         let helper = cx
             .jobserver
             .clone()
             .into_helper_thread(move |token| {
-                drop(tx.send(Message::Token(token)));
+                drop(messages.push(Message::Token(token)));
             })
             .chain_err(|| "failed to create helper thread for jobserver management")?;
 
         // Create a helper thread to manage the diagnostics for rustfix if
         // necessary.
-        let tx = state.tx.clone();
+        let messages = state.messages.clone();
+        // It is important that this uses `push` instead of `push_bounded` for
+        // now. If someone wants to fix this to be bounded, the `drop`
+        // implementation needs to be changed to avoid possible deadlocks.
         let _diagnostic_server = cx
             .bcx
             .build_config
             .rustfix_diagnostic_server
             .borrow_mut()
             .take()
-            .map(move |srv| srv.start(move |msg| drop(tx.send(Message::FixDiagnostic(msg)))));
+            .map(move |srv| srv.start(move |msg| drop(messages.push(Message::FixDiagnostic(msg)))));
 
         crossbeam_utils::thread::scope(move |scope| state.drain_the_queue(cx, plan, scope, &helper))
             .expect("child threads shouldn't panic")
@@ -584,7 +607,7 @@ impl<'a, 'cfg> DrainState<'a, 'cfg> {
         // to run above to calculate CPU usage over time. To do this we
         // listen for a message with a timeout, and on timeout we run the
         // previous parts of the loop again.
-        let events: Vec<_> = self.rx.try_iter().collect();
+        let mut events = self.messages.try_pop_all();
         info!(
             "tokens in use: {}, rustc_tokens: {:?}, waiting_rustcs: {:?} (events this tick: {})",
             self.tokens.len(),
@@ -602,14 +625,16 @@ impl<'a, 'cfg> DrainState<'a, 'cfg> {
             loop {
                 self.tick_progress();
                 self.tokens.truncate(self.active.len() - 1);
-                match self.rx.recv_timeout(Duration::from_millis(500)) {
-                    Ok(message) => break vec![message],
-                    Err(_) => continue,
+                match self.messages.pop(Duration::from_millis(500)) {
+                    Some(message) => {
+                        events.push(message);
+                        break;
+                    }
+                    None => continue,
                 }
             }
-        } else {
-            events
         }
+        return events;
     }
 
     fn drain_the_queue(
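
The consumer side used in the coordinator loop above (`try_pop_all`, plus `pop` with a timeout that returns `None` when nothing arrives) could be sketched on the same hypothetical `Queue` from the earlier illustration; again an assumption for clarity, not the real `crate::util` code:

```rust
use std::time::Duration;

impl<T> Queue<T> {
    /// Waits up to `timeout` for a message; returns `None` on timeout.
    pub fn pop(&self, timeout: Duration) -> Option<T> {
        let (mut items, _timed_out) = self
            .popper_cv
            .wait_timeout_while(self.items.lock().unwrap(), timeout, |items| items.is_empty())
            .unwrap();
        let item = items.pop_front();
        if item.is_some() {
            // Free a slot for any sender parked in `push_bounded`.
            self.bounded_cv.notify_one();
        }
        item
    }

    /// Drains everything currently queued without blocking.
    pub fn try_pop_all(&self) -> Vec<T> {
        let mut items = self.items.lock().unwrap();
        let all: Vec<T> = items.drain(..).collect();
        if !all.is_empty() {
            self.bounded_cv.notify_all();
        }
        all
    }
}
```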
@@ -756,7 +781,7 @@ impl<'a, 'cfg> DrainState<'a, 'cfg> {
         assert!(self.active.insert(id, *unit).is_none());
         *self.counts.get_mut(&unit.pkg.package_id()).unwrap() -= 1;
 
-        let my_tx = self.tx.clone();
+        let messages = self.messages.clone();
         let fresh = job.freshness();
         let rmeta_required = cx.rmeta_required(unit);
 
@@ -768,13 +793,13 @@ impl<'a, 'cfg> DrainState<'a, 'cfg> {
         let doit = move || {
             let state = JobState {
                 id,
-                tx: my_tx.clone(),
+                messages: messages.clone(),
                 rmeta_required: Cell::new(rmeta_required),
                 _marker: marker::PhantomData,
             };
 
             let mut sender = FinishOnDrop {
-                tx: &my_tx,
+                messages: &messages,
                 id,
                 result: Err(format_err!("worker panicked")),
             };
@@ -793,39 +818,33 @@ impl<'a, 'cfg> DrainState<'a, 'cfg> {
             // we need to make sure that the metadata is flagged as produced so
             // send a synthetic message here.
             if state.rmeta_required.get() && sender.result.is_ok() {
-                my_tx
-                    .send(Message::Finish(id, Artifact::Metadata, Ok(())))
-                    .unwrap();
+                messages.push(Message::Finish(id, Artifact::Metadata, Ok(())));
             }
 
             // Use a helper struct with a `Drop` implementation to guarantee
             // that a `Finish` message is sent even if our job panics. We
             // shouldn't panic unless there's a bug in Cargo, so we just need
             // to make sure nothing hangs by accident.
             struct FinishOnDrop<'a> {
-                tx: &'a Sender<Message>,
+                messages: &'a Queue<Message>,
                 id: JobId,
                 result: CargoResult<()>,
             }
 
             impl Drop for FinishOnDrop<'_> {
                 fn drop(&mut self) {
                     let msg = mem::replace(&mut self.result, Ok(()));
-                    drop(self.tx.send(Message::Finish(self.id, Artifact::All, msg)));
+                    self.messages
+                        .push(Message::Finish(self.id, Artifact::All, msg));
                 }
             }
         };
 
         match fresh {
-            Freshness::Fresh => {
-                self.timings.add_fresh();
-                doit();
-            }
-            Freshness::Dirty => {
-                self.timings.add_dirty();
-                scope.spawn(move |_| doit());
-            }
+            Freshness::Fresh => self.timings.add_fresh(),
+            Freshness::Dirty => self.timings.add_dirty(),
         }
+        scope.spawn(move |_| doit());
 
         Ok(())
     }