@@ -109,9 +109,11 @@ class AllreduceBase : public IEngine {
109
109
if (world_size == 1 || world_size == -1 ) {
110
110
return ;
111
111
}
112
- utils::Assert (TryAllgatherRing (sendrecvbuf_, total_size, slice_begin,
113
- slice_end, size_prev_slice) == kSuccess ,
114
- " AllgatherRing failed" );
112
+ auto ret = TryAllgatherRing (sendrecvbuf_, total_size, slice_begin,
113
+ slice_end, size_prev_slice);
114
+ if (ret != kSuccess ) {
115
+ utils::Error (" AllgatherRing failed: %d\n " , ret.line );
116
+ }
115
117
}
116
118
/* !
117
119
* \brief perform in-place allreduce, on sendrecvbuf
@@ -135,9 +137,10 @@ class AllreduceBase : public IEngine {
135
137
const char *_caller = _CALLER) override {
136
138
if (prepare_fun != nullptr ) prepare_fun (prepare_arg);
137
139
if (world_size == 1 || world_size == -1 ) return ;
138
- utils::Assert (TryAllreduce (sendrecvbuf_, type_nbytes, count, reducer) ==
139
- kSuccess ,
140
- " Allreduce failed" );
140
+ auto ret = TryAllreduce (sendrecvbuf_, type_nbytes, count, reducer);
141
+ if (ret != kSuccess ) {
142
+ utils::Error (" Allreduce failed: %d\n " , ret.line );
143
+ }
141
144
}
142
145
/* !
143
146
* \brief broadcast data from root to all nodes
@@ -152,8 +155,10 @@ class AllreduceBase : public IEngine {
152
155
const char *_file = _FILE, const int _line = _LINE,
153
156
const char *_caller = _CALLER) override {
154
157
if (world_size == 1 || world_size == -1 ) return ;
155
- utils::Assert (TryBroadcast (sendrecvbuf_, total_size, root) == kSuccess ,
156
- " Broadcast failed" );
158
+ auto ret = TryBroadcast (sendrecvbuf_, total_size, root);
159
+ if (ret != kSuccess ) {
160
+ utils::Error (" Broadcast failed: %d\n " , ret.line );
161
+ }
157
162
}
158
163
/* !
159
164
* \brief load latest check point
@@ -272,9 +277,11 @@ class AllreduceBase : public IEngine {
272
277
struct ReturnType {
273
278
/* ! \brief internal return type */
274
279
ReturnTypeEnum value;
280
+ int32_t line { -1 };
275
281
// constructor
276
- ReturnType () = default ;
277
- ReturnType (ReturnTypeEnum value) : value(value) {} // NOLINT(*)
282
+ explicit ReturnType (int l = __builtin_LINE()) : line{l} {}
283
+ ReturnType (ReturnTypeEnum value, int32_t l = __builtin_LINE()) : value(value), line{l} {} // NOLINT(*)
284
+
278
285
/*!
 * \brief compare the stored status code against v
 * \param v the status code to compare with
 * \return true when `value` equals v; the recorded `line` is not considered
 */
inline bool operator==(const ReturnTypeEnum &v) const {
  return value == v;
}
@@ -518,7 +525,8 @@ class AllreduceBase : public IEngine {
518
525
* \param err the error type
519
526
*/
520
527
inline ReturnType ReportError(LinkRecord *link, ReturnType err) {
  // record which link the error is attributed to, then hand err back unchanged
  err_link = link;
  return err;
}
523
531
// ---- data structure related to model ----
524
532
// call sequence counter, records how many calls we made so far
0 commit comments