3
3
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
4
4
* University Research and Technology
5
5
* Corporation. All rights reserved.
6
- * Copyright (c) 2004-2014 The University of Tennessee and The University
6
+ * Copyright (c) 2004-2019 The University of Tennessee and The University
7
7
* of Tennessee Research Foundation. All rights
8
8
* reserved.
9
9
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -123,11 +123,18 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
123
123
do_now = cando_count / _elem -> blocklen ;
124
124
if ( 0 != do_now ) {
125
125
do_now_bytes = _elem -> blocklen * opal_datatype_basicDatatypes [_elem -> common .type ]-> size ;
126
+ #if OPAL_ENABLE_DEBUG
126
127
for (size_t _i = 0 ; _i < do_now ; _i ++ ) {
127
128
position_single_block ( CONVERTOR , & _memory , _elem -> extent ,
128
129
SPACE , do_now_bytes , COUNT , _elem -> blocklen );
129
130
cando_count -= _elem -> blocklen ;
130
131
}
132
+ #else
133
+ _memory += do_now * _elem -> extent ;
134
+ * SPACE -= do_now * do_now_bytes ;
135
+ * COUNT -= do_now * _elem -> blocklen ;
136
+ cando_count -= do_now * _elem -> blocklen ;
137
+ #endif /* OPAL_ENABLE_DEBUG */
131
138
}
132
139
133
140
/**
@@ -144,48 +151,16 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
144
151
* (POINTER ) = _memory - _elem -> disp ;
145
152
}
146
153
147
- /**
148
- * Advance the current position in the convertor based using the
149
- * current contiguous loop and a left-over counter. Update the head
150
- * pointer and the leftover byte space.
151
- */
152
- static inline void
153
- position_contiguous_loop ( opal_convertor_t * CONVERTOR ,
154
- dt_elem_desc_t * ELEM ,
155
- size_t * COUNT ,
156
- unsigned char * * POINTER ,
157
- size_t * SPACE )
158
- {
159
- ddt_loop_desc_t * _loop = (ddt_loop_desc_t * )(ELEM );
160
- ddt_endloop_desc_t * _end_loop = (ddt_endloop_desc_t * )((ELEM ) + (ELEM )-> loop .items );
161
- size_t _copy_loops = * (COUNT );
162
-
163
- if ( (_copy_loops * _end_loop -> size ) > * (SPACE ) )
164
- _copy_loops = * (SPACE ) / _end_loop -> size ;
165
- OPAL_DATATYPE_SAFEGUARD_POINTER ( * (POINTER ) + _end_loop -> first_elem_disp ,
166
- (_copy_loops - 1 ) * _loop -> extent + _end_loop -> size ,
167
- (CONVERTOR )-> pBaseBuf , (CONVERTOR )-> pDesc , (CONVERTOR )-> count );
168
- * (POINTER ) += _copy_loops * _loop -> extent ;
169
- * (SPACE ) -= _copy_loops * _end_loop -> size ;
170
- * (COUNT ) -= _copy_loops ;
171
- }
172
-
173
- #define POSITION_PREDEFINED_DATATYPE ( CONVERTOR , ELEM , COUNT , POSITION , SPACE ) \
174
- position_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) )
175
-
176
- #define POSITION_CONTIGUOUS_LOOP ( CONVERTOR , ELEM , COUNT , POSITION , SPACE ) \
177
- position_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) )
178
-
179
154
int opal_convertor_generic_simple_position ( opal_convertor_t * pConvertor ,
180
155
size_t * position )
181
156
{
182
157
dt_stack_t * pStack ; /* pointer to the position on the stack */
183
158
uint32_t pos_desc ; /* actual position in the description of the derived datatype */
184
159
size_t count_desc ; /* the number of items already done in the actual pos_desc */
160
+ size_t iov_len_local ;
185
161
dt_elem_desc_t * description = pConvertor -> use_desc -> desc ;
186
162
dt_elem_desc_t * pElem ; /* current position */
187
163
unsigned char * base_pointer = pConvertor -> pBaseBuf ;
188
- size_t iov_len_local ;
189
164
ptrdiff_t extent = pConvertor -> pDesc -> ub - pConvertor -> pDesc -> lb ;
190
165
191
166
DUMP ( "opal_convertor_generic_simple_position( %p, &%ld )\n" , (void * )pConvertor , (long )* position );
@@ -236,21 +211,19 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
236
211
assert (pConvertor -> partial_length < element_length );
237
212
return 0 ;
238
213
}
239
- pConvertor -> partial_length = (pConvertor -> partial_length + missing_length ) % element_length ;
240
- assert (pConvertor -> partial_length == 0 );
214
+ pConvertor -> partial_length = 0 ;
241
215
pConvertor -> bConverted += missing_length ;
242
216
iov_len_local -= missing_length ;
243
217
count_desc -- ;
244
218
}
245
219
while ( 1 ) {
246
- if ( OPAL_DATATYPE_END_LOOP == pElem -> elem .common .type ) { /* end of the current loop */
220
+ if ( OPAL_DATATYPE_END_LOOP == pElem -> elem .common .type ) { /* end of the entire datatype */
247
221
DO_DEBUG ( opal_output ( 0 , "position end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n" ,
248
222
pStack -> count , pConvertor -> stack_pos , pos_desc ,
249
223
pStack -> disp , (unsigned long )iov_len_local ); );
250
224
if ( -- (pStack -> count ) == 0 ) { /* end of loop */
251
225
if ( pConvertor -> stack_pos == 0 ) {
252
226
pConvertor -> flags |= CONVERTOR_COMPLETED ;
253
- pConvertor -> partial_length = 0 ;
254
227
goto complete_loop ; /* completed */
255
228
}
256
229
pConvertor -> stack_pos -- ;
@@ -259,11 +232,13 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
259
232
} else {
260
233
if ( pStack -> index == -1 ) {
261
234
pStack -> disp += extent ;
235
+ pos_desc = 0 ; /* back to the first element */
262
236
} else {
263
237
assert ( OPAL_DATATYPE_LOOP == description [pStack -> index ].loop .common .type );
264
238
pStack -> disp += description [pStack -> index ].loop .extent ;
239
+ pos_desc = pStack -> index ; /* go back to the loop start itself to give a chance
240
+ * to move forward by entire loops */
265
241
}
266
- pos_desc = pStack -> index + 1 ;
267
242
}
268
243
base_pointer = pConvertor -> pBaseBuf + pStack -> disp ;
269
244
UPDATE_INTERNAL_COUNTERS ( description , pos_desc , pElem , count_desc );
@@ -273,9 +248,14 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
273
248
}
274
249
if ( OPAL_DATATYPE_LOOP == pElem -> elem .common .type ) {
275
250
ptrdiff_t local_disp = (ptrdiff_t )base_pointer ;
276
- if ( pElem -> loop .common .flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
277
- POSITION_CONTIGUOUS_LOOP ( pConvertor , pElem , count_desc ,
278
- base_pointer , iov_len_local );
251
+ ddt_endloop_desc_t * end_loop = (ddt_endloop_desc_t * )(pElem + pElem -> loop .items );
252
+ size_t full_loops = iov_len_local / end_loop -> size ;
253
+ full_loops = count_desc <= full_loops ? count_desc : full_loops ;
254
+ if ( full_loops ) {
255
+ base_pointer += full_loops * pElem -> loop .extent ;
256
+ iov_len_local -= full_loops * end_loop -> size ;
257
+ count_desc -= full_loops ;
258
+
279
259
if ( 0 == count_desc ) { /* completed */
280
260
pos_desc += pElem -> loop .items + 1 ;
281
261
goto update_loop_description ;
@@ -297,8 +277,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
297
277
}
298
278
while ( pElem -> elem .common .flags & OPAL_DATATYPE_FLAG_DATA ) {
299
279
/* now here we have a basic datatype */
300
- POSITION_PREDEFINED_DATATYPE ( pConvertor , pElem , count_desc ,
301
- base_pointer , iov_len_local );
280
+ position_predefined_data ( pConvertor , pElem , & count_desc , & base_pointer , & iov_len_local );
302
281
if ( 0 != count_desc ) { /* completed */
303
282
pConvertor -> partial_length = iov_len_local ;
304
283
goto complete_loop ;
0 commit comments