Skip to content

Commit ecf7d9a

Browse files
committed
Optimize the position placement.
Upon detecting a datatype loop representation, skip the entire loop according to the remaining space. Signed-off-by: George Bosilca <[email protected]>
1 parent d75fb12 commit ecf7d9a

File tree

1 file changed

+23
-44
lines changed

1 file changed

+23
-44
lines changed

opal/datatype/opal_datatype_position.c

Lines changed: 23 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
44
* University Research and Technology
55
* Corporation. All rights reserved.
6-
* Copyright (c) 2004-2014 The University of Tennessee and The University
6+
* Copyright (c) 2004-2019 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
99
* Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
@@ -123,11 +123,18 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
123123
do_now = cando_count / _elem->blocklen;
124124
if( 0 != do_now ) {
125125
do_now_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
126+
#if OPAL_ENABLE_DEBUG
126127
for(size_t _i = 0; _i < do_now; _i++ ) {
127128
position_single_block( CONVERTOR, &_memory, _elem->extent,
128129
SPACE, do_now_bytes, COUNT, _elem->blocklen );
129130
cando_count -= _elem->blocklen;
130131
}
132+
#else
133+
_memory += do_now * _elem->extent;
134+
*SPACE -= do_now * do_now_bytes;
135+
*COUNT -= do_now * _elem->blocklen;
136+
cando_count -= do_now * _elem->blocklen;
137+
#endif /* OPAL_ENABLE_DEBUG */
131138
}
132139

133140
/**
@@ -144,48 +151,16 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
144151
*(POINTER) = _memory - _elem->disp;
145152
}
146153

147-
/**
148-
* Advance the current position in the convertor using the
149-
* current contiguous loop and a left-over counter. Update the head
150-
* pointer and the leftover byte space.
151-
*/
152-
static inline void
153-
position_contiguous_loop( opal_convertor_t* CONVERTOR,
154-
dt_elem_desc_t* ELEM,
155-
size_t* COUNT,
156-
unsigned char** POINTER,
157-
size_t* SPACE )
158-
{
159-
ddt_loop_desc_t *_loop = (ddt_loop_desc_t*)(ELEM);
160-
ddt_endloop_desc_t* _end_loop = (ddt_endloop_desc_t*)((ELEM) + (ELEM)->loop.items);
161-
size_t _copy_loops = *(COUNT);
162-
163-
if( (_copy_loops * _end_loop->size) > *(SPACE) )
164-
_copy_loops = *(SPACE) / _end_loop->size;
165-
OPAL_DATATYPE_SAFEGUARD_POINTER( *(POINTER) + _end_loop->first_elem_disp,
166-
(_copy_loops - 1) * _loop->extent + _end_loop->size,
167-
(CONVERTOR)->pBaseBuf, (CONVERTOR)->pDesc, (CONVERTOR)->count );
168-
*(POINTER) += _copy_loops * _loop->extent;
169-
*(SPACE) -= _copy_loops * _end_loop->size;
170-
*(COUNT) -= _copy_loops;
171-
}
172-
173-
#define POSITION_PREDEFINED_DATATYPE( CONVERTOR, ELEM, COUNT, POSITION, SPACE ) \
174-
position_predefined_data( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) )
175-
176-
#define POSITION_CONTIGUOUS_LOOP( CONVERTOR, ELEM, COUNT, POSITION, SPACE ) \
177-
position_contiguous_loop( (CONVERTOR), (ELEM), &(COUNT), &(POSITION), &(SPACE) )
178-
179154
int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
180155
size_t* position )
181156
{
182157
dt_stack_t* pStack; /* pointer to the position on the stack */
183158
uint32_t pos_desc; /* actual position in the description of the derived datatype */
184159
size_t count_desc; /* the number of items already done in the actual pos_desc */
160+
size_t iov_len_local;
185161
dt_elem_desc_t* description = pConvertor->use_desc->desc;
186162
dt_elem_desc_t* pElem; /* current position */
187163
unsigned char *base_pointer = pConvertor->pBaseBuf;
188-
size_t iov_len_local;
189164
ptrdiff_t extent = pConvertor->pDesc->ub - pConvertor->pDesc->lb;
190165

191166
DUMP( "opal_convertor_generic_simple_position( %p, &%ld )\n", (void*)pConvertor, (long)*position );
@@ -236,21 +211,19 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
236211
assert(pConvertor->partial_length < element_length);
237212
return 0;
238213
}
239-
pConvertor->partial_length = (pConvertor->partial_length + missing_length) % element_length;
240-
assert(pConvertor->partial_length == 0);
214+
pConvertor->partial_length = 0;
241215
pConvertor->bConverted += missing_length;
242216
iov_len_local -= missing_length;
243217
count_desc--;
244218
}
245219
while( 1 ) {
246-
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the current loop */
220+
if( OPAL_DATATYPE_END_LOOP == pElem->elem.common.type ) { /* end of the the entire datatype */
247221
DO_DEBUG( opal_output( 0, "position end_loop count %" PRIsize_t " stack_pos %d pos_desc %d disp %lx space %lu\n",
248222
pStack->count, pConvertor->stack_pos, pos_desc,
249223
pStack->disp, (unsigned long)iov_len_local ); );
250224
if( --(pStack->count) == 0 ) { /* end of loop */
251225
if( pConvertor->stack_pos == 0 ) {
252226
pConvertor->flags |= CONVERTOR_COMPLETED;
253-
pConvertor->partial_length = 0;
254227
goto complete_loop; /* completed */
255228
}
256229
pConvertor->stack_pos--;
@@ -259,11 +232,13 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
259232
} else {
260233
if( pStack->index == -1 ) {
261234
pStack->disp += extent;
235+
pos_desc = 0; /* back to the first element */
262236
} else {
263237
assert( OPAL_DATATYPE_LOOP == description[pStack->index].loop.common.type );
264238
pStack->disp += description[pStack->index].loop.extent;
239+
pos_desc = pStack->index; /* go back to the loop start itself to give a chance
240+
* to move forward by entire loops */
265241
}
266-
pos_desc = pStack->index + 1;
267242
}
268243
base_pointer = pConvertor->pBaseBuf + pStack->disp;
269244
UPDATE_INTERNAL_COUNTERS( description, pos_desc, pElem, count_desc );
@@ -273,9 +248,14 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
273248
}
274249
if( OPAL_DATATYPE_LOOP == pElem->elem.common.type ) {
275250
ptrdiff_t local_disp = (ptrdiff_t)base_pointer;
276-
if( pElem->loop.common.flags & OPAL_DATATYPE_FLAG_CONTIGUOUS ) {
277-
POSITION_CONTIGUOUS_LOOP( pConvertor, pElem, count_desc,
278-
base_pointer, iov_len_local );
251+
ddt_endloop_desc_t* end_loop = (ddt_endloop_desc_t*)(pElem + pElem->loop.items);
252+
size_t full_loops = iov_len_local / end_loop->size;
253+
full_loops = count_desc <= full_loops ? count_desc : full_loops;
254+
if( full_loops ) {
255+
base_pointer += full_loops * pElem->loop.extent;
256+
iov_len_local -= full_loops * end_loop->size;
257+
count_desc -= full_loops;
258+
279259
if( 0 == count_desc ) { /* completed */
280260
pos_desc += pElem->loop.items + 1;
281261
goto update_loop_description;
@@ -297,8 +277,7 @@ int opal_convertor_generic_simple_position( opal_convertor_t* pConvertor,
297277
}
298278
while( pElem->elem.common.flags & OPAL_DATATYPE_FLAG_DATA ) {
299279
/* now here we have a basic datatype */
300-
POSITION_PREDEFINED_DATATYPE( pConvertor, pElem, count_desc,
301-
base_pointer, iov_len_local );
280+
position_predefined_data( pConvertor, pElem, &count_desc, &base_pointer, &iov_len_local );
302281
if( 0 != count_desc ) { /* completed */
303282
pConvertor->partial_length = iov_len_local;
304283
goto complete_loop;

0 commit comments

Comments
 (0)