@@ -275,27 +275,110 @@ void J9::RecognizedCallTransformer::process_java_lang_StringUTF16_toBytes(TR::Tr
275
275
{
276
276
TR_J9VMBase* fej9 = static_cast <TR_J9VMBase*>(comp ()->fe ());
277
277
278
+ // Place arguments to StringUTF16.toBytes in temporaries
279
+ // to allow for control flow
280
+ TransformUtil::createTempsForCall (this , treetop);
281
+
278
282
TR::Node* valueNode = node->getChild (0 );
279
283
TR::Node* offNode = node->getChild (1 );
280
284
TR::Node* lenNode = node->getChild (2 );
281
285
282
- anchorAllChildren (node, treetop);
283
- prepareToReplaceNode (node);
286
+ TR::CFG *cfg = comp ()->getFlowGraph ();
284
287
285
- int32_t byteArrayType = fej9->getNewArrayTypeFromClass (fej9->getByteArrayClass ());
288
+ // The implementation of java.lang.StringUTF16.toBytes(char[],int,int) will
289
+ // throw a NegativeArraySizeException or OutOfMemoryError if the specified
290
+ // length is outside the range [0,0x3fffffff]. In order to avoid deciding
291
+ // which to throw in the IL, fall back to the out-of-line call if the length
292
+ // is negative or too great. Otherwise, create the byte array and copy the
293
+ // input char array to it with java.lang.String.decompressedArrayCopy
294
+ //
295
+ // Before:
296
+ //
297
+ // +----------------------------------------+
298
+ // | treetop |
299
+ // | acall java/lang/StringUTF16.toBytes |
300
+ // | aload charArr |
301
+ // | iload off |
302
+ // | iload len |
303
+ // +----------------------------------------+
304
+ //
305
+ // After:
306
+ //
307
+ // ifCmpBlock
308
+ // +----------------------------------------+
309
+ // | astore charArrTemp |
310
+ // | aload charArr |
311
+ // | istore offTemp |
312
+ // | iload off |
313
+ // | istore lenTemp |
314
+ // | iload len |
315
+ // | ifiucmpgt --> fallbackPathBlock -----------------+
316
+ // | iload lenTemp | |
317
+ // | iconst 0x3fffffff | |
318
+ // +--------------------+-------------------+ |
319
+ // | |
320
+ // fallThroughPathBlock V |
321
+ // +----------------------------------------+ |
322
+ // | astore result | |
323
+ // | newarray jitNewArray | |
324
+ // | ishl | |
325
+ // | iload lenTemp | |
326
+ // | iconst 1 | |
327
+ // | iconst 8 ; array type is byte | |
328
+ // | treetop | |
329
+ // | call java/lang/String.decompressedArrayCopy |
330
+ // | aload charArrTemp | |
331
+ // | iload offTemp | |
332
+ // | ==>newarray | |
333
+ // | iconst 0 | |
334
+ // | iload lenTemp | |
335
+ // | goto joinBlock ----------------------------+ |
336
+ // +----------------------------------------+ | |
337
+ // | |
338
+ // +------------------------------+
339
+ // | |
340
+ // fallbackPathBlock V (freq 0) (cold) |
341
+ // +----------------------------------------+ |
342
+ // | astore result | |
343
+ // | acall java/lang/StringUTF16.toBytes | |
344
+ // | aload charArrTemp | |
345
+ // | iload offTemp | |
346
+ // | iload lenTemp | |
347
+ // +--------------------+-------------------+ |
348
+ // | |
349
+ // +-------------------------+
350
+ // |
351
+ // joinBlock V
352
+ // +----------------------------------------+
353
+ // | treetop |
354
+ // | aload result ; Replaces acall StringUTF16.toBytes
355
+ // +----------------------------------------+
356
+ //
357
+ TR::Node *upperBoundConstNode = TR::Node::iconst (node, TR::getMaxSigned<TR::Int32>() >> 1 );
358
+ TR::Node *ifCmpNode = TR::Node::createif (TR::ifiucmpgt, lenNode, upperBoundConstNode);
359
+ TR::TreeTop *ifCmpTreeTop = TR::TreeTop::create (comp (), treetop->getPrevTreeTop (), ifCmpNode);
286
360
287
- TR::Node::recreateWithoutProperties (node, TR::newarray, 2 ,
288
- TR::Node::create (TR::ishl, 2 ,
289
- lenNode,
290
- TR::Node::iconst (1 )),
291
- TR::Node::iconst (byteArrayType),
361
+ // Create temporary variable that will be used to hold result
362
+ TR::DataType resultDataType = node->getDataType ();
363
+ TR::SymbolReference *resultSymRef = comp ()->getSymRefTab ()->createTemporary (comp ()->getMethodSymbol (), resultDataType);
292
364
293
- getSymRefTab ()->findOrCreateNewArraySymbolRef (node->getSymbolReference ()->getOwningMethodSymbol (comp ())));
365
+ // Create result byte array and copy input char array to it with String.decompressedArrayCopy
366
+ int32_t byteArrayType = fej9->getNewArrayTypeFromClass (fej9->getByteArrayClass ());
294
367
295
- TR::Node* newByteArrayNode = node;
368
+ TR::Node *newByteArrayNode = TR::Node::createWithSymRef (TR::newarray, 2 , 2 ,
369
+ TR::Node::create (TR::ishl, 2 , lenNode,
370
+ TR::Node::iconst (1 )),
371
+ TR::Node::iconst (byteArrayType),
372
+ getSymRefTab ()->findOrCreateNewArraySymbolRef (
373
+ node->getSymbolReference ()->getOwningMethodSymbol (comp ())));
374
+
375
+ newByteArrayNode->copyByteCodeInfo (node);
296
376
newByteArrayNode->setCanSkipZeroInitialization (true );
297
377
newByteArrayNode->setIsNonNull (true );
298
378
379
+ TR::Node *newByteArrayStoreNode = TR::Node::createStore (node, resultSymRef, newByteArrayNode);
380
+ TR::TreeTop *newByteArraryTreeTop = TR::TreeTop::create (comp (), ifCmpTreeTop, newByteArrayStoreNode);
381
+
299
382
TR::Node* newCallNode = TR::Node::createWithSymRef (node, TR::call, 5 ,
300
383
getSymRefTab ()->methodSymRefFromName (comp ()->getMethodSymbol (), " java/lang/String" , " decompressedArrayCopy" , " ([CI[BII)V" , TR::MethodSymbol::Static));
301
384
newCallNode->setAndIncChild (0 , valueNode);
@@ -304,14 +387,58 @@ void J9::RecognizedCallTransformer::process_java_lang_StringUTF16_toBytes(TR::Tr
304
387
newCallNode->setAndIncChild (3 , TR::Node::iconst (0 ));
305
388
newCallNode->setAndIncChild (4 , lenNode);
306
389
307
- TR::TreeTop* newTT = treetop->insertAfter (TR::TreeTop::create (comp (), TR::Node::create (node, TR::treetop, 1 , newCallNode)));
390
+ TR::TreeTop* lastFallThroughTreeTop = TR::TreeTop::create (comp (), newByteArraryTreeTop,
391
+ TR::Node::create (node, TR::treetop, 1 , newCallNode));
392
+
308
393
// Insert the allocationFence after the arraycopy because the array can be allocated from the non-zeroed TLH
309
394
// and therefore we need to make sure no other thread sees stale memory from the array element section.
310
395
if (cg ()->getEnforceStoreOrder ())
311
396
{
312
397
TR::Node *allocationFence = TR::Node::createAllocationFence (newByteArrayNode, newByteArrayNode);
313
- newTT-> insertAfter ( TR::TreeTop::create (comp (), allocationFence) );
398
+ lastFallThroughTreeTop = TR::TreeTop::create (comp (), lastFallThroughTreeTop, allocationFence);
314
399
}
400
+
401
+ // Copy the original call tree for the fallback path, and store the
402
+ // result into the temporary that was created.
403
+ TR::Node *fallbackCallNode = node->duplicateTree ();
404
+ TR::Node *fallbackStoreNode = TR::Node::createStore (node, resultSymRef, fallbackCallNode);
405
+ TR::TreeTop *fallbackTreeTop = TR::TreeTop::create (comp (), lastFallThroughTreeTop, fallbackStoreNode);
406
+
407
+ // Replace original call node with the load of the temporary
408
+ // variable that is stored on both sides of the if branch.
409
+ prepareToReplaceNode (node);
410
+ TR::Node::recreate (node, comp ()->il .opCodeForDirectLoad (resultDataType));
411
+ node->setSymbolReference (resultSymRef);
412
+
413
+ // Split the current block right after the ifiucmpgt
414
+ TR::Block *ifCmpBlock = ifCmpTreeTop->getEnclosingBlock ();
415
+
416
+ // Then split the inline version of the code into its own block
417
+ TR::Block *fallThroughPathBlock = ifCmpBlock->split (newByteArraryTreeTop, cfg, true /* fixUpCommoning */ , true /* copyExceptionSuccessors */ );
418
+
419
+ // Then split the fallback, out-of-line call into its own block
420
+ TR::Block *fallbackPathBlock = fallThroughPathBlock->split (fallbackTreeTop, cfg, true /* fixUpCommoning */ , true /* copyExceptionSuccessors */ );
421
+
422
+ // Then split again at the original call TreeTop to create the tail block
423
+ TR::Block *tailBlock = fallbackPathBlock->split (treetop, cfg, true /* fixUpCommoning */ , true /* copyExceptionSuccessors */ );
424
+
425
+ // Now create a node to go to the merge (i.e. tail) block.
426
+ TR::Node *gotoNode = TR::Node::create (node, TR::Goto);
427
+ TR::TreeTop *gotoTree = TR::TreeTop::create (comp (), gotoNode, NULL , NULL );
428
+ gotoNode->setBranchDestination (tailBlock->getEntry ());
429
+ fallThroughPathBlock->getExit ()->insertBefore (gotoTree);
430
+
431
+ // Now we have fall-through block, fallback block and tail/merge block.
432
+ // Set the ifiucmpgt's destination to the fallback block and update the CFG as well.
433
+ ifCmpNode->setBranchDestination (fallbackPathBlock->getEntry ());
434
+ cfg->addEdge (ifCmpBlock, fallbackPathBlock);
435
+ cfg->addEdge (fallThroughPathBlock, tailBlock);
436
+ cfg->removeEdge (fallThroughPathBlock, fallbackPathBlock);
437
+
438
+ // The original call to StringUTF16.toBytes will only be used
439
+ // if an exception needs to be thrown. Mark it as cold.
440
+ fallbackPathBlock->setFrequency (UNKNOWN_COLD_BLOCK_COUNT);
441
+ fallbackPathBlock->setIsCold ();
315
442
}
316
443
317
444
void J9::RecognizedCallTransformer::process_jdk_internal_util_ArraysSupport_vectorizedMismatch (TR::TreeTop* treetop, TR::Node* node)
0 commit comments