Skip to content

Commit 0d9d9b5

Browse files
committed
AArch64: Inline StringUTF16.compress([CI[BII)I
This commit recognizes StringUTF16.compress([CI[BII)I, and generates inlined code for AArch64.
1 parent 7f7577a commit 0d9d9b5

File tree

6 files changed

+173
-0
lines changed

6 files changed

+173
-0
lines changed

runtime/compiler/aarch64/codegen/J9CodeGenerator.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ J9::ARM64::CodeGenerator::initialize()
9999
{
100100
cg->setSupportsInlineStringLatin1Inflate();
101101
}
102+
static bool disableInlineCompressCharArray = feGetEnv("TR_disableInlineCompressCharArray") != NULL;
103+
if ((!TR::Compiler->om.canGenerateArraylets()) && (!disableInlineCompressCharArray) && !TR::Compiler->om.isOffHeapAllocationEnabled())
104+
{
105+
cg->setSupportsInlineCompressCharArray();
106+
}
102107
if (comp->fej9()->hasFixedFrameC_CallingConvention())
103108
cg->setHasFixedFrameC_CallingConvention();
104109
}

runtime/compiler/aarch64/codegen/J9TreeEvaluator.cpp

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6756,6 +6756,151 @@ static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator
67566756
return NULL;
67576757
}
67586758

6759+
/**
 * \brief
 *    Generates inlined AArch64 code for java/lang/StringUTF16.compress([CI[BII)I.
 *
 * \details
 *    The generated code copies `length` 16-bit elements from the source array to
 *    the destination array as 8-bit elements, in chunks of 16, 8, 4, 2 and 1
 *    elements. Every chunk is checked before it is stored: if any element of the
 *    chunk does not fit in 8 bits, control transfers to the fail path and the
 *    result register is set to 0. Otherwise the result register holds the
 *    original length value.
 *
 *    Register usage in the generated code:
 *     - srcArrayReg / dstArrayReg are advanced by post-indexed loads and stores.
 *     - srcOffsetReg / dstOffsetReg are reused as scratch GPRs once the offsets
 *       have been folded into the array addresses.
 *     - lengthReg is only read until the fail path, so it can double as the
 *       result register when the length node has no other users.
 *
 * \param node
 *    The call node. It must have exactly 5 children:
 *    source array, source offset, destination array, destination offset, length.
 *
 * \param cg
 *    The code generator.
 *
 * \return
 *    The register holding the result of the call (also set on \p node).
 */
static TR::Register *inlineStringUTF16compressCharArray(TR::Node *node, TR::CodeGenerator *cg)
   {
   TR_ASSERT_FATAL(!TR::Compiler->om.canGenerateArraylets(), "StringUTF16.compress intrinsic is not supported with arraylets");
   TR_ASSERT_FATAL_WITH_NODE(node, node->getNumChildren() == 5, "Wrong number of children in inlineStringUTF16compressCharArray");

   TR::Node *srcArrayNode = node->getChild(0);
   TR::Node *srcOffsetNode = node->getChild(1);
   TR::Node *dstArrayNode = node->getChild(2);
   TR::Node *dstOffsetNode = node->getChild(3);
   TR::Node *lengthNode = node->getChild(4);

   TR::Register *srcArrayReg, *srcOffsetReg, *dstArrayReg, *dstOffsetReg, *lengthReg, *resultReg;
   bool stopUsingCopyReg1, stopUsingCopyReg2, stopUsingCopyReg3, stopUsingCopyReg4;

   // The array and offset registers are all overwritten below.
   // NOTE(review): stopUsingCopyReg() is expected to hand back a clobberable
   // register (a copy when needed) and to report whether a copy was made.
   stopUsingCopyReg1 = TR::TreeEvaluator::stopUsingCopyReg(srcArrayNode, srcArrayReg, cg);
   stopUsingCopyReg2 = TR::TreeEvaluator::stopUsingCopyReg(srcOffsetNode, srcOffsetReg, cg);
   stopUsingCopyReg3 = TR::TreeEvaluator::stopUsingCopyReg(dstArrayNode, dstArrayReg, cg);
   stopUsingCopyReg4 = TR::TreeEvaluator::stopUsingCopyReg(dstOffsetNode, dstOffsetReg, cg);
   lengthReg = cg->evaluate(lengthNode);
   if (lengthNode->getReferenceCount() > 1)
      {
      // The length value is still needed by other users, and the fail path
      // overwrites the result register, so the result needs its own register.
      resultReg = cg->allocateRegister();
      generateMovInstruction(cg, node, resultReg, lengthReg);
      }
   else
      {
      resultReg = lengthReg;
      }

   TR::Register *tmpReg = cg->allocateRegister();
   TR::Register *vtmp0Reg = cg->allocateRegister(TR_VRF);
   TR::Register *vtmp1Reg = cg->allocateRegister(TR_VRF);
   TR::Register *vtmp2Reg = cg->allocateRegister(TR_VRF);

   TR::LabelSymbol *loopLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *compress8Label = generateLabelSymbol(cg);
   TR::LabelSymbol *compress4Label = generateLabelSymbol(cg);
   TR::LabelSymbol *compress2Label = generateLabelSymbol(cg);
   TR::LabelSymbol *compress1Label = generateLabelSymbol(cg);
   TR::LabelSymbol *failLabel = generateLabelSymbol(cg);
   TR::LabelSymbol *doneLabel = generateLabelSymbol(cg);

   // add header size
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, srcArrayReg, srcArrayReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::addimmx, node, dstArrayReg, dstArrayReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes());

   // add offset (skipped when the offset is a constant 0)
   if (!srcOffsetNode->getOpCode().isLoadConst() || srcOffsetNode->getInt() != 0)
      {
      // source elements are 2 bytes wide, so the offset is scaled by 2
      generateTrg1Src2ShiftedInstruction(cg, TR::InstOpCode::addx, node, srcArrayReg, srcArrayReg, srcOffsetReg, TR::SH_LSL, 1);
      }
   if (!dstOffsetNode->getOpCode().isLoadConst() || dstOffsetNode->getInt() != 0)
      {
      // destination elements are 1 byte wide, so the offset is used as is
      generateTrg1Src2Instruction(cg, TR::InstOpCode::addx, node, dstArrayReg, dstArrayReg, dstOffsetReg);
      }

   // Fewer than 16 elements (unsigned compare): go straight to the residue code.
   generateCompareImmInstruction(cg, node, lengthReg, 16, /* is64bit */ false);
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, compress8Label, TR::CC_CC);
   // tmpReg = number of 16-element iterations
   generateLogicalShiftRightImmInstruction(cg, node, tmpReg, lengthReg, 4, /* is64bit */ false);

   // loop for copying 16 elements
   generateLabelInstruction(cg, TR::InstOpCode::label, node, loopLabel);
   generateTrg2MemInstruction(cg, TR::InstOpCode::vldppostq, node, vtmp0Reg, vtmp1Reg, TR::MemoryReference::createWithDisplacement(cg, srcArrayReg, 32));
   // uzp2 gathers the odd-numbered bytes (the upper byte of each element);
   // they must all be zero for the chunk to be compressible
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vuzp2_16b, node, vtmp2Reg, vtmp0Reg, vtmp1Reg);
   generateMovVectorElementToGPRInstruction(cg, TR::InstOpCode::umovxd, node, srcOffsetReg, vtmp2Reg, 0);
   generateMovVectorElementToGPRInstruction(cg, TR::InstOpCode::umovxd, node, dstOffsetReg, vtmp2Reg, 1);
   generateTrg1Src2Instruction(cg, TR::InstOpCode::orrx, node, srcOffsetReg, srcOffsetReg, dstOffsetReg);
   generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, srcOffsetReg, failLabel);
   // uzp1 gathers the even-numbered bytes (the lower byte of each element)
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vuzp1_16b, node, vtmp2Reg, vtmp0Reg, vtmp1Reg);
   generateTrg1Src1ImmInstruction(cg, TR::InstOpCode::subsimmw, node, tmpReg, tmpReg, 1);
   generateMemSrc1Instruction(cg, TR::InstOpCode::vstrpostq, node, TR::MemoryReference::createWithDisplacement(cg, dstArrayReg, 16), vtmp2Reg);
   // branches on the flags set by subs above
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, loopLabel, TR::CC_NE);

   // residue: 8 elements
   generateLabelInstruction(cg, TR::InstOpCode::label, node, compress8Label);
   generateTestImmInstruction(cg, node, lengthReg, 0x740, false, false); // 0x740 is immr:imms for 8
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, compress4Label, TR::CC_EQ);
   generateTrg1MemInstruction(cg, TR::InstOpCode::vldrpostq, node, vtmp0Reg, TR::MemoryReference::createWithDisplacement(cg, srcArrayReg, 16));
   // trn2 with the same source twice yields the odd-numbered (upper) bytes
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vtrn2_16b, node, vtmp2Reg, vtmp0Reg, vtmp0Reg);
   generateMovVectorElementToGPRInstruction(cg, TR::InstOpCode::umovxd, node, srcOffsetReg, vtmp2Reg, 0);
   generateMovVectorElementToGPRInstruction(cg, TR::InstOpCode::umovxd, node, dstOffsetReg, vtmp2Reg, 1);
   generateTrg1Src2Instruction(cg, TR::InstOpCode::orrx, node, srcOffsetReg, srcOffsetReg, dstOffsetReg);
   generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, srcOffsetReg, failLabel);
   // xtn narrows each 16-bit element to its lower 8 bits
   generateTrg1Src1Instruction(cg, TR::InstOpCode::vxtn_8b, node, vtmp2Reg, vtmp0Reg);
   generateMemSrc1Instruction(cg, TR::InstOpCode::vstrpostd, node, TR::MemoryReference::createWithDisplacement(cg, dstArrayReg, 8), vtmp2Reg);

   // residue: 4 elements
   generateLabelInstruction(cg, TR::InstOpCode::label, node, compress4Label);
   generateTestImmInstruction(cg, node, lengthReg, 0x780, false, false); // 0x780 is immr:imms for 4
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, compress2Label, TR::CC_EQ);
   generateTrg1MemInstruction(cg, TR::InstOpCode::vldrpostd, node, vtmp0Reg, TR::MemoryReference::createWithDisplacement(cg, srcArrayReg, 8));
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vtrn2_8b, node, vtmp2Reg, vtmp0Reg, vtmp0Reg);
   generateMovVectorElementToGPRInstruction(cg, TR::InstOpCode::umovxd, node, srcOffsetReg, vtmp2Reg, 0);
   generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzx, node, srcOffsetReg, failLabel);
   generateTrg1Src1Instruction(cg, TR::InstOpCode::vxtn_8b, node, vtmp2Reg, vtmp0Reg);
   generateMemSrc1Instruction(cg, TR::InstOpCode::vstrposts, node, TR::MemoryReference::createWithDisplacement(cg, dstArrayReg, 4), vtmp2Reg);

   // residue: 2 elements
   generateLabelInstruction(cg, TR::InstOpCode::label, node, compress2Label);
   generateTestImmInstruction(cg, node, lengthReg, 0x7c0, false, false); // 0x7c0 is immr:imms for 2
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, compress1Label, TR::CC_EQ);
   generateTrg1MemInstruction(cg, TR::InstOpCode::vldrposts, node, vtmp0Reg, TR::MemoryReference::createWithDisplacement(cg, srcArrayReg, 4));
   generateTrg1Src2Instruction(cg, TR::InstOpCode::vtrn2_8b, node, vtmp2Reg, vtmp0Reg, vtmp0Reg);
   generateMovVectorElementToGPRInstruction(cg, TR::InstOpCode::umovws, node, srcOffsetReg, vtmp2Reg, 0);
   generateCompareBranchInstruction(cg, TR::InstOpCode::cbnzw, node, srcOffsetReg, failLabel);
   generateTrg1Src1Instruction(cg, TR::InstOpCode::vxtn_8b, node, vtmp2Reg, vtmp0Reg);
   generateMemSrc1Instruction(cg, TR::InstOpCode::vstrposth, node, TR::MemoryReference::createWithDisplacement(cg, dstArrayReg, 2), vtmp2Reg);

   // residue: 1 element
   generateLabelInstruction(cg, TR::InstOpCode::label, node, compress1Label);
   generateTestImmInstruction(cg, node, lengthReg, 0x000, false, false); // 0x000 is immr:imms for 1
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, doneLabel, TR::CC_EQ);
   generateTrg1MemInstruction(cg, TR::InstOpCode::ldrhpost, node, tmpReg, TR::MemoryReference::createWithDisplacement(cg, srcArrayReg, 2));
   // The range check must be made on the element just loaded into tmpReg,
   // not on the length: an element >= 256 cannot be stored as a single byte.
   generateCompareImmInstruction(cg, node, tmpReg, 256, false); // 32-bit comparison
   generateConditionalBranchInstruction(cg, TR::InstOpCode::b_cond, node, failLabel, TR::CC_CS);
   generateMemSrc1Instruction(cg, TR::InstOpCode::strbpost, node, TR::MemoryReference::createWithDisplacement(cg, dstArrayReg, 1), tmpReg);
   generateLabelInstruction(cg, TR::InstOpCode::b, node, doneLabel);

   // fail path: an element that does not fit in 8 bits was found; result is 0
   generateLabelInstruction(cg, TR::InstOpCode::label, node, failLabel);
   loadConstant64(cg, node, 0, resultReg);

   generateLabelInstruction(cg, TR::InstOpCode::label, node, doneLabel);

   node->setRegister(resultReg);

   // Release only the registers that were created as copies by stopUsingCopyReg().
   if (stopUsingCopyReg1)
      {
      cg->stopUsingRegister(srcArrayReg);
      }
   if (stopUsingCopyReg2)
      {
      cg->stopUsingRegister(srcOffsetReg);
      }
   if (stopUsingCopyReg3)
      {
      cg->stopUsingRegister(dstArrayReg);
      }
   if (stopUsingCopyReg4)
      {
      cg->stopUsingRegister(dstOffsetReg);
      }
   cg->stopUsingRegister(tmpReg);
   cg->stopUsingRegister(vtmp0Reg);
   cg->stopUsingRegister(vtmp1Reg);
   cg->stopUsingRegister(vtmp2Reg);

   return resultReg;
   }
6903+
67596904
bool
67606905
J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&resultReg)
67616906
{
@@ -6877,6 +7022,10 @@ J9::ARM64::CodeGenerator::inlineDirectCall(TR::Node *node, TR::Register *&result
68777022
}
68787023
break;
68797024

7025+
case TR::java_lang_StringUTF16_compress_charArray:
7026+
resultReg = inlineStringUTF16compressCharArray(node, cg);
7027+
return true;
7028+
68807029
case TR::sun_misc_Unsafe_compareAndSwapInt_jlObjectJII_Z:
68817030
{
68827031
// In Java9 and newer this can be either the jdk.internal JNI method or the sun.misc Java wrapper.

runtime/compiler/codegen/J9CodeGenerator.hpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -471,6 +471,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
471471
*/
472472
void setSupportsInlineStringLatin1Inflate() { _j9Flags.set(SupportsInlineStringLatin1Inflate); }
473473

474+
/** \brief
475+
* Determines whether the code generator supports inlining of java/lang/StringUTF16.compress([CI[BII)I
*
* \return
* true if the SupportsInlineCompressCharArray flag is set in _j9Flags, false otherwise.
476+
*/
477+
bool getSupportsInlineCompressCharArray() { return _j9Flags.testAny(SupportsInlineCompressCharArray); }
478+
479+
/** \brief
480+
* The code generator supports inlining of java/lang/StringUTF16.compress([CI[BII)I
*
* Sets the SupportsInlineCompressCharArray flag in _j9Flags.
481+
*/
482+
void setSupportsInlineCompressCharArray() { _j9Flags.set(SupportsInlineCompressCharArray); }
483+
474484
/** \brief
475485
* Determines whether the code generator supports inlining of java_util_concurrent_ConcurrentLinkedQueue_tm*
476486
* methods
@@ -677,6 +687,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz);
677687
SavesNonVolatileGPRsForGC = 0x00000800,
678688
SupportsInlineVectorizedMismatch = 0x00001000,
679689
SupportsInlineVectorizedHashCode = 0x00002000,
690+
SupportsInlineCompressCharArray = 0x00004000, /*! codegen inlining of Java StringUTF16.compress([CI[BII) */
680691
};
681692

682693
flags32_t _j9Flags;

runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@
240240
java_lang_StringUTF16_compareCodePointCI,
241241
java_lang_StringUTF16_compareToCIImpl,
242242
java_lang_StringUTF16_compareValues,
243+
java_lang_StringUTF16_compress_charArray,
243244
java_lang_StringUTF16_getChar,
244245
java_lang_StringUTF16_indexOf,
245246
java_lang_StringUTF16_indexOfCharUnsafe,

runtime/compiler/env/j9method.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3236,6 +3236,7 @@ void TR_ResolvedJ9Method::construct()
32363236
{ x(TR::java_lang_StringUTF16_compareCodePointCI, "compareCodePointCI", "(II)I")},
32373237
{ x(TR::java_lang_StringUTF16_compareToCIImpl, "compareToCIImpl", "([BII[BII)I")},
32383238
{ x(TR::java_lang_StringUTF16_compareValues, "compareValues", "([B[BII)I")},
3239+
{ x(TR::java_lang_StringUTF16_compress_charArray, "compress", "([CI[BII)I")},
32393240
{ x(TR::java_lang_StringUTF16_getChar, "getChar", "([BI)C")},
32403241
{ x(TR::java_lang_StringUTF16_indexOf, "indexOf", "([BI[BII)I")},
32413242
{ x(TR::java_lang_StringUTF16_indexOfCharUnsafe, "indexOfCharUnsafe", "([BIII)I")},

runtime/compiler/optimizer/InlinerTempForJ9.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5518,6 +5518,12 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite
55185518
return true;
55195519
}
55205520
break;
5521+
case TR::java_lang_StringUTF16_compress_charArray:
5522+
if (comp->cg()->getSupportsInlineCompressCharArray())
5523+
{
5524+
return true;
5525+
}
5526+
break;
55215527
default:
55225528
break;
55235529
}

0 commit comments

Comments
 (0)