Skip to content

Add support for Classify, CompressStore, ExpandLoad, MaskLoad, MaskStore, and MoveMask #116708

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -2670,12 +2670,13 @@ class emitter

#if defined(TARGET_XARCH)
regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src, regNumber targetReg = REG_NA);
void emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem, insOpts instOptions = INS_OPTS_NONE);
#else
regNumber emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src);
void emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem);
#endif
regNumber emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src1, GenTree* src2);
void emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, GenTreeIndir* mem);
void emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem);
void emitInsStoreLcl(instruction ins, emitAttr attr, GenTreeLclVarCommon* varNode);
insFormat emitMapFmtForIns(insFormat fmt, instruction ins);
insFormat emitMapFmtAtoM(insFormat fmt);
Expand Down
29 changes: 25 additions & 4 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6126,7 +6126,7 @@ void emitter::emitInsLoadInd(instruction ins, emitAttr attr, regNumber dstReg, G
// attr - the instruction operand size
// mem - the GT_STOREIND node
//
void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem)
void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* mem, insOpts instOptions)
{
assert(mem->OperIs(GT_STOREIND));

Expand All @@ -6150,6 +6150,7 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m

if (data->isContainedIntOrIImmed())
{
assert(instOptions == INS_OPTS_NONE);
emitIns_S_I(ins, attr, varNode->GetLclNum(), offset, (int)data->AsIntConCommon()->IconValue());
}
#if defined(FEATURE_HW_INTRINSICS)
Expand All @@ -6162,11 +6163,12 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m

if (numArgs == 1)
{
emitIns_S_R(ins, attr, op1->GetRegNum(), varNode->GetLclNum(), offset);
emitIns_S_R(ins, attr, op1->GetRegNum(), varNode->GetLclNum(), offset, instOptions);
}
else
{
assert(numArgs == 2);
assert(instOptions == INS_OPTS_NONE);

int icon = static_cast<int>(hwintrinsic->Op(2)->AsIntConCommon()->IconValue());
emitIns_S_R_I(ins, attr, varNode->GetLclNum(), offset, op1->GetRegNum(), icon);
Expand All @@ -6176,7 +6178,7 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m
else
{
assert(!data->isContained());
emitIns_S_R(ins, attr, data->GetRegNum(), varNode->GetLclNum(), offset);
emitIns_S_R(ins, attr, data->GetRegNum(), varNode->GetLclNum(), offset, instOptions);
}

// Updating variable liveness after instruction was emitted
Expand All @@ -6190,6 +6192,8 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m

if (data->isContainedIntOrIImmed())
{
assert(instOptions == INS_OPTS_NONE);

int icon = (int)data->AsIntConCommon()->IconValue();
id = emitNewInstrAmdCns(attr, offset, icon);
id->idIns(ins);
Expand All @@ -6211,12 +6215,18 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m
id->idIns(ins);
emitHandleMemOp(mem, id, emitInsModeFormat(ins, IF_ARD_RRD), ins);
id->idReg1(op1->GetRegNum());

assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0);
SetEvexEmbMaskIfNeeded(id, instOptions);

sz = emitInsSizeAM(id, insCodeMR(ins));
id->idCodeSize(sz);
}
else
{
assert(numArgs == 2);
assert(instOptions == INS_OPTS_NONE);

int icon = static_cast<int>(hwintrinsic->Op(2)->AsIntConCommon()->IconValue());

id = emitNewInstrAmdCns(attr, offset, icon);
Expand All @@ -6235,6 +6245,10 @@ void emitter::emitInsStoreInd(instruction ins, emitAttr attr, GenTreeStoreInd* m
id->idIns(ins);
emitHandleMemOp(mem, id, emitInsModeFormat(ins, IF_ARD_RRD), ins);
id->idReg1(data->GetRegNum());

assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0);
SetEvexEmbMaskIfNeeded(id, instOptions);

sz = emitInsSizeAM(id, insCodeMR(ins));
id->idCodeSize(sz);
}
Expand Down Expand Up @@ -10727,7 +10741,7 @@ bool emitter::IsRedundantStackMov(instruction ins, insFormat fmt, emitAttr size,
return false;
}

void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insOpts instOptions)
{
insFormat fmt = (ins == INS_xchg) ? IF_SRW_RRW : emitInsModeFormat(ins, IF_SRD_RRD);
if (IsMovInstruction(ins) && IsRedundantStackMov(ins, fmt, attr, ireg, varx, offs))
Expand All @@ -10742,6 +10756,9 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int va
id->idReg1(ireg);
id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);

assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0);
SetEvexEmbMaskIfNeeded(id, instOptions);

sz = emitInsSizeSV(id, insCodeMR(ins), varx, offs);

#ifdef TARGET_X86
Expand Down Expand Up @@ -21045,6 +21062,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_comisd:
case INS_ucomiss:
case INS_ucomisd:
case INS_vfpclasspd:
case INS_vfpclassps:
case INS_vfpclasssd:
case INS_vfpclassss:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_3C;
break;
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,8 @@ void emitIns_R_R_R_R(instruction ins,

void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);

void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
void emitIns_S_R(
instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insOpts instOptions = INS_OPTS_NONE);

void emitIns_R_S(
instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insOpts instOptions = INS_OPTS_NONE);
Expand Down
10 changes: 9 additions & 1 deletion src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,15 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp,
{
return ni;
}
return binarySearchId(InstructionSet_AVX512v2, sig, methodName, isLimitedVector256Isa);

ni = binarySearchId(InstructionSet_AVX512v2, sig, methodName, isLimitedVector256Isa);

if (ni != NI_Illegal)
{
return ni;
}

return binarySearchId(InstructionSet_AVX512v3, sig, methodName, isLimitedVector256Isa);
}
else if (isa == InstructionSet_AVX10v1_X64)
{
Expand Down
32 changes: 32 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -816,8 +816,40 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
emitSwCase);
}
}
else if (category == HW_Category_MemoryLoad)
{
bool mergeWithZero = false;

if (op3->isContained())
{
op3Reg = targetReg;
mergeWithZero = true;
}

assert(emitter::isMaskReg(op2Reg));
assert(mergeWithZero == op3->IsVectorZero());

// Until we improve the handling of addressing modes in the emitter, we'll create a
// temporary GT_IND to generate code with.
GenTreeIndir load = indirForm(node->TypeGet(), op1);
emit->emitIns_Mov(INS_movaps, simdSize, targetReg, op3Reg, /* canSkip */ true);

instOptions = AddEmbMaskingMode(instOptions, op2Reg, mergeWithZero);
emit->emitIns_R_A(ins, simdSize, targetReg, &load, instOptions);
}
else if (category == HW_Category_MemoryStore)
{
if (emitter::isMaskReg(op2Reg))
{
// Until we improve the handling of addressing modes in the emitter, we'll create a
// temporary GT_STOREIND to generate code with.
GenTreeStoreInd store = storeIndirForm(node->TypeGet(), op1, op3);

instOptions = AddEmbMaskingMode(instOptions, op2Reg, false);
emit->emitInsStoreInd(ins, simdSize, &store, instOptions);
break;
}

// The Mask instructions do not currently support containment of the address.
assert(!op2->isContained());

Expand Down
Loading
Loading