Skip to content

Commit 5291af5

Browse files
committed
shader_recompiler: Implement V_SUBB_U32 and V_SUBBREV_U32.
1 parent d91ad61 commit 5291af5

File tree

2 files changed

+85
-26
lines changed

2 files changed

+85
-26
lines changed

src/shader_recompiler/frontend/translate/translate.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ class Translator {
155155
void V_SUB_I32(const GcnInst& inst);
156156
void V_SUBREV_I32(const GcnInst& inst);
157157
void V_ADDC_U32(const GcnInst& inst);
158+
void V_SUBB_U32(const GcnInst& inst);
159+
void V_SUBBREV_U32(const GcnInst& inst);
158160
void V_LDEXP_F32(const GcnInst& inst);
159161
void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
160162
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
@@ -273,7 +275,9 @@ class Translator {
273275
void SetDst(const InstOperand& operand, const IR::U32F32& value);
274276
void SetDst64(const InstOperand& operand, const IR::U64F64& value_raw);
275277

276-
// Vector ALU Helprers
278+
// Vector ALU Helpers
279+
// Reads the carry-in bit of a carry-consuming VALU instruction (VCC, or the scalar register named
// by src[2] when three sources are present) and returns it as a 32-bit 0/1 value.
IR::U32 GetCarryIn(const GcnInst& inst);
280+
// Writes the carry/borrow-out bit of a VALU instruction to VCC, or to the scalar register named by
// dst[1] when the instruction carries a second destination.
void SetCarryOut(const GcnInst& inst, const IR::U1& carry);
277281
IR::U32 VMovRelSHelper(u32 src_vgprno, const IR::U32 m0);
278282
void VMovRelDHelper(u32 dst_vgprno, const IR::U32 src_val, const IR::U32 m0);
279283

src/shader_recompiler/frontend/translate/vector_alu.cpp

Lines changed: 80 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
8787
return V_SUBREV_I32(inst);
8888
case Opcode::V_ADDC_U32:
8989
return V_ADDC_U32(inst);
90+
case Opcode::V_SUBB_U32:
91+
return V_SUBB_U32(inst);
92+
case Opcode::V_SUBBREV_U32:
93+
return V_SUBBREV_U32(inst);
9094
case Opcode::V_LDEXP_F32:
9195
return V_LDEXP_F32(inst);
9296
case Opcode::V_CVT_PKNORM_U16_F32:
@@ -546,51 +550,71 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
546550
}
547551

548552
void Translator::V_ADD_I32(const GcnInst& inst) {
553+
// Signed or unsigned components
549554
const IR::U32 src0{GetSrc(inst.src[0])};
550555
const IR::U32 src1{ir.GetVectorReg(IR::VectorReg(inst.src[1].code))};
551-
SetDst(inst.dst[0], ir.IAdd(src0, src1));
552-
// TODO: Carry
556+
const IR::U32 result{ir.IAdd(src0, src1)};
557+
SetDst(inst.dst[0], result);
558+
559+
// TODO: Carry-out with signed or unsigned components
553560
}
554561

555562
void Translator::V_SUB_I32(const GcnInst& inst) {
563+
// Unsigned components
556564
const IR::U32 src0{GetSrc(inst.src[0])};
557565
const IR::U32 src1{GetSrc(inst.src[1])};
558-
SetDst(inst.dst[0], ir.ISub(src0, src1));
566+
const IR::U32 result{ir.ISub(src0, src1)};
567+
SetDst(inst.dst[0], result);
568+
569+
const IR::U1 did_underflow{ir.IGreaterThan(src1, src0, false)};
570+
SetCarryOut(inst, did_underflow);
559571
}
560572

561573
void Translator::V_SUBREV_I32(const GcnInst& inst) {
574+
// Unsigned components
562575
const IR::U32 src0{GetSrc(inst.src[0])};
563576
const IR::U32 src1{GetSrc(inst.src[1])};
564-
SetDst(inst.dst[0], ir.ISub(src1, src0));
565-
// TODO: Carry-out
577+
const IR::U32 result{ir.ISub(src1, src0)};
578+
SetDst(inst.dst[0], result);
579+
580+
const IR::U1 did_underflow{ir.IGreaterThan(src0, src1, false)};
581+
SetCarryOut(inst, did_underflow);
566582
}
567583

568584
void Translator::V_ADDC_U32(const GcnInst& inst) {
569-
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
570-
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
585+
// Unsigned components
586+
const IR::U32 src0{GetSrc(inst.src[0])};
587+
const IR::U32 src1{GetSrc(inst.src[1])};
588+
const IR::U32 carry{GetCarryIn(inst)};
589+
const IR::U32 result{ir.IAdd(ir.IAdd(src0, src1), carry)};
590+
SetDst(inst.dst[0], result);
571591

572-
IR::U1 carry;
573-
if (inst.src_count == 3) { // VOP3
574-
if (inst.src[2].field == OperandField::VccLo) {
575-
carry = ir.GetVcc();
576-
} else if (inst.src[2].field == OperandField::ScalarGPR) {
577-
carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
578-
} else {
579-
UNREACHABLE();
580-
}
581-
} else { // VOP2
582-
carry = ir.GetVcc();
583-
}
592+
const IR::U1 less_src0{ir.ILessThan(result, src0, false)};
593+
const IR::U1 less_src1{ir.ILessThan(result, src1, false)};
594+
const IR::U1 did_overflow{ir.LogicalOr(less_src0, less_src1)};
595+
SetCarryOut(inst, did_overflow);
596+
}
584597

585-
const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
586-
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
598+
void Translator::V_SUBB_U32(const GcnInst& inst) {
599+
// Signed or unsigned components
600+
const IR::U32 src0{GetSrc(inst.src[0])};
601+
const IR::U32 src1{GetSrc(inst.src[1])};
602+
const IR::U32 carry{GetCarryIn(inst)};
603+
const IR::U32 result{ir.ISub(ir.ISub(src0, src1), carry)};
604+
SetDst(inst.dst[0], result);
587605

606+
// TODO: Carry-out with signed or unsigned components
607+
}
608+
609+
void Translator::V_SUBBREV_U32(const GcnInst& inst) {
610+
// Signed or unsigned components
611+
const IR::U32 src0{GetSrc(inst.src[0])};
612+
const IR::U32 src1{GetSrc(inst.src[1])};
613+
const IR::U32 carry{GetCarryIn(inst)};
614+
const IR::U32 result{ir.ISub(ir.ISub(src1, src0), carry)};
588615
SetDst(inst.dst[0], result);
589616

590-
const IR::U1 less_src0 = ir.ILessThan(result, src0, false);
591-
const IR::U1 less_src1 = ir.ILessThan(result, src1, false);
592-
const IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1);
593-
ir.SetVcc(did_overflow);
617+
// TODO: Carry-out with signed or unsigned components
594618
}
595619

596620
void Translator::V_LDEXP_F32(const GcnInst& inst) {
@@ -1152,6 +1176,37 @@ void Translator::V_MAD_U64_U32(const GcnInst& inst) {
11521176
ir.SetVcc(did_overflow);
11531177
}
11541178

1179+
/// Fetches the carry-in bit for a carry-consuming VALU instruction and widens it to a 32-bit
/// value (1 when the bit is set, 0 otherwise).
///
/// With three sources (VOP3) the bit comes from src[2], which must be either VCC or a scalar
/// register; with fewer sources (VOP2) it always comes from VCC.
IR::U32 Translator::GetCarryIn(const GcnInst& inst) {
    const bool is_vop3 = inst.src_count == 3;
    const bool from_sgpr = is_vop3 && inst.src[2].field == OperandField::ScalarGPR;
    if (is_vop3 && !from_sgpr && inst.src[2].field != OperandField::VccLo) {
        // Only VCC and scalar registers are handled as carry sources.
        UNREACHABLE();
    }

    const IR::U1 carry_bit{from_sgpr ? ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))
                                     : ir.GetVcc()};
    return IR::U32{ir.Select(carry_bit, ir.Imm32(1), ir.Imm32(0))};
}
1195+
1196+
/// Stores the carry/borrow-out bit produced by a VALU instruction.
///
/// With two destinations (VOP3) the bit goes to dst[1], which must be either VCC or a scalar
/// register; otherwise (VOP2) it always goes to VCC.
void Translator::SetCarryOut(const GcnInst& inst, const IR::U1& carry) {
    const bool is_vop3 = inst.dst_count == 2;
    if (is_vop3 && inst.dst[1].field == OperandField::ScalarGPR) {
        ir.SetThreadBitScalarReg(IR::ScalarReg(inst.dst[1].code), carry);
        return;
    }
    if (is_vop3 && inst.dst[1].field != OperandField::VccLo) {
        // Only VCC and scalar registers are handled as carry destinations.
        UNREACHABLE();
    }
    ir.SetVcc(carry);
}
1209+
11551210
// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
11561211
// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to
11571212

0 commit comments

Comments
 (0)