@@ -87,6 +87,10 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
87
87
return V_SUBREV_I32 (inst);
88
88
case Opcode::V_ADDC_U32:
89
89
return V_ADDC_U32 (inst);
90
+ case Opcode::V_SUBB_U32:
91
+ return V_SUBB_U32 (inst);
92
+ case Opcode::V_SUBBREV_U32:
93
+ return V_SUBBREV_U32 (inst);
90
94
case Opcode::V_LDEXP_F32:
91
95
return V_LDEXP_F32 (inst);
92
96
case Opcode::V_CVT_PKNORM_U16_F32:
@@ -546,51 +550,71 @@ void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
546
550
}
547
551
548
552
void Translator::V_ADD_I32 (const GcnInst& inst) {
553
+ // Signed or unsigned components
549
554
const IR::U32 src0{GetSrc (inst.src [0 ])};
550
555
const IR::U32 src1{ir.GetVectorReg (IR::VectorReg (inst.src [1 ].code ))};
551
- SetDst (inst.dst [0 ], ir.IAdd (src0, src1));
552
- // TODO: Carry
556
+ const IR::U32 result{ir.IAdd (src0, src1)};
557
+ SetDst (inst.dst [0 ], result);
558
+
559
+ // TODO: Carry-out with signed or unsigned components
553
560
}
554
561
555
562
void Translator::V_SUB_I32 (const GcnInst& inst) {
563
+ // Unsigned components
556
564
const IR::U32 src0{GetSrc (inst.src [0 ])};
557
565
const IR::U32 src1{GetSrc (inst.src [1 ])};
558
- SetDst (inst.dst [0 ], ir.ISub (src0, src1));
566
+ const IR::U32 result{ir.ISub (src0, src1)};
567
+ SetDst (inst.dst [0 ], result);
568
+
569
+ const IR::U1 did_underflow{ir.IGreaterThan (src1, src0, false )};
570
+ SetCarryOut (inst, did_underflow);
559
571
}
560
572
561
573
void Translator::V_SUBREV_I32 (const GcnInst& inst) {
574
+ // Unsigned components
562
575
const IR::U32 src0{GetSrc (inst.src [0 ])};
563
576
const IR::U32 src1{GetSrc (inst.src [1 ])};
564
- SetDst (inst.dst [0 ], ir.ISub (src1, src0));
565
- // TODO: Carry-out
577
+ const IR::U32 result{ir.ISub (src1, src0)};
578
+ SetDst (inst.dst [0 ], result);
579
+
580
+ const IR::U1 did_underflow{ir.IGreaterThan (src0, src1, false )};
581
+ SetCarryOut (inst, did_underflow);
566
582
}
567
583
568
584
void Translator::V_ADDC_U32 (const GcnInst& inst) {
569
- const auto src0 = GetSrc<IR::U32>(inst.src [0 ]);
570
- const auto src1 = GetSrc<IR::U32>(inst.src [1 ]);
585
+ // Unsigned components
586
+ const IR::U32 src0{GetSrc (inst.src [0 ])};
587
+ const IR::U32 src1{GetSrc (inst.src [1 ])};
588
+ const IR::U32 carry{GetCarryIn (inst)};
589
+ const IR::U32 result{ir.IAdd (ir.IAdd (src0, src1), carry)};
590
+ SetDst (inst.dst [0 ], result);
571
591
572
- IR::U1 carry;
573
- if (inst.src_count == 3 ) { // VOP3
574
- if (inst.src [2 ].field == OperandField::VccLo) {
575
- carry = ir.GetVcc ();
576
- } else if (inst.src [2 ].field == OperandField::ScalarGPR) {
577
- carry = ir.GetThreadBitScalarReg (IR::ScalarReg (inst.src [2 ].code ));
578
- } else {
579
- UNREACHABLE ();
580
- }
581
- } else { // VOP2
582
- carry = ir.GetVcc ();
583
- }
592
+ const IR::U1 less_src0{ir.ILessThan (result, src0, false )};
593
+ const IR::U1 less_src1{ir.ILessThan (result, src1, false )};
594
+ const IR::U1 did_overflow{ir.LogicalOr (less_src0, less_src1)};
595
+ SetCarryOut (inst, did_overflow);
596
+ }
584
597
585
- const IR::U32 scarry = IR::U32{ir.Select (carry, ir.Imm32 (1 ), ir.Imm32 (0 ))};
586
- const IR::U32 result = ir.IAdd (ir.IAdd (src0, src1), scarry);
598
+ void Translator::V_SUBB_U32 (const GcnInst& inst) {
599
+ // Signed or unsigned components
600
+ const IR::U32 src0{GetSrc (inst.src [0 ])};
601
+ const IR::U32 src1{GetSrc (inst.src [1 ])};
602
+ const IR::U32 carry{GetCarryIn (inst)};
603
+ const IR::U32 result{ir.ISub (ir.ISub (src0, src1), carry)};
604
+ SetDst (inst.dst [0 ], result);
587
605
606
+ // TODO: Carry-out with signed or unsigned components
607
+ }
608
+
609
+ void Translator::V_SUBBREV_U32 (const GcnInst& inst) {
610
+ // Signed or unsigned components
611
+ const IR::U32 src0{GetSrc (inst.src [0 ])};
612
+ const IR::U32 src1{GetSrc (inst.src [1 ])};
613
+ const IR::U32 carry{GetCarryIn (inst)};
614
+ const IR::U32 result{ir.ISub (ir.ISub (src1, src0), carry)};
588
615
SetDst (inst.dst [0 ], result);
589
616
590
- const IR::U1 less_src0 = ir.ILessThan (result, src0, false );
591
- const IR::U1 less_src1 = ir.ILessThan (result, src1, false );
592
- const IR::U1 did_overflow = ir.LogicalOr (less_src0, less_src1);
593
- ir.SetVcc (did_overflow);
617
+ // TODO: Carry-out with signed or unsigned components
594
618
}
595
619
596
620
void Translator::V_LDEXP_F32 (const GcnInst& inst) {
@@ -1152,6 +1176,37 @@ void Translator::V_MAD_U64_U32(const GcnInst& inst) {
1152
1176
ir.SetVcc (did_overflow);
1153
1177
}
1154
1178
1179
+ IR::U32 Translator::GetCarryIn (const GcnInst& inst) {
1180
+ IR::U1 carry;
1181
+ if (inst.src_count == 3 ) { // VOP3
1182
+ if (inst.src [2 ].field == OperandField::VccLo) {
1183
+ carry = ir.GetVcc ();
1184
+ } else if (inst.src [2 ].field == OperandField::ScalarGPR) {
1185
+ carry = ir.GetThreadBitScalarReg (IR::ScalarReg (inst.src [2 ].code ));
1186
+ } else {
1187
+ UNREACHABLE ();
1188
+ }
1189
+ } else { // VOP2
1190
+ carry = ir.GetVcc ();
1191
+ }
1192
+
1193
+ return IR::U32{ir.Select (carry, ir.Imm32 (1 ), ir.Imm32 (0 ))};
1194
+ }
1195
+
1196
+ void Translator::SetCarryOut (const GcnInst& inst, const IR::U1& carry) {
1197
+ if (inst.dst_count == 2 ) { // VOP3
1198
+ if (inst.dst [1 ].field == OperandField::VccLo) {
1199
+ ir.SetVcc (carry);
1200
+ } else if (inst.dst [1 ].field == OperandField::ScalarGPR) {
1201
+ ir.SetThreadBitScalarReg (IR::ScalarReg (inst.dst [1 ].code ), carry);
1202
+ } else {
1203
+ UNREACHABLE ();
1204
+ }
1205
+ } else { // VOP2
1206
+ ir.SetVcc (carry);
1207
+ }
1208
+ }
1209
+
1155
1210
// TODO: add range analysis pass to hopefully put an upper bound on m0, and only select one of
1156
1211
// [src_vgprno, src_vgprno + max_m0]. Same for dst regs we may write back to
1157
1212
0 commit comments