@@ -229,6 +229,15 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
229
229
CONDITIONAL_DISABLE;
230
230
231
231
bool logBlocks = false ;
232
+
233
+ bool letThroughHalves = false ;
234
+ if (opts.optimizeForInterpreter ) {
235
+ // If we're using the interpreter, which can handle these instructions directly,
236
+ // don't break "half" instructions up.
237
+ // Of course, we still want to combine if possible.
238
+ letThroughHalves = true ;
239
+ }
240
+
232
241
for (int i = 0 , n = (int )in.GetInstructions ().size (); i < n; ++i) {
233
242
const IRInst &inst = in.GetInstructions ()[i];
234
243
@@ -305,6 +314,11 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
305
314
switch (inst.op ) {
306
315
case IROp::Load32Left:
307
316
if (!combineOpposite (IROp::Load32Right, -3 , IROp::Load32, -3 )) {
317
+ if (letThroughHalves) {
318
+ out.Write (inst);
319
+ break ;
320
+ }
321
+
308
322
addCommonProlog ();
309
323
// dest &= (0x00ffffff >> shift)
310
324
// Alternatively, could shift to a wall and back (but would require two shifts each way.)
@@ -339,6 +353,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
339
353
340
354
case IROp::Load32Right:
341
355
if (!combineOpposite (IROp::Load32Left, 3 , IROp::Load32, 0 )) {
356
+ if (letThroughHalves) {
357
+ out.Write (inst);
358
+ break ;
359
+ }
342
360
addCommonProlog ();
343
361
// IRTEMP_LR_VALUE >>= shift
344
362
out.Write (IROp::Shr, IRTEMP_LR_VALUE, IRTEMP_LR_VALUE, IRTEMP_LR_SHIFT);
@@ -382,6 +400,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
382
400
383
401
case IROp::Store32Left:
384
402
if (!combineOpposite (IROp::Store32Right, -3 , IROp::Store32, -3 )) {
403
+ if (letThroughHalves) {
404
+ out.Write (inst);
405
+ break ;
406
+ }
385
407
addCommonProlog ();
386
408
// IRTEMP_LR_VALUE &= 0xffffff00 << shift
387
409
out.WriteSetConstant (IRTEMP_LR_MASK, 0xffffff00 );
@@ -399,6 +421,10 @@ bool RemoveLoadStoreLeftRight(const IRWriter &in, IRWriter &out, const IROptions
399
421
400
422
case IROp::Store32Right:
401
423
if (!combineOpposite (IROp::Store32Left, 3 , IROp::Store32, 0 )) {
424
+ if (letThroughHalves) {
425
+ out.Write (inst);
426
+ break ;
427
+ }
402
428
addCommonProlog ();
403
429
// IRTEMP_LR_VALUE &= 0x00ffffff << (24 - shift)
404
430
out.WriteSetConstant (IRTEMP_LR_MASK, 0x00ffffff );
@@ -2174,13 +2200,23 @@ bool OptimizeLoadsAfterStores(const IRWriter &in, IRWriter &out, const IROptions
2174
2200
case IROp::Store32:
2175
2201
if (next.op == IROp::Load32 &&
2176
2202
next.constant == inst.constant &&
2177
- next.dest == inst.src3 &&
2203
+ next.dest == inst.dest &&
2178
2204
next.src1 == inst.src1 ) {
2179
2205
// The upcoming load is completely redundant.
2180
2206
// Skip it.
2181
2207
i++;
2182
2208
}
2183
2209
break ;
2210
+ case IROp::StoreVec4:
2211
+ if (next.op == IROp::LoadVec4 &&
2212
+ next.constant == inst.constant &&
2213
+ next.dest == inst.dest &&
2214
+ next.src1 == inst.src1 ) {
2215
+ // The upcoming load is completely redundant. These are common in Wipeout.
2216
+ // Skip it. NOTE: It looks like vector loads/stores use different register assignments, but there's a union between dest and src3.
2217
+ i++;
2218
+ }
2219
+ break ;
2184
2220
default :
2185
2221
break ;
2186
2222
}
@@ -2243,10 +2279,18 @@ bool OptimizeForInterpreter(const IRWriter &in, IRWriter &out, const IROptions &
2243
2279
inst.op = IROp::OptFMovToGPRShr8;
2244
2280
i++; // Skip the next instruction.
2245
2281
}
2246
- out.Write (inst);
2247
- } else {
2248
- out.Write (inst);
2249
2282
}
2283
+ out.Write (inst);
2284
+ break ;
2285
+ case IROp::FMovFromGPR:
2286
+ if (!last) {
2287
+ IRInst next = in.GetInstructions ()[i + 1 ];
2288
+ if (next.op == IROp::FCvtSW && next.src1 == inst.dest && next.dest == inst.dest ) {
2289
+ inst.op = IROp::OptFCvtSWFromGPR;
2290
+ i++; // Skip the next
2291
+ }
2292
+ }
2293
+ out.Write (inst);
2250
2294
break ;
2251
2295
default :
2252
2296
out.Write (inst);
0 commit comments