@@ -182,37 +182,54 @@ static int Replace_memcpy_jak() {
182
182
u32 destPtr = PARAM (0 );
183
183
u32 srcPtr = PARAM (1 );
184
184
u32 bytes = PARAM (2 );
185
- bool skip = false ;
185
+
186
186
if (bytes == 0 ) {
187
187
RETURN (destPtr);
188
188
return 5 ;
189
189
}
190
+
191
+ bool skip = false ;
192
+ bool sliced = false ;
193
+ static constexpr uint32_t SLICE_SIZE = 32768 ;
194
+
190
195
currentMIPS->InvalidateICache (srcPtr, bytes);
191
196
if ((skipGPUReplacements & (int )GPUReplacementSkip::MEMCPY) == 0 ) {
192
197
if (Memory::IsVRAMAddress (destPtr) || Memory::IsVRAMAddress (srcPtr)) {
193
198
skip = gpu->PerformMemoryCopy (destPtr, srcPtr, bytes);
194
199
}
195
200
}
201
+ if (!skip && bytes > SLICE_SIZE && bytes != 512 * 272 * 4 ) {
202
+ // This is a very slow func. To avoid thread blocking, do a slice at a time.
203
+ // Avoiding exactly 512 * 272 * 4 to detect videos, though.
204
+ bytes = SLICE_SIZE;
205
+ sliced = true ;
206
+ }
196
207
if (!skip && bytes != 0 ) {
197
208
u8 *dst = Memory::GetPointerWriteRange (destPtr, bytes);
198
209
const u8 *src = Memory::GetPointerRange (srcPtr, bytes);
199
210
200
- if (!dst || !src) {
201
- } else {
211
+ if (dst && src) {
202
212
// Jak style overlap.
203
213
for (u32 i = 0 ; i < bytes; i++) {
204
214
dst[i] = src[i];
205
215
}
206
216
}
207
217
}
208
218
209
- // Jak relies on more registers coming out right than the ABI specifies.
210
- // See the disassembly of the function for the explanations for these...
211
- currentMIPS->r [MIPS_REG_T0] = 0 ;
212
- currentMIPS->r [MIPS_REG_A0] = -1 ;
213
- currentMIPS->r [MIPS_REG_A2] = 0 ;
214
- currentMIPS->r [MIPS_REG_A3] = destPtr + bytes;
215
- RETURN (destPtr);
219
+ if (sliced) {
220
+ currentMIPS->r [MIPS_REG_A0] += SLICE_SIZE;
221
+ currentMIPS->r [MIPS_REG_A1] += SLICE_SIZE;
222
+ currentMIPS->r [MIPS_REG_A2] -= SLICE_SIZE;
223
+ } else {
224
+ // Jak relies on more registers coming out right than the ABI specifies.
225
+ // See the disassembly of the function for the explanations for these...
226
+ currentMIPS->r [MIPS_REG_T0] = 0 ;
227
+ currentMIPS->r [MIPS_REG_A0] = -1 ;
228
+ currentMIPS->r [MIPS_REG_A2] = 0 ;
229
+ // Even after slicing, this ends up correct.
230
+ currentMIPS->r [MIPS_REG_A3] = destPtr + bytes;
231
+ RETURN (destPtr);
232
+ }
216
233
217
234
if (MemBlockInfoDetailed (bytes)) {
218
235
// It's pretty common that games will copy video data.
@@ -231,6 +248,9 @@ static int Replace_memcpy_jak() {
231
248
}
232
249
}
233
250
251
+ if (sliced) {
252
+ return 5 + bytes * -8 + 2 ;
253
+ }
234
254
return 5 + bytes * 8 + 2 ; // approximation. This is a slow memcpy - a byte copy loop..
235
255
}
236
256
@@ -364,24 +384,40 @@ static int Replace_memset_jak() {
364
384
}
365
385
366
386
bool skip = false ;
387
+ bool sliced = false ;
388
+ static constexpr uint32_t SLICE_SIZE = 32768 ;
367
389
if (Memory::IsVRAMAddress (destPtr) && (skipGPUReplacements & (int )GPUReplacementSkip::MEMSET) == 0 ) {
368
390
skip = gpu->PerformMemorySet (destPtr, value, bytes);
369
391
}
392
+ if (!skip && bytes > SLICE_SIZE) {
393
+ // This is a very slow func. To avoid thread blocking, do a slice at a time.
394
+ bytes = SLICE_SIZE;
395
+ sliced = true ;
396
+ }
370
397
if (!skip && bytes != 0 ) {
371
398
u8 *dst = Memory::GetPointerWriteRange (destPtr, bytes);
372
399
if (dst) {
373
400
memset (dst, value, bytes);
374
401
}
375
402
}
376
403
404
+ NotifyMemInfo (MemBlockFlags::WRITE, destPtr, bytes, " ReplaceMemset" );
405
+
406
+ if (sliced) {
407
+ currentMIPS->r [MIPS_REG_A0] += SLICE_SIZE;
408
+ currentMIPS->r [MIPS_REG_A2] -= SLICE_SIZE;
409
+
410
+ // This is approximate, and must be a negative value.
411
+ return 5 + (int )SLICE_SIZE * -6 + 2 ;
412
+ }
413
+
414
+ // Even after slicing, this ends up correct.
377
415
currentMIPS->r [MIPS_REG_T0] = destPtr + bytes;
378
416
currentMIPS->r [MIPS_REG_A2] = -1 ;
379
417
currentMIPS->r [MIPS_REG_A3] = -1 ;
380
418
RETURN (destPtr);
381
419
382
- NotifyMemInfo (MemBlockFlags::WRITE, destPtr, bytes, " ReplaceMemset" );
383
-
384
- return 5 + bytes * 6 + 2 ; // approximation (hm, inspecting the disasm this should be 5 + 6 * bytes + 2, but this is what works..)
420
+ return 5 + bytes * 6 + 2 ; // approximation
385
421
}
386
422
387
423
static uint32_t SafeStringLen (const uint32_t ptr, uint32_t maxLen = 0x07FFFFFF ) {
@@ -1449,12 +1485,12 @@ static const ReplacementTableEntry entries[] = {
1449
1485
{ " ceilf" , &Replace_ceilf, 0 , REPFLAG_DISABLED },
1450
1486
1451
1487
{ " memcpy" , &Replace_memcpy, 0 , 0 },
1452
- { " memcpy_jak" , &Replace_memcpy_jak, 0 , 0 },
1488
+ { " memcpy_jak" , &Replace_memcpy_jak, 0 , REPFLAG_SLICED },
1453
1489
{ " memcpy16" , &Replace_memcpy16, 0 , 0 },
1454
1490
{ " memcpy_swizzled" , &Replace_memcpy_swizzled, 0 , 0 },
1455
1491
{ " memmove" , &Replace_memmove, 0 , 0 },
1456
1492
{ " memset" , &Replace_memset, 0 , 0 },
1457
- { " memset_jak" , &Replace_memset_jak, 0 , 0 },
1493
+ { " memset_jak" , &Replace_memset_jak, 0 , REPFLAG_SLICED },
1458
1494
{ " strlen" , &Replace_strlen, 0 , REPFLAG_DISABLED },
1459
1495
{ " strcpy" , &Replace_strcpy, 0 , REPFLAG_DISABLED },
1460
1496
{ " strncpy" , &Replace_strncpy, 0 , REPFLAG_DISABLED },
0 commit comments