Skip to content

Commit a098846

Browse files
HLE: Slice the very slow memset/memcpy variants.
When these replaced functions take an especially long time, slicing the work allows thread switches to happen in the meantime. This is important for cases where a background thread might otherwise consume more than a full frame's worth of cycles in a single call.
1 parent 053831b commit a098846

File tree

1 file changed

+51
-15
lines changed

1 file changed

+51
-15
lines changed

Core/HLE/ReplaceTables.cpp

+51-15
Original file line numberDiff line numberDiff line change
@@ -182,37 +182,54 @@ static int Replace_memcpy_jak() {
182182
u32 destPtr = PARAM(0);
183183
u32 srcPtr = PARAM(1);
184184
u32 bytes = PARAM(2);
185-
bool skip = false;
185+
186186
if (bytes == 0) {
187187
RETURN(destPtr);
188188
return 5;
189189
}
190+
191+
bool skip = false;
192+
bool sliced = false;
193+
static constexpr uint32_t SLICE_SIZE = 32768;
194+
190195
currentMIPS->InvalidateICache(srcPtr, bytes);
191196
if ((skipGPUReplacements & (int)GPUReplacementSkip::MEMCPY) == 0) {
192197
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
193198
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
194199
}
195200
}
201+
if (!skip && bytes > SLICE_SIZE && bytes != 512 * 272 * 4) {
202+
// This is a very slow func. To avoid thread blocking, do a slice at a time.
203+
// Avoiding exactly 512 * 272 * 4 to detect videos, though.
204+
bytes = SLICE_SIZE;
205+
sliced = true;
206+
}
196207
if (!skip && bytes != 0) {
197208
u8 *dst = Memory::GetPointerWriteRange(destPtr, bytes);
198209
const u8 *src = Memory::GetPointerRange(srcPtr, bytes);
199210

200-
if (!dst || !src) {
201-
} else {
211+
if (dst && src) {
202212
// Jak style overlap.
203213
for (u32 i = 0; i < bytes; i++) {
204214
dst[i] = src[i];
205215
}
206216
}
207217
}
208218

209-
// Jak relies on more registers coming out right than the ABI specifies.
210-
// See the disassembly of the function for the explanations for these...
211-
currentMIPS->r[MIPS_REG_T0] = 0;
212-
currentMIPS->r[MIPS_REG_A0] = -1;
213-
currentMIPS->r[MIPS_REG_A2] = 0;
214-
currentMIPS->r[MIPS_REG_A3] = destPtr + bytes;
215-
RETURN(destPtr);
219+
if (sliced) {
220+
currentMIPS->r[MIPS_REG_A0] += SLICE_SIZE;
221+
currentMIPS->r[MIPS_REG_A1] += SLICE_SIZE;
222+
currentMIPS->r[MIPS_REG_A2] -= SLICE_SIZE;
223+
} else {
224+
// Jak relies on more registers coming out right than the ABI specifies.
225+
// See the disassembly of the function for the explanations for these...
226+
currentMIPS->r[MIPS_REG_T0] = 0;
227+
currentMIPS->r[MIPS_REG_A0] = -1;
228+
currentMIPS->r[MIPS_REG_A2] = 0;
229+
// Even after slicing, this ends up correct.
230+
currentMIPS->r[MIPS_REG_A3] = destPtr + bytes;
231+
RETURN(destPtr);
232+
}
216233

217234
if (MemBlockInfoDetailed(bytes)) {
218235
// It's pretty common that games will copy video data.
@@ -231,6 +248,9 @@ static int Replace_memcpy_jak() {
231248
}
232249
}
233250

251+
if (sliced) {
252+
return 5 + bytes * -8 + 2;
253+
}
234254
return 5 + bytes * 8 + 2; // approximation. This is a slow memcpy - a byte copy loop..
235255
}
236256

@@ -364,24 +384,40 @@ static int Replace_memset_jak() {
364384
}
365385

366386
bool skip = false;
387+
bool sliced = false;
388+
static constexpr uint32_t SLICE_SIZE = 32768;
367389
if (Memory::IsVRAMAddress(destPtr) && (skipGPUReplacements & (int)GPUReplacementSkip::MEMSET) == 0) {
368390
skip = gpu->PerformMemorySet(destPtr, value, bytes);
369391
}
392+
if (!skip && bytes > SLICE_SIZE) {
393+
// This is a very slow func. To avoid thread blocking, do a slice at a time.
394+
bytes = SLICE_SIZE;
395+
sliced = true;
396+
}
370397
if (!skip && bytes != 0) {
371398
u8 *dst = Memory::GetPointerWriteRange(destPtr, bytes);
372399
if (dst) {
373400
memset(dst, value, bytes);
374401
}
375402
}
376403

404+
NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, "ReplaceMemset");
405+
406+
if (sliced) {
407+
currentMIPS->r[MIPS_REG_A0] += SLICE_SIZE;
408+
currentMIPS->r[MIPS_REG_A2] -= SLICE_SIZE;
409+
410+
// This is approximate, and must be a negative value.
411+
return 5 + (int)SLICE_SIZE * -6 + 2;
412+
}
413+
414+
// Even after slicing, this ends up correct.
377415
currentMIPS->r[MIPS_REG_T0] = destPtr + bytes;
378416
currentMIPS->r[MIPS_REG_A2] = -1;
379417
currentMIPS->r[MIPS_REG_A3] = -1;
380418
RETURN(destPtr);
381419

382-
NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, "ReplaceMemset");
383-
384-
return 5 + bytes * 6 + 2; // approximation (hm, inspecting the disasm this should be 5 + 6 * bytes + 2, but this is what works..)
420+
return 5 + bytes * 6 + 2; // approximation
385421
}
386422

387423
static uint32_t SafeStringLen(const uint32_t ptr, uint32_t maxLen = 0x07FFFFFF) {
@@ -1449,12 +1485,12 @@ static const ReplacementTableEntry entries[] = {
14491485
{ "ceilf", &Replace_ceilf, 0, REPFLAG_DISABLED },
14501486

14511487
{ "memcpy", &Replace_memcpy, 0, 0 },
1452-
{ "memcpy_jak", &Replace_memcpy_jak, 0, 0 },
1488+
{ "memcpy_jak", &Replace_memcpy_jak, 0, REPFLAG_SLICED },
14531489
{ "memcpy16", &Replace_memcpy16, 0, 0 },
14541490
{ "memcpy_swizzled", &Replace_memcpy_swizzled, 0, 0 },
14551491
{ "memmove", &Replace_memmove, 0, 0 },
14561492
{ "memset", &Replace_memset, 0, 0 },
1457-
{ "memset_jak", &Replace_memset_jak, 0, 0 },
1493+
{ "memset_jak", &Replace_memset_jak, 0, REPFLAG_SLICED },
14581494
{ "strlen", &Replace_strlen, 0, REPFLAG_DISABLED },
14591495
{ "strcpy", &Replace_strcpy, 0, REPFLAG_DISABLED },
14601496
{ "strncpy", &Replace_strncpy, 0, REPFLAG_DISABLED },

0 commit comments

Comments
 (0)