Skip to content

Commit b875101

Browse files
authored
Merge pull request #18413 from hrydgard/cache-drawpixels
Cache and hash data for DrawPixels
2 parents 3287c26 + 632fa1c commit b875101

File tree

5 files changed

+35
-6
lines changed

5 files changed

+35
-6
lines changed

GPU/Common/FramebufferManagerCommon.cpp

+28-3
Original file line number | Diff line number | Diff line change
@@ -1208,7 +1208,6 @@ void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int
12081208
vfb ? vfb->bufferHeight : g_display.pixel_yres,
12091209
u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);
12101210

1211-
gpuStats.numUploads++;
12121211
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
12131212

12141213
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
@@ -1324,6 +1323,19 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
13241323
}
13251324
}
13261325

1326+
int bpp = BufferFormatBytesPerPixel(srcPixelFormat);
1327+
int srcStrideInBytes = srcStride * bpp;
1328+
int widthInBytes = width * bpp;
1329+
1330+
// Compute hash of contents.
1331+
XXH3_state_t *hashState = XXH3_createState();
1332+
XXH3_64bits_reset(hashState);
1333+
for (int y = 0; y < height; y++) {
1334+
// Feed row y into the hash. The offset must scale with y; a constant offset
// would hash the same row on every iteration and ignore the rest of the image,
// letting the contents-hash cache lookup match stale textures.
XXH3_64bits_update(hashState, srcPixels + y * srcStrideInBytes, widthInBytes);
1335+
}
1336+
uint64_t imageHash = XXH3_64bits_digest(hashState);
1337+
XXH3_freeState(hashState);
1338+
13271339
// TODO: We can just change the texture format and flip some bits around instead of this.
13281340
// Could share code with the texture cache perhaps.
13291341
auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
@@ -1396,16 +1408,28 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
13961408

13971409
int frameNumber = draw_->GetFrameCount();
13981410

1399-
// Look for a matching texture we can re-use.
1411+
// First look for an exact match (including contents hash) that we can re-use.
1412+
for (auto &iter : drawPixelsCache_) {
1413+
if (iter.contentsHash == imageHash && iter.tex->Width() == width && iter.tex->Height() == height && iter.tex->Format() == texFormat) {
1414+
iter.frameNumber = frameNumber;
1415+
gpuStats.numCachedUploads++;
1416+
return iter.tex;
1417+
}
1418+
}
1419+
1420+
// Then, look for an alternative one that's not been used recently that we can overwrite.
14001421
for (auto &iter : drawPixelsCache_) {
14011422
if (iter.frameNumber >= frameNumber - 3 || iter.tex->Width() != width || iter.tex->Height() != height || iter.tex->Format() != texFormat) {
14021423
continue;
14031424
}
14041425

14051426
// OK, current one seems good, let's use it (and mark it used).
1427+
gpuStats.numUploads++;
14061428
draw_->UpdateTextureLevels(iter.tex, &srcPixels, generateTexture, 1);
14071429
// NOTE: numFlips is no good - this is called every frame when paused sometimes!
14081430
iter.frameNumber = frameNumber;
1431+
// We need to update the hash for future matching.
1432+
iter.contentsHash = imageHash;
14091433
return iter.tex;
14101434
}
14111435

@@ -1435,8 +1459,9 @@ Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, G
14351459

14361460
// INFO_LOG(G3D, "Creating drawPixelsCache texture: %dx%d", tex->Width(), tex->Height());
14371461

1438-
DrawPixelsEntry entry{ tex, frameNumber };
1462+
DrawPixelsEntry entry{ tex, imageHash, frameNumber };
14391463
drawPixelsCache_.push_back(entry);
1464+
gpuStats.numUploads++;
14401465
return tex;
14411466
}
14421467

GPU/Common/FramebufferManagerCommon.h

+1
Original file line number | Diff line number | Diff line change
@@ -269,6 +269,7 @@ class DrawContext;
269269

270270
struct DrawPixelsEntry {
271271
Draw::Texture *tex;
272+
uint64_t contentsHash;
272273
int frameNumber;
273274
};
274275

GPU/GPU.h

+2
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,7 @@ struct GPUStatistics {
9292
numBlockingReadbacks = 0;
9393
numReadbacks = 0;
9494
numUploads = 0;
95+
numCachedUploads = 0;
9596
numDepal = 0;
9697
numClears = 0;
9798
numDepthCopies = 0;
@@ -126,6 +127,7 @@ struct GPUStatistics {
126127
int numBlockingReadbacks;
127128
int numReadbacks;
128129
int numUploads;
130+
int numCachedUploads;
129131
int numDepal;
130132
int numClears;
131133
int numDepthCopies;

GPU/GPUCommonHW.cpp

+2-1
Original file line number | Diff line number | Diff line change
@@ -1688,7 +1688,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
16881688
"Vertices: %d drawn: %d\n"
16891689
"FBOs active: %d (evaluations: %d)\n"
16901690
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
1691-
"readbacks %d (%d non-block), uploads %d, depal %d\n"
1691+
"readbacks %d (%d non-block), upload %d (cached %d), depal %d\n"
16921692
"block transfers: %d\n"
16931693
"replacer: tracks %d references, %d unique textures\n"
16941694
"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
@@ -1713,6 +1713,7 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
17131713
gpuStats.numBlockingReadbacks,
17141714
gpuStats.numReadbacks,
17151715
gpuStats.numUploads,
1716+
gpuStats.numCachedUploads,
17161717
gpuStats.numDepal,
17171718
gpuStats.numBlockTransfers,
17181719
gpuStats.numReplacerTrackedTex,

GPU/ge_constants.h

+2-2
Original file line number | Diff line number | Diff line change
@@ -460,10 +460,10 @@ inline bool IsTextureFormat16Bit(GETextureFormat tfmt) {
460460

461461
inline int BufferFormatBytesPerPixel(GEBufferFormat format) {
462462
switch (format) {
463-
case GE_FORMAT_8888: return 4; // applies to depth as well.
463+
case GE_FORMAT_8888: return 4;
464464
case GE_FORMAT_CLUT8: return 1;
465465
default:
466-
return 2;
466+
return 2; // works for depth as well as the 16-bit color formats.
467467
}
468468
}
469469

0 commit comments

Comments
 (0)