Skip to content

Commit 4fafd99

Browse files
buffer_cache: Small performance improvement
Remove the redundant call to UnmarkRegionAsGpuModified; ForEachDownloadRange can perform that unmarking itself while looping over pages. Move the stalling download operation after the download page gathering, which might avoid stalling unnecessarily in some cases.
1 parent c309e1d commit 4fafd99

File tree

8 files changed

+42
-51
lines changed

8 files changed

+42
-51
lines changed

src/video_core/amdgpu/liverpool.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ struct Liverpool {
8787
}
8888
};
8989

90-
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x1000) {
90+
static const BinaryInfo& SearchBinaryInfo(const u32* code, size_t search_limit = 0x2000) {
9191
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
9292

9393
if (code[0] == token_mov_vcchi) {

src/video_core/buffer_cache/buffer_cache.cpp

Lines changed: 33 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -46,34 +46,16 @@ void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
4646
if (!IsRegionRegistered(device_addr, size)) {
4747
return;
4848
}
49-
if (memory_tracker->IsRegionGpuModified(device_addr, size)) {
50-
ReadMemory(device_addr, size);
51-
}
49+
DownloadMemory(device_addr, size);
5250
memory_tracker->MarkRegionAsCpuModified(device_addr, size);
5351
}
5452

55-
void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
56-
if (std::this_thread::get_id() != liverpool->gpu_id) {
57-
std::binary_semaphore command_wait{0};
58-
liverpool->SendCommand([this, &command_wait, device_addr, size] {
59-
Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)];
60-
DownloadBufferMemory(buffer, device_addr, size);
61-
command_wait.release();
62-
});
63-
command_wait.acquire();
64-
} else {
65-
Buffer& buffer = slot_buffers[FindBuffer(device_addr, size)];
66-
DownloadBufferMemory(buffer, device_addr, size);
67-
}
68-
memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
69-
}
70-
71-
void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
53+
void BufferCache::DownloadMemory(VAddr device_addr, u64 size) {
7254
boost::container::small_vector<vk::BufferCopy, 1> copies;
7355
u64 total_size_bytes = 0;
56+
const VAddr buffer_addr = PageManager::GetPageAddr(device_addr);
7457
memory_tracker->ForEachDownloadRange<true>(
7558
device_addr, size, [&](u64 device_addr_out, u64 range_size) {
76-
const VAddr buffer_addr = buffer.CpuAddr();
7759
const auto add_download = [&](VAddr start, VAddr end) {
7860
const u64 new_offset = start - buffer_addr;
7961
const u64 new_size = end - start;
@@ -93,22 +75,36 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
9375
if (total_size_bytes == 0) {
9476
return;
9577
}
96-
const auto [staging, offset] = staging_buffer.Map(total_size_bytes);
97-
for (auto& copy : copies) {
98-
// Modify copies to have the staging offset in mind
99-
copy.dstOffset += offset;
100-
}
101-
staging_buffer.Commit();
102-
scheduler.EndRendering();
103-
const auto cmdbuf = scheduler.CommandBuffer();
104-
cmdbuf.copyBuffer(buffer.buffer, staging_buffer.Handle(), copies);
105-
scheduler.Finish();
106-
auto* memory = Core::Memory::Instance();
107-
for (const auto& copy : copies) {
108-
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
109-
const u64 dst_offset = copy.dstOffset - offset;
110-
memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), staging + dst_offset,
111-
copy.size);
78+
std::binary_semaphore command_wait{0};
79+
auto do_download = [this, &command_wait, buffer_addr, device_addr,
80+
size, total_size_bytes, &copies]() mutable {
81+
const BufferId buffer_id = FindBuffer(device_addr, size);
82+
auto& buffer = slot_buffers[buffer_id];
83+
const auto [staging, offset] = staging_buffer.Map(total_size_bytes);
84+
for (auto& copy : copies) {
85+
// Modify copies to have the staging offset in mind
86+
copy.srcOffset += buffer.Offset(buffer_addr);
87+
copy.dstOffset += offset;
88+
}
89+
staging_buffer.Commit();
90+
scheduler.EndRendering();
91+
const auto cmdbuf = scheduler.CommandBuffer();
92+
cmdbuf.copyBuffer(buffer.buffer, staging_buffer.Handle(), copies);
93+
scheduler.Finish();
94+
auto* memory = Core::Memory::Instance();
95+
for (const auto& copy : copies) {
96+
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
97+
const u64 dst_offset = copy.dstOffset - offset;
98+
memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), staging + dst_offset,
99+
copy.size);
100+
}
101+
command_wait.release();
102+
};
103+
if (std::this_thread::get_id() != liverpool->gpu_id) {
104+
liverpool->SendCommand(std::move(do_download));
105+
command_wait.acquire();
106+
} else {
107+
do_download();
112108
}
113109
}
114110

src/video_core/buffer_cache/buffer_cache.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class BufferCache {
8282
void InvalidateMemory(VAddr device_addr, u64 size);
8383

8484
/// Waits on pending downloads in the logical page range.
85-
void ReadMemory(VAddr device_addr, u64 size);
85+
void DownloadMemory(VAddr device_addr, u64 size);
8686

8787
/// Binds host vertex buffers for the current draw.
8888
void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -134,8 +134,6 @@ class BufferCache {
134134
}
135135
}
136136

137-
void DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size);
138-
139137
[[nodiscard]] OverlapResult ResolveOverlaps(VAddr device_addr, u32 wanted_size);
140138

141139
void JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, bool accumulate_stream_score);

src/video_core/buffer_cache/word_manager.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ class RegionManager {
223223
* @param size Size in bytes of the region to query for modifications
224224
*/
225225
template <Type type>
226-
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
226+
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) noexcept {
227+
std::scoped_lock lk{lock};
227228
const std::span<const u64> state_words = Span<type>();
228229
bool result = false;
229230
IterateWords(offset, size, [&](size_t index, u64 mask) {

src/video_core/page_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ struct PageManager::Impl {
153153
if (Common::IsWriteError(context)) {
154154
return rasterizer->InvalidateMemory(addr, 1);
155155
} else {
156-
return rasterizer->ReadMemory(addr, 1);
156+
return rasterizer->DownloadMemory(addr, 1);
157157
}
158158
return false;
159159
}

src/video_core/renderer_vulkan/vk_rasterizer.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -929,12 +929,8 @@ bool Rasterizer::InvalidateMemory(VAddr addr, u64 size) {
929929
return true;
930930
}
931931

932-
bool Rasterizer::ReadMemory(VAddr addr, u64 size) {
933-
if (!IsMapped(addr, size)) {
934-
// Not GPU mapped memory, can skip invalidation logic entirely.
935-
return false;
936-
}
937-
buffer_cache.ReadMemory(addr, size);
932+
bool Rasterizer::DownloadMemory(VAddr addr, u64 size) {
933+
buffer_cache.DownloadMemory(addr, size);
938934
return true;
939935
}
940936

src/video_core/renderer_vulkan/vk_rasterizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class Rasterizer {
5757
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
5858
u32 ReadDataFromGds(u32 gsd_offset);
5959
bool InvalidateMemory(VAddr addr, u64 size);
60-
bool ReadMemory(VAddr addr, u64 size);
60+
bool DownloadMemory(VAddr addr, u64 size);
6161
bool IsMapped(VAddr addr, u64 size);
6262
void MapMemory(VAddr addr, u64 size);
6363
void UnmapMemory(VAddr addr, u64 size);

src/video_core/texture_cache/texture_cache.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,9 +436,9 @@ ImageView& TextureCache::FindDepthTarget(BaseDesc& desc) {
436436
const ImageId image_id = FindImage(desc);
437437
Image& image = slot_images[image_id];
438438
image.flags |= ImageFlagBits::GpuModified;
439-
image.flags &= ~ImageFlagBits::Dirty;
440439
image.usage.depth_target = 1u;
441440
image.usage.stencil = image.info.HasStencil();
441+
UpdateImage(image_id);
442442

443443
// Register meta data for this depth buffer
444444
if (!(image.flags & ImageFlagBits::MetaRegistered)) {

0 commit comments

Comments
 (0)