Skip to content

Commit cf3ac75

Browse files
committed
true new readbacks
1 parent 32455d8 commit cf3ac75

File tree

3 files changed

+151
-90
lines changed

3 files changed

+151
-90
lines changed

src/video_core/buffer_cache/buffer_cache.cpp

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
#include "common/debug.h"
88
#include "common/scope_exit.h"
99
#include "common/types.h"
10+
#include "core/memory.h"
1011
#include "video_core/amdgpu/liverpool.h"
1112
#include "video_core/buffer_cache/buffer_cache.h"
13+
#include "video_core/buffer_cache/memory_tracker.h"
1214
#include "video_core/host_shaders/fault_buffer_process_comp.h"
1315
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
1416
#include "video_core/renderer_vulkan/vk_instance.h"
@@ -167,7 +169,9 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
167169
.dstOffset = total_size_bytes,
168170
.size = new_size,
169171
});
170-
total_size_bytes += new_size;
172+
// Align up to avoid cache conflicts
173+
constexpr u64 align = 64ULL;
174+
total_size_bytes += Common::AlignUp(new_size, align);
171175
};
172176
gpu_modified_ranges.ForEachInRange(device_addr_out, range_size, add_download);
173177
gpu_modified_ranges.Subtract(device_addr_out, range_size);
@@ -185,10 +189,12 @@ void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 si
185189
const auto cmdbuf = scheduler.CommandBuffer();
186190
cmdbuf.copyBuffer(buffer.buffer, download_buffer.Handle(), copies);
187191
scheduler.Finish();
192+
auto* memory = Core::Memory::Instance();
188193
for (const auto& copy : copies) {
189194
const VAddr copy_device_addr = buffer.CpuAddr() + copy.srcOffset;
190195
const u64 dst_offset = copy.dstOffset - offset;
191-
std::memcpy(std::bit_cast<u8*>(copy_device_addr), download + dst_offset, copy.size);
196+
memory->TryWriteBacking(std::bit_cast<u8*>(copy_device_addr), download + dst_offset,
197+
copy.size);
192198
}
193199
}
194200

@@ -308,18 +314,50 @@ void BufferCache::BindIndexBuffer(u32 index_offset) {
308314

309315
void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
310316
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
311-
if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
317+
if (!is_gds) {
312318
memcpy(std::bit_cast<void*>(address), value, num_bytes);
313-
return;
319+
if (!IsRegionRegistered(address, num_bytes)) {
320+
return;
321+
}
314322
}
315-
Buffer* buffer = [&] {
323+
scheduler.EndRendering();
324+
const Buffer* buffer = [&] {
316325
if (is_gds) {
317326
return &gds_buffer;
318327
}
319328
const BufferId buffer_id = FindBuffer(address, num_bytes);
320329
return &slot_buffers[buffer_id];
321330
}();
322-
InlineDataBuffer(*buffer, address, value, num_bytes);
331+
const auto cmdbuf = scheduler.CommandBuffer();
332+
const vk::BufferMemoryBarrier2 pre_barrier = {
333+
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
334+
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
335+
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
336+
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
337+
.buffer = buffer->Handle(),
338+
.offset = buffer->Offset(address),
339+
.size = num_bytes,
340+
};
341+
const vk::BufferMemoryBarrier2 post_barrier = {
342+
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
343+
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
344+
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
345+
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
346+
.buffer = buffer->Handle(),
347+
.offset = buffer->Offset(address),
348+
.size = num_bytes,
349+
};
350+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
351+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
352+
.bufferMemoryBarrierCount = 1,
353+
.pBufferMemoryBarriers = &pre_barrier,
354+
});
355+
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
356+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
357+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
358+
.bufferMemoryBarrierCount = 1,
359+
.pBufferMemoryBarriers = &post_barrier,
360+
});
323361
}
324362

325363
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
@@ -431,9 +469,8 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
431469
// For small uniform buffers that have not been modified by gpu
432470
// use device local stream buffer to reduce renderpass breaks.
433471
// Maybe we want to modify the threshold now that the page size is 16KB?
434-
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
435-
const bool is_gpu_dirty = memory_tracker->IsRegionGpuModified(device_addr, size);
436-
if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
472+
static constexpr u64 StreamThreshold = CACHING_PAGESIZE * 2;
473+
if (!is_written && size <= StreamThreshold && !IsRegionGpuModified(device_addr, size)) {
437474
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
438475
return {&stream_buffer, offset};
439476
}
@@ -443,7 +480,11 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
443480
}
444481
Buffer& buffer = slot_buffers[buffer_id];
445482
SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
446-
if (is_written) {
483+
484+
// Mark region as GPU modified to get additional tracking needed for readbacks.
485+
// Sometimes huge buffers may be bound, so set a threshold here as well.
486+
static constexpr u64 GpuMarkThreshold = 512_MB;
487+
if (is_written && size <= GpuMarkThreshold) {
447488
memory_tracker->MarkRegionAsGpuModified(device_addr, size);
448489
gpu_modified_ranges.Add(device_addr, size);
449490
}
@@ -452,8 +493,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
452493

453494
std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) {
454495
// Check if any buffer contains the full requested range.
455-
const u64 page = gpu_addr >> CACHING_PAGEBITS;
456-
const BufferId buffer_id = page_table[page].buffer_id;
496+
const BufferId buffer_id = page_table[gpu_addr >> CACHING_PAGEBITS].buffer_id;
457497
if (buffer_id) {
458498
Buffer& buffer = slot_buffers[buffer_id];
459499
if (buffer.IsInBounds(gpu_addr, size)) {
@@ -1131,4 +1171,4 @@ void BufferCache::DeleteBuffer(BufferId buffer_id) {
11311171
buffer.is_deleted = true;
11321172
}
11331173

1134-
} // namespace VideoCore
1174+
} // namespace VideoCore

src/video_core/buffer_cache/buffer_cache.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,6 @@ namespace AmdGpu {
1717
struct Liverpool;
1818
}
1919

20-
namespace Shader {
21-
namespace Gcn {
22-
struct FetchShaderData;
23-
}
24-
struct Info;
25-
} // namespace Shader
26-
2720
namespace Vulkan {
2821
class GraphicsPipeline;
2922
}
@@ -66,6 +59,12 @@ class BufferCache {
6659
VAddr end;
6760
bool has_stream_leap = false;
6861
};
62+
using IntervalSet =
63+
boost::icl::interval_set<VAddr, std::less,
64+
ICL_INTERVAL_INSTANCE(ICL_INTERVAL_DEFAULT, VAddr, std::less),
65+
RangeSetsAllocator>;
66+
using IntervalType = typename IntervalSet::interval_type;
67+
6968
public:
7069
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
7170
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool,
@@ -100,7 +99,8 @@ class BufferCache {
10099
/// Invalidates any buffer in the logical page range.
101100
void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
102101

103-
void ReadMemory(VAddr device_addr, u64 size);
102+
/// Waits on pending downloads in the logical page range.
103+
void ReadMemory(VAddr device_addr, u64 size);
104104

105105
/// Binds host vertex buffers for the current draw.
106106
void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
@@ -111,7 +111,8 @@ class BufferCache {
111111
/// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
112112
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
113113

114-
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
114+
/// Performs buffer to buffer data copy on the GPU.
115+
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
115116

116117
/// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
117118
void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);

0 commit comments

Comments
 (0)