Skip to content

Commit e2a58f8

Browse files
committed
fixing merge
1 parent 9367c54 commit e2a58f8

File tree

8 files changed

+301
-187
lines changed

8 files changed

+301
-187
lines changed

src/video_core/amdgpu/liverpool.cpp

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ Liverpool::~Liverpool() {
7575
void Liverpool::Process(std::stop_token stoken) {
7676
Common::SetCurrentThreadName("shadPS4:GpuCommandProcessor");
7777

78+
gpu_id = std::this_thread::get_id();
7879
while (!stoken.stop_requested()) {
7980
{
8081
std::unique_lock lk{submit_mutex};
@@ -636,9 +637,9 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
636637
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
637638
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
638639
dma_data->dst_sel == DmaDataDst::Gds) {
639-
rasterizer->InlineData(dma_data->dst_addr_lo,
640-
dma_data->SrcAddress<const void*>(),
641-
dma_data->NumBytes(), true);
640+
rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
641+
dma_data->NumBytes(), true, false);
642+
642643
} else if (dma_data->src_sel == DmaDataSrc::Data &&
643644
(dma_data->dst_sel == DmaDataDst::Memory ||
644645
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
@@ -647,14 +648,17 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
647648
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
648649
(dma_data->dst_sel == DmaDataDst::Memory ||
649650
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
650-
// LOG_WARNING(Render_Vulkan, "GDS memory read");
651+
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
652+
dma_data->NumBytes(), false, true);
653+
651654
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
652655
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
653656
(dma_data->dst_sel == DmaDataDst::Memory ||
654657
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
655-
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
656-
dma_data->SrcAddress<const void*>(),
657-
dma_data->NumBytes(), false);
658+
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(),
659+
dma_data->SrcAddress<VAddr>(), dma_data->NumBytes(),
660+
false, false);
661+
658662
} else {
659663
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
660664
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));
@@ -838,8 +842,9 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
838842
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
839843
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
840844
dma_data->dst_sel == DmaDataDst::Gds) {
841-
rasterizer->InlineData(dma_data->dst_addr_lo, dma_data->SrcAddress<const void*>(),
842-
dma_data->NumBytes(), true);
845+
rasterizer->CopyBuffer(dma_data->dst_addr_lo, dma_data->SrcAddress<VAddr>(),
846+
dma_data->NumBytes(), true, false);
847+
843848
} else if (dma_data->src_sel == DmaDataSrc::Data &&
844849
(dma_data->dst_sel == DmaDataDst::Memory ||
845850
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
@@ -848,14 +853,16 @@ Liverpool::Task Liverpool::ProcessCompute(const u32* acb, u32 acb_dwords, u32 vq
848853
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
849854
(dma_data->dst_sel == DmaDataDst::Memory ||
850855
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
851-
// LOG_WARNING(Render_Vulkan, "GDS memory read");
856+
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->src_addr_lo,
857+
dma_data->NumBytes(), false, true);
858+
852859
} else if ((dma_data->src_sel == DmaDataSrc::Memory ||
853860
dma_data->src_sel == DmaDataSrc::MemoryUsingL2) &&
854861
(dma_data->dst_sel == DmaDataDst::Memory ||
855862
dma_data->dst_sel == DmaDataDst::MemoryUsingL2)) {
856-
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
857-
dma_data->SrcAddress<const void*>(), dma_data->NumBytes(),
858-
false);
863+
rasterizer->CopyBuffer(dma_data->DstAddress<VAddr>(), dma_data->SrcAddress<VAddr>(),
864+
dma_data->NumBytes(), false, false);
865+
859866
} else {
860867
UNREACHABLE_MSG("WriteData src_sel = {}, dst_sel = {}",
861868
u32(dma_data->src_sel.Value()), u32(dma_data->dst_sel.Value()));

src/video_core/amdgpu/liverpool.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1699,4 +1699,4 @@ static_assert(GFX6_3D_REG_INDEX(vgt_tf_memory_base) == 0xc250);
16991699

17001700
#undef GFX6_3D_REG_INDEX
17011701

1702-
} // namespace AmdGpu
1702+
} // namespace AmdGpu

src/video_core/buffer_cache/buffer_cache.cpp

Lines changed: 120 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: GPL-2.0-or-later
33

44
#include <algorithm>
5+
#include <semaphore>
56
#include "common/alignment.h"
67
#include "common/debug.h"
78
#include "common/scope_exit.h"
@@ -35,8 +36,8 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
3536
gds_buffer{instance, scheduler, MemoryUsage::Stream, 0, AllFlags, DataShareBufferSize},
3637
bda_pagetable_buffer{instance, scheduler, MemoryUsage::DeviceLocal,
3738
0, AllFlags, BDA_PAGETABLE_SIZE},
38-
fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE),
39-
memory_tracker{tracker} {
39+
fault_buffer(instance, scheduler, MemoryUsage::DeviceLocal, 0, AllFlags, FAULT_BUFFER_SIZE) {
40+
memory_tracker = std::make_unique<MemoryTracker>(tracker);
4041
Vulkan::SetObjectName(instance.GetDevice(), gds_buffer.Handle(), "GDS Buffer");
4142
Vulkan::SetObjectName(instance.GetDevice(), bda_pagetable_buffer.Handle(),
4243
"BDA Page Table Buffer");
@@ -127,21 +128,35 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
127128
BufferCache::~BufferCache() = default;
128129

129130
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size, bool unmap) {
130-
const bool is_tracked = IsRegionRegistered(device_addr, size);
131-
if (is_tracked) {
132-
// Mark the page as CPU modified to stop tracking writes.
133-
memory_tracker.MarkRegionAsCpuModified(device_addr, size);
131+
if (!IsRegionRegistered(device_addr, size)) {
132+
return;
133+
}
134+
if (memory_tracker->IsRegionGpuModified(device_addr, size)) {
135+
ReadMemory(device_addr, size);
136+
}
137+
memory_tracker->MarkRegionAsCpuModified(device_addr, size);
138+
}
134139

135-
if (unmap) {
136-
return;
137-
}
140+
/// Downloads the cached buffer contents covering [device_addr, device_addr + size) and then
/// clears the GPU-modified state of that range in the memory tracker.
///
/// The download is performed inline when the caller is already on the thread whose id is
/// stored in liverpool->gpu_id. Any other caller hands the work to Liverpool::SendCommand and
/// blocks on a semaphore until the submitted callable has finished.
void BufferCache::ReadMemory(VAddr device_addr, u64 size) {
    // Single definition of the work that has to happen, whichever thread ends up running it.
    const auto download_range = [this, device_addr, size] {
        Buffer& target = slot_buffers[FindBuffer(device_addr, size)];
        DownloadBufferMemory(target, device_addr, size);
    };

    const bool on_gpu_thread = std::this_thread::get_id() == liverpool->gpu_id;
    if (on_gpu_thread) {
        download_range();
    } else {
        // Capturing locals by reference is safe here: this stack frame is kept alive by the
        // acquire() below until the submitted callable has signalled completion.
        std::binary_semaphore finished{0};
        liverpool->SendCommand([&download_range, &finished] {
            download_range();
            finished.release();
        });
        finished.acquire();
    }

    memory_tracker->UnmarkRegionAsGpuModified(device_addr, size);
}
140155

141156
void BufferCache::DownloadBufferMemory(Buffer& buffer, VAddr device_addr, u64 size) {
142157
boost::container::small_vector<vk::BufferCopy, 1> copies;
143158
u64 total_size_bytes = 0;
144-
memory_tracker.ForEachDownloadRange<true>(
159+
memory_tracker->ForEachDownloadRange<true>(
145160
device_addr, size, [&](u64 device_addr_out, u64 range_size) {
146161
const VAddr buffer_addr = buffer.CpuAddr();
147162
const auto add_download = [&](VAddr start, VAddr end) {
@@ -307,6 +322,94 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
307322
InlineDataBuffer(*buffer, address, value, num_bytes);
308323
}
309324

325+
/// Copies num_bytes from src to dst. Each side is either a guest virtual address or, when the
/// matching *_gds flag is set, an address interpreted relative to the GDS buffer.
///
/// One of three paths is taken depending on where the data currently lives:
///  - neither side is registered in the buffer cache: plain host memcpy;
///  - only the source is unregistered: the host bytes are forwarded to InlineData();
///  - otherwise: a GPU-side buffer copy bracketed by buffer memory barriers.
///
/// @param dst       Destination guest address, or GDS address when dst_gds is true.
/// @param src       Source guest address, or GDS address when src_gds is true.
/// @param num_bytes Number of bytes to copy. Zero is treated as a no-op.
/// @param dst_gds   True when dst refers to the GDS buffer.
/// @param src_gds   True when src refers to the GDS buffer.
void BufferCache::CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds) {
    if (num_bytes == 0) {
        // Nothing to transfer. Bail out before recording anything: Vulkan requires both
        // VkBufferCopy::size and VkBufferMemoryBarrier2::size to be greater than zero.
        return;
    }

    // NOTE(review): assumes IsRegionRegistered is a side-effect-free query, so evaluating it
    // once for the source (instead of up to twice) does not change behavior - confirm.
    const bool src_on_host = !src_gds && !IsRegionRegistered(src, num_bytes);
    if (src_on_host) {
        if (!dst_gds && !IsRegionRegistered(dst, num_bytes)) {
            // Neither buffer has been transferred to the GPU yet, so the copy can be done
            // entirely in host memory.
            memcpy(std::bit_cast<void*>(dst), std::bit_cast<void*>(src), num_bytes);
            return;
        }
        // Source bytes only exist on the host; push them into the destination buffer.
        InlineData(dst, std::bit_cast<void*>(src), num_bytes, dst_gds);
        return;
    }

    // From here on the source lives on the GPU. If the destination is not registered yet we
    // cannot copy into host memory without a readback, so fall back to creating the
    // destination buffer on the GPU to at least have the data there.
    const auto resolve_buffer = [&](VAddr address, bool is_gds) -> const Buffer& {
        if (is_gds) {
            return gds_buffer;
        }
        const BufferId buffer_id = FindBuffer(address, num_bytes);
        return slot_buffers[buffer_id];
    };
    // Source is resolved before destination, matching the original lookup order.
    // NOTE(review): if FindBuffer can create or merge buffers, resolving dst could in principle
    // invalidate the src reference - worth confirming against FindBuffer's implementation.
    const Buffer& src_buffer = resolve_buffer(src, src_gds);
    const Buffer& dst_buffer = resolve_buffer(dst, dst_gds);

    const auto src_offset = src_buffer.Offset(src);
    const auto dst_offset = dst_buffer.Offset(dst);

    const vk::BufferCopy region{
        .srcOffset = src_offset,
        .dstOffset = dst_offset,
        .size = num_bytes,
    };

    const vk::BufferMemoryBarrier2 buf_barriers_before[2] = {
        {
            // Destination: earlier reads AND writes must complete and be made available
            // before the transfer overwrites the range (write-after-write needs a memory
            // dependency, not just an execution dependency).
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask =
                vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .buffer = dst_buffer.Handle(),
            .offset = dst_offset,
            .size = num_bytes,
        },
        {
            // Source: earlier writes must be visible to the transfer read.
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eTransferRead,
            .buffer = src_buffer.Handle(),
            .offset = src_offset,
            .size = num_bytes,
        },
    };
    const vk::BufferMemoryBarrier2 buf_barriers_after[2] = {
        {
            // Destination: the transfer write must be visible to later reads and ordered
            // before later writes to the same range.
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask =
                vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
            .buffer = dst_buffer.Handle(),
            .offset = dst_offset,
            .size = num_bytes,
        },
        {
            // Source: later writes must not start before the transfer has finished reading.
            .srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .srcAccessMask = vk::AccessFlagBits2::eTransferRead,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
            .buffer = src_buffer.Handle(),
            .offset = src_offset,
            .size = num_bytes,
        },
    };

    // Transfer commands cannot be recorded inside an active render pass.
    scheduler.EndRendering();
    const auto cmdbuf = scheduler.CommandBuffer();
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 2,
        .pBufferMemoryBarriers = buf_barriers_before,
    });
    cmdbuf.copyBuffer(src_buffer.Handle(), dst_buffer.Handle(), region);
    cmdbuf.pipelineBarrier2(vk::DependencyInfo{
        .dependencyFlags = vk::DependencyFlagBits::eByRegion,
        .bufferMemoryBarrierCount = 2,
        .pBufferMemoryBarriers = buf_barriers_after,
    });
}
412+
310413
void BufferCache::WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
311414
ASSERT_MSG(address % 4 == 0, "GDS offset must be dword aligned");
312415
if (!is_gds && !IsRegionRegistered(address, num_bytes)) {
@@ -329,7 +432,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
329432
// use device local stream buffer to reduce renderpass breaks.
330433
// Maybe we want to modify the threshold now that the page size is 16KB?
331434
static constexpr u64 StreamThreshold = CACHING_PAGESIZE;
332-
const bool is_gpu_dirty = memory_tracker.IsRegionGpuModified(device_addr, size);
435+
const bool is_gpu_dirty = memory_tracker->IsRegionGpuModified(device_addr, size);
333436
if (!is_written && size <= StreamThreshold && !is_gpu_dirty) {
334437
const u64 offset = stream_buffer.Copy(device_addr, size, instance.UniformMinAlignment());
335438
return {&stream_buffer, offset};
@@ -341,7 +444,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
341444
Buffer& buffer = slot_buffers[buffer_id];
342445
SynchronizeBuffer(buffer, device_addr, size, is_texel_buffer);
343446
if (is_written) {
344-
memory_tracker.MarkRegionAsGpuModified(device_addr, size);
447+
memory_tracker->MarkRegionAsGpuModified(device_addr, size);
345448
gpu_modified_ranges.Add(device_addr, size);
346449
}
347450
return {&buffer, buffer.Offset(device_addr)};
@@ -361,7 +464,7 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size,
361464
// If no buffer contains the full requested range but some buffer within was GPU-modified,
362465
// fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
363466
// This is only done if the request prefers to use GPU memory, otherwise we can skip it.
364-
if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
467+
if (prefer_gpu && memory_tracker->IsRegionGpuModified(gpu_addr, size)) {
365468
return ObtainBuffer(gpu_addr, size, false, false);
366469
}
367470
// In all other cases, just do a CPU copy to the staging buffer.
@@ -375,11 +478,11 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
375478
}
376479

377480
bool BufferCache::IsRegionCpuModified(VAddr addr, size_t size) {
378-
return memory_tracker.IsRegionCpuModified(addr, size);
481+
return memory_tracker->IsRegionCpuModified(addr, size);
379482
}
380483

381484
bool BufferCache::IsRegionGpuModified(VAddr addr, size_t size) {
382-
return memory_tracker.IsRegionGpuModified(addr, size);
485+
return memory_tracker->IsRegionGpuModified(addr, size);
383486
}
384487

385488
BufferId BufferCache::FindBuffer(VAddr device_addr, u32 size) {
@@ -718,7 +821,7 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
718821
boost::container::small_vector<vk::BufferCopy, 4> copies;
719822
u64 total_size_bytes = 0;
720823
VAddr buffer_start = buffer.CpuAddr();
721-
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
824+
memory_tracker->ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
722825
copies.push_back(vk::BufferCopy{
723826
.srcOffset = total_size_bytes,
724827
.dstOffset = device_addr_out - buffer_start,
@@ -1028,4 +1131,4 @@ void BufferCache::DeleteBuffer(BufferId buffer_id) {
10281131
buffer.is_deleted = true;
10291132
}
10301133

1031-
} // namespace VideoCore
1134+
} // namespace VideoCore

src/video_core/buffer_cache/buffer_cache.h

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include "common/slot_vector.h"
1010
#include "common/types.h"
1111
#include "video_core/buffer_cache/buffer.h"
12-
#include "video_core/buffer_cache/memory_tracker_base.h"
12+
#include "video_core/buffer_cache/memory_tracker.h"
1313
#include "video_core/buffer_cache/range_set.h"
1414
#include "video_core/multi_level_page_table.h"
1515

@@ -35,6 +35,8 @@ using BufferId = Common::SlotId;
3535
static constexpr BufferId NULL_BUFFER_ID{0};
3636

3737
class TextureCache;
38+
class MemoryTracker;
39+
class PageManager;
3840

3941
class BufferCache {
4042
public:
@@ -64,7 +66,6 @@ class BufferCache {
6466
VAddr end;
6567
bool has_stream_leap = false;
6668
};
67-
6869
public:
6970
explicit BufferCache(const Vulkan::Instance& instance, Vulkan::Scheduler& scheduler,
7071
Vulkan::Rasterizer& rasterizer_, AmdGpu::Liverpool* liverpool,
@@ -99,6 +100,8 @@ class BufferCache {
99100
/// Invalidates any buffer in the logical page range.
100101
void InvalidateMemory(VAddr device_addr, u64 size, bool unmap);
101102

103+
void ReadMemory(VAddr device_addr, u64 size);
104+
102105
/// Binds host vertex buffers for the current draw.
103106
void BindVertexBuffers(const Vulkan::GraphicsPipeline& pipeline);
104107

@@ -108,6 +111,8 @@ class BufferCache {
108111
/// Writes a value to GPU buffer. (uses command buffer to temporarily store the data)
109112
void InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
110113

114+
void CopyBuffer(VAddr dst, VAddr src, u32 num_bytes, bool dst_gds, bool src_gds);
115+
111116
/// Writes a value to GPU buffer. (uses staging buffer to temporarily store the data)
112117
void WriteData(VAddr address, const void* value, u32 num_bytes, bool is_gds);
113118

@@ -184,7 +189,7 @@ class BufferCache {
184189
Vulkan::Rasterizer& rasterizer;
185190
AmdGpu::Liverpool* liverpool;
186191
TextureCache& texture_cache;
187-
PageManager& tracker;
192+
std::unique_ptr<MemoryTracker> memory_tracker;
188193
StreamBuffer staging_buffer;
189194
StreamBuffer stream_buffer;
190195
StreamBuffer download_buffer;
@@ -195,11 +200,10 @@ class BufferCache {
195200
Common::SlotVector<Buffer> slot_buffers;
196201
RangeSet gpu_modified_ranges;
197202
SplitRangeMap<BufferId> buffer_ranges;
198-
MemoryTracker memory_tracker;
199203
PageTable page_table;
200204
vk::UniqueDescriptorSetLayout fault_process_desc_layout;
201205
vk::UniquePipeline fault_process_pipeline;
202206
vk::UniquePipelineLayout fault_process_pipeline_layout;
203207
};
204208

205-
} // namespace VideoCore
209+
} // namespace VideoCore

0 commit comments

Comments
 (0)