Skip to content

Commit 2eb548e

Browse files
committed
Ram spike fix - renderer_vulkan: Introduce shader HLE system with copy shader implementation. shadps4-emu#1683
1 parent feb85f8 commit 2eb548e

File tree

6 files changed

+170
-0
lines changed

6 files changed

+170
-0
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
745745
src/video_core/renderer_vulkan/vk_resource_pool.h
746746
src/video_core/renderer_vulkan/vk_scheduler.cpp
747747
src/video_core/renderer_vulkan/vk_scheduler.h
748+
src/video_core/renderer_vulkan/vk_shader_hle.cpp
749+
src/video_core/renderer_vulkan/vk_shader_hle.h
748750
src/video_core/renderer_vulkan/vk_shader_util.cpp
749751
src/video_core/renderer_vulkan/vk_shader_util.h
750752
src/video_core/renderer_vulkan/vk_swapchain.cpp

src/video_core/renderer_vulkan/vk_rasterizer.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "video_core/renderer_vulkan/vk_instance.h"
1010
#include "video_core/renderer_vulkan/vk_rasterizer.h"
1111
#include "video_core/renderer_vulkan/vk_scheduler.h"
12+
#include "video_core/renderer_vulkan/vk_shader_hle.h"
1213
#include "video_core/texture_cache/image_view.h"
1314
#include "video_core/texture_cache/texture_cache.h"
1415
#include "vk_rasterizer.h"
@@ -319,6 +320,11 @@ void Rasterizer::DispatchDirect() {
319320
return;
320321
}
321322

323+
const auto& cs = pipeline->GetStage(Shader::LogicalStage::Compute);
324+
if (ExecuteShaderHLE(cs, liverpool->regs, *this)) {
325+
return;
326+
}
327+
322328
if (!BindResources(pipeline)) {
323329
return;
324330
}

src/video_core/renderer_vulkan/vk_rasterizer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,14 @@ class Rasterizer {
2828
AmdGpu::Liverpool* liverpool);
2929
~Rasterizer();
3030

31+
/// Returns a mutable reference to this rasterizer's `scheduler` member.
[[nodiscard]] Scheduler& GetScheduler() noexcept {
32+
return scheduler;
33+
}
34+
35+
/// Returns a mutable reference to this rasterizer's `buffer_cache` member.
[[nodiscard]] VideoCore::BufferCache& GetBufferCache() noexcept {
36+
return buffer_cache;
37+
}
38+
3139
/// Returns a mutable reference to this rasterizer's `texture_cache` member.
[[nodiscard]] VideoCore::TextureCache& GetTextureCache() noexcept {
3240
return texture_cache;
3341
}

src/video_core/renderer_vulkan/vk_scheduler.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
1111
#include "video_core/renderer_vulkan/vk_resource_pool.h"
1212

13+
// Forward declaration of tracy::VkCtxScope so the type can be named in this header.
// NOTE(review): the use site is not visible in this hunk - confirm an incomplete type suffices.
namespace tracy {
14+
class VkCtxScope;
15+
}
16+
1317
namespace Vulkan {
1418

1519
class Instance;
src/video_core/renderer_vulkan/vk_shader_hle.cpp

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#include "shader_recompiler/info.h"
5+
#include "video_core/renderer_vulkan/vk_scheduler.h"
6+
#include "video_core/renderer_vulkan/vk_shader_hle.h"
7+
8+
#include "vk_rasterizer.h"
9+
10+
namespace Vulkan {
11+
12+
/// Value of Shader::Info::pgm_hash that selects the copy-shader HLE path in ExecuteShaderHLE.
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
13+
14+
/// Host-side (HLE) replacement for the copy compute shader.
///
/// Reads `regs.cs_program.dim_x` control entries from the control buffer and, for each entry,
/// copies `end + 1` dwords from dword index `src_idx` of the source buffer to dword index
/// `dst_idx` of the destination buffer. Copies are grouped into batches whose combined source
/// span and combined destination span each stay within MaxDistanceForMerge, so that a single
/// buffer-cache lookup and a single copyBuffer command serve the whole batch.
///
/// File-local helper: it is not declared in vk_shader_hle.h, hence internal linkage.
///
/// @param info Shader info; texture_buffers[0..2] must be the control, source and destination
///             buffers, in that order.
/// @param regs Register state; cs_program.dim_x is used as the number of control entries.
/// @param rasterizer Provides the scheduler and buffer cache used to record the copies.
/// @return Always true (the dispatch is considered handled).
static bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
                                 Rasterizer& rasterizer) {
    auto& scheduler = rasterizer.GetScheduler();
    auto& buffer_cache = rasterizer.GetBufferCache();

    // Copy shader defines three formatted buffers as inputs: control, source, and destination.
    const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info);
    const auto src_buf_sharp = info.texture_buffers[1].GetSharp(info);
    const auto dst_buf_sharp = info.texture_buffers[2].GetSharp(info);

    struct CopyShaderControl {
        u32 dst_idx;
        u32 src_idx;
        u32 end;
    };
    // The control buffer is indexed as a tightly packed array of three dwords per entry.
    static_assert(sizeof(CopyShaderControl) == 3 * sizeof(u32),
                  "CopyShaderControl must be three tightly packed dwords");
    // The control buffer is only ever read, so access it through a pointer-to-const.
    const auto* ctl_buf = reinterpret_cast<const CopyShaderControl*>(ctl_buf_sharp.base_address);

    // Add list of copies. The set drops exact duplicates and keeps the entries ordered.
    // NOTE(review): ordering relies on vk::BufferCopy being comparable - presumably via
    // Vulkan-Hpp's generated comparison operators; confirm.
    boost::container::set<vk::BufferCopy> copies;
    for (u32 i = 0; i < regs.cs_program.dim_x; i++) {
        const auto& [dst_idx, src_idx, end] = ctl_buf[i];
        // Compute byte offsets/sizes in 64 bits so that large indices are not silently
        // truncated to 32 bits and `end + 1` cannot wrap around to zero.
        const auto local_dst_offset = static_cast<vk::DeviceSize>(dst_idx) * sizeof(u32);
        const auto local_src_offset = static_cast<vk::DeviceSize>(src_idx) * sizeof(u32);
        const auto local_size = (static_cast<vk::DeviceSize>(end) + 1) * sizeof(u32);
        copies.emplace(local_src_offset, local_dst_offset, local_size);
    }

    // Transfer commands are recorded below, so leave any active rendering scope first.
    scheduler.EndRendering();

    static constexpr vk::MemoryBarrier READ_BARRIER{
        .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
        .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
    };
    static constexpr vk::MemoryBarrier WRITE_BARRIER{
        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
        .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
    };
    // Make all prior writes visible to the transfer stage before copying.
    scheduler.CommandBuffer().pipelineBarrier(
        vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer,
        vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {});

    static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB;
    boost::container::small_vector<vk::BufferCopy, 32> batch_copies;
    while (!copies.empty()) {
        // Place first copy into the current batch
        auto it = copies.begin();
        auto src_offset_min = it->srcOffset;
        auto src_offset_max = it->srcOffset + it->size;
        auto dst_offset_min = it->dstOffset;
        auto dst_offset_max = it->dstOffset + it->size;
        batch_copies.emplace_back(*it);
        it = copies.erase(it);

        // Greedily pull every remaining copy that keeps both spans within the merge limit;
        // copies that do not fit stay in the set for a later batch.
        while (it != copies.end()) {
            // Compute new src and dst bounds if we were to batch this copy
            auto new_src_offset_min = std::min(src_offset_min, it->srcOffset);
            auto new_src_offset_max = std::max(src_offset_max, it->srcOffset + it->size);
            if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) {
                ++it;
                continue;
            }

            auto new_dst_offset_min = std::min(dst_offset_min, it->dstOffset);
            auto new_dst_offset_max = std::max(dst_offset_max, it->dstOffset + it->size);
            if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) {
                ++it;
                continue;
            }

            // We can batch this copy
            src_offset_min = new_src_offset_min;
            src_offset_max = new_src_offset_max;
            dst_offset_min = new_dst_offset_min;
            dst_offset_max = new_dst_offset_max;
            batch_copies.emplace_back(*it);
            it = copies.erase(it);
        }

        // Obtain buffers for the total source and destination ranges.
        // NOTE(review): the trailing bool arguments are presumably (is_written, is_texel_buffer);
        // confirm against BufferCache::ObtainBuffer.
        const auto [src_buf, src_buf_offset] =
            buffer_cache.ObtainBuffer(src_buf_sharp.base_address + src_offset_min,
                                      src_offset_max - src_offset_min, false, false);
        const auto [dst_buf, dst_buf_offset] =
            buffer_cache.ObtainBuffer(dst_buf_sharp.base_address + dst_offset_min,
                                      dst_offset_max - dst_offset_min, true, false);

        // Apply found buffer base: rebase each copy from sharp-relative offsets to offsets
        // inside the buffers returned by the cache.
        for (auto& vk_copy : batch_copies) {
            vk_copy.srcOffset = vk_copy.srcOffset - src_offset_min + src_buf_offset;
            vk_copy.dstOffset = vk_copy.dstOffset - dst_offset_min + dst_buf_offset;
        }

        // Execute buffer copies.
        LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}",
                  src_offset_max - src_offset_min, dst_offset_max - dst_offset_min);
        scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), batch_copies);
        batch_copies.clear();
    }

    // Make the transfer writes visible to all subsequent commands.
    scheduler.CommandBuffer().pipelineBarrier(
        vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands,
        vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});

    return true;
}
119+
120+
/// Runs a host-side (HLE) implementation of the given shader when its program hash is known.
/// @param info Shader info whose pgm_hash selects the HLE routine.
/// @param regs Register state forwarded to the HLE routine.
/// @param rasterizer Rasterizer forwarded to the HLE routine.
/// @return The HLE routine's result when the hash matches; false when no routine matches.
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
                      Rasterizer& rasterizer) {
    // Only the copy shader currently has an HLE implementation.
    if (info.pgm_hash == COPY_SHADER_HASH) {
        return ExecuteCopyShaderHLE(info, regs, rasterizer);
    }
    return false;
}
129+
130+
} // namespace Vulkan
src/video_core/renderer_vulkan/vk_shader_hle.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#pragma once
5+
6+
#include "video_core/amdgpu/liverpool.h"
7+
8+
namespace Shader {
9+
struct Info;
10+
}
11+
12+
namespace Vulkan {
13+
14+
class Rasterizer;
15+
16+
/// Attempts to execute a shader using HLE if possible.
/// @param info Shader info; its pgm_hash is used to select an HLE implementation.
/// @param regs Liverpool register state forwarded to the HLE implementation.
/// @param rasterizer Rasterizer forwarded to the HLE implementation.
/// @return true if the shader was handled by HLE, false if no HLE implementation matched.
17+
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
18+
Rasterizer& rasterizer);
19+
20+
} // namespace Vulkan

0 commit comments

Comments
 (0)