Skip to content

Commit 096ad97

Browse files
committed
* renderer_vulkan: Introduce shader HLE system with copy shader implementation. (shadps4-emu#1683)
* renderer_vulkan: Introduce shader HLE system with copy shader implementation. Co-authored-by: TheTurtle <[email protected]> * buffer_cache: Handle obtaining buffer views partially within buffers. * vk_shader_hle: Make more efficient
1 parent e32c5d2 commit 096ad97

14 files changed

+227
-24
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -741,6 +741,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
741741
src/video_core/renderer_vulkan/vk_resource_pool.h
742742
src/video_core/renderer_vulkan/vk_scheduler.cpp
743743
src/video_core/renderer_vulkan/vk_scheduler.h
744+
src/video_core/renderer_vulkan/vk_shader_hle.cpp
745+
src/video_core/renderer_vulkan/vk_shader_hle.h
744746
src/video_core/renderer_vulkan/vk_shader_util.cpp
745747
src/video_core/renderer_vulkan/vk_shader_util.h
746748
src/video_core/renderer_vulkan/vk_swapchain.cpp

src/video_core/buffer_cache/buffer_cache.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,8 @@ std::pair<Buffer*, u32> BufferCache::ObtainBuffer(VAddr device_addr, u32 size, b
360360
return {&buffer, buffer.Offset(device_addr)};
361361
}
362362

363-
std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size) {
363+
std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size, bool prefer_gpu) {
364+
// Check if any buffer contains the full requested range.
364365
const u64 page = gpu_addr >> CACHING_PAGEBITS;
365366
const BufferId buffer_id = page_table[page];
366367
if (buffer_id) {
@@ -370,6 +371,13 @@ std::pair<Buffer*, u32> BufferCache::ObtainViewBuffer(VAddr gpu_addr, u32 size)
370371
return {&buffer, buffer.Offset(gpu_addr)};
371372
}
372373
}
374+
// If no buffer contains the full requested range but some buffer within was GPU-modified,
375+
// fall back to ObtainBuffer to create a full buffer and avoid losing GPU modifications.
376+
// This is only done if the request prefers to use GPU memory, otherwise we can skip it.
377+
if (prefer_gpu && memory_tracker.IsRegionGpuModified(gpu_addr, size)) {
378+
return ObtainBuffer(gpu_addr, size, false, false);
379+
}
380+
// In all other cases, just do a CPU copy to the staging buffer.
373381
const u32 offset = staging_buffer.Copy(gpu_addr, size, 16);
374382
return {&staging_buffer, offset};
375383
}

src/video_core/buffer_cache/buffer_cache.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ class BufferCache {
9696
BufferId buffer_id = {});
9797

9898
/// Attempts to obtain a buffer without modifying the cache contents.
99-
[[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size);
99+
[[nodiscard]] std::pair<Buffer*, u32> ObtainViewBuffer(VAddr gpu_addr, u32 size,
100+
bool prefer_gpu);
100101

101102
/// Return true when a region is registered on the cache
102103
[[nodiscard]] bool IsRegionRegistered(VAddr addr, size_t size);

src/video_core/renderer_vulkan/vk_compute_pipeline.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,9 @@ ComputePipeline::ComputePipeline(const Instance& instance_, Scheduler& scheduler
1515
DescriptorHeap& desc_heap_, vk::PipelineCache pipeline_cache,
1616
ComputePipelineKey compute_key_, const Shader::Info& info_,
1717
vk::ShaderModule module)
18-
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache}, compute_key{compute_key_},
19-
info{&info_} {
18+
: Pipeline{instance_, scheduler_, desc_heap_, pipeline_cache, true}, compute_key{compute_key_} {
19+
auto& info = stages[int(Shader::Stage::Compute)];
20+
info = &info_;
2021
const vk::PipelineShaderStageCreateInfo shader_ci = {
2122
.stage = vk::ShaderStageFlagBits::eCompute,
2223
.module = module,

src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,6 @@
2020

2121
namespace Vulkan {
2222

23-
static constexpr auto gp_stage_flags =
24-
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl |
25-
vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry |
26-
vk::ShaderStageFlagBits::eFragment;
27-
2823
GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& scheduler_,
2924
DescriptorHeap& desc_heap_, const GraphicsPipelineKey& key_,
3025
vk::PipelineCache pipeline_cache,
@@ -58,7 +53,7 @@ GraphicsPipeline::GraphicsPipeline(const Instance& instance_, Scheduler& schedul
5853
boost::container::static_vector<vk::VertexInputBindingDescription, 32> vertex_bindings;
5954
boost::container::static_vector<vk::VertexInputAttributeDescription, 32> vertex_attributes;
6055
if (fetch_shader && !instance.IsVertexInputDynamicState()) {
61-
const auto& vs_info = GetStage(Shader::LogicalStage::Vertex);
56+
const auto& vs_info = GetStage(Shader::Stage::Vertex);
6257
for (const auto& attrib : fetch_shader->attributes) {
6358
if (attrib.UsesStepRates()) {
6459
// Skip attribute binding as the data will be pulled by shader

src/video_core/renderer_vulkan/vk_graphics_pipeline.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,6 @@ class GraphicsPipeline : public Pipeline {
7474
void BindResources(const Liverpool::Regs& regs, VideoCore::BufferCache& buffer_cache,
7575
VideoCore::TextureCache& texture_cache) const;
7676

77-
const Shader::Info& GetStage(Shader::LogicalStage stage) const noexcept {
78-
return *stages[u32(stage)];
79-
}
80-
8177
bool IsEmbeddedVs() const noexcept {
8278
static constexpr size_t EmbeddedVsHash = 0x9b2da5cf47f8c29f;
8379
return key.stage_hashes[u32(Shader::LogicalStage::Vertex)] == EmbeddedVsHash;
@@ -109,10 +105,8 @@ class GraphicsPipeline : public Pipeline {
109105
void BuildDescSetLayout();
110106

111107
private:
112-
std::array<const Shader::Info*, MaxShaderStages> stages{};
113108
GraphicsPipelineKey key;
114109
std::optional<const Shader::Gcn::FetchShaderData> fetch_shader{};
115-
bool uses_push_descriptors{};
116110
};
117111

118112
} // namespace Vulkan

src/video_core/renderer_vulkan/vk_pipeline_common.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ boost::container::static_vector<vk::BufferView, 8> Pipeline::buffer_views;
1717
boost::container::static_vector<vk::DescriptorBufferInfo, 32> Pipeline::buffer_infos;
1818

1919
Pipeline::Pipeline(const Instance& instance_, Scheduler& scheduler_, DescriptorHeap& desc_heap_,
20-
vk::PipelineCache pipeline_cache)
21-
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_} {}
20+
vk::PipelineCache pipeline_cache, bool is_compute_ /*= false*/)
21+
: instance{instance_}, scheduler{scheduler_}, desc_heap{desc_heap_}, is_compute{is_compute_} {}
2222

2323
Pipeline::~Pipeline() = default;
2424

src/video_core/renderer_vulkan/vk_pipeline_common.h

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,19 @@ class TextureCache;
1414

1515
namespace Vulkan {
1616

17+
static constexpr auto gp_stage_flags =
18+
vk::ShaderStageFlagBits::eVertex | vk::ShaderStageFlagBits::eTessellationControl |
19+
vk::ShaderStageFlagBits::eTessellationEvaluation | vk::ShaderStageFlagBits::eGeometry |
20+
vk::ShaderStageFlagBits::eFragment;
21+
1722
class Instance;
1823
class Scheduler;
1924
class DescriptorHeap;
2025

2126
class Pipeline {
2227
public:
2328
Pipeline(const Instance& instance, Scheduler& scheduler, DescriptorHeap& desc_heap,
24-
vk::PipelineCache pipeline_cache);
29+
vk::PipelineCache pipeline_cache, bool is_compute = false);
2530
virtual ~Pipeline();
2631

2732
vk::Pipeline Handle() const noexcept {
@@ -32,6 +37,22 @@ class Pipeline {
3237
return *pipeline_layout;
3338
}
3439

40+
auto GetStages() const {
41+
if (is_compute) {
42+
return std::span{stages.cend() - 1, stages.cend()};
43+
} else {
44+
return std::span{stages.cbegin(), stages.cend() - 1};
45+
}
46+
}
47+
48+
/// Returns the shader info stored for `stage`.
/// The stored pointer is dereferenced without a null check, so the slot must have been set.
const Shader::Info& GetStage(Shader::Stage stage) const noexcept {
    const auto slot = u32(stage);
    return *stages[slot];
}
51+
52+
bool IsCompute() const {
53+
return is_compute;
54+
}
55+
3556
using DescriptorWrites = boost::container::small_vector<vk::WriteDescriptorSet, 16>;
3657
using BufferBarriers = boost::container::small_vector<vk::BufferMemoryBarrier2, 16>;
3758

@@ -53,6 +74,9 @@ class Pipeline {
5374
static boost::container::static_vector<vk::DescriptorImageInfo, 32> image_infos;
5475
static boost::container::static_vector<vk::BufferView, 8> buffer_views;
5576
static boost::container::static_vector<vk::DescriptorBufferInfo, 32> buffer_infos;
77+
std::array<const Shader::Info*, Shader::MaxStageTypes> stages{};
78+
bool uses_push_descriptors{};
79+
const bool is_compute;
5680
};
5781

5882
} // namespace Vulkan

src/video_core/renderer_vulkan/vk_rasterizer.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "video_core/renderer_vulkan/vk_instance.h"
1010
#include "video_core/renderer_vulkan/vk_rasterizer.h"
1111
#include "video_core/renderer_vulkan/vk_scheduler.h"
12+
#include "video_core/renderer_vulkan/vk_shader_hle.h"
1213
#include "video_core/texture_cache/image_view.h"
1314
#include "video_core/texture_cache/texture_cache.h"
1415
#include "vk_rasterizer.h"
@@ -108,7 +109,7 @@ void Rasterizer::Draw(bool is_indexed, u32 index_offset) {
108109
UNREACHABLE();
109110
}
110111

111-
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
112+
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
112113
const auto& fetch_shader = pipeline->GetFetchShader();
113114
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
114115
const u32 num_indices = buffer_cache.BindIndexBuffer(is_indexed, index_offset);
@@ -160,7 +161,7 @@ void Rasterizer::DrawIndirect(bool is_indexed, VAddr arg_address, u32 offset, u3
160161
UNREACHABLE();
161162
}
162163

163-
const auto& vs_info = pipeline->GetStage(Shader::LogicalStage::Vertex);
164+
const auto& vs_info = pipeline->GetStage(Shader::Stage::Vertex);
164165
const auto& fetch_shader = pipeline->GetFetchShader();
165166
buffer_cache.BindVertexBuffers(vs_info, fetch_shader);
166167
buffer_cache.BindIndexBuffer(is_indexed, 0);
@@ -211,6 +212,10 @@ void Rasterizer::DispatchDirect() {
211212
return;
212213
}
213214

215+
const auto& cs = pipeline->GetStage(Shader::Stage::Compute);
216+
if (ExecuteShaderHLE(cs, liverpool->regs, *this)) {
217+
return;
218+
}
214219
try {
215220
const auto has_resources = pipeline->BindResources(buffer_cache, texture_cache);
216221
if (!has_resources) {

src/video_core/renderer_vulkan/vk_rasterizer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@ class Rasterizer {
2727
AmdGpu::Liverpool* liverpool);
2828
~Rasterizer();
2929

30+
/// Returns a mutable reference to the rasterizer's `scheduler` member.
[[nodiscard]] Scheduler& GetScheduler() noexcept {
    return this->scheduler;
}
33+
34+
/// Returns a mutable reference to the rasterizer's `buffer_cache` member.
[[nodiscard]] VideoCore::BufferCache& GetBufferCache() noexcept {
    return this->buffer_cache;
}
37+
3038
[[nodiscard]] VideoCore::TextureCache& GetTextureCache() noexcept {
3139
return texture_cache;
3240
}

src/video_core/renderer_vulkan/vk_scheduler.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
1111
#include "video_core/renderer_vulkan/vk_resource_pool.h"
1212

13+
namespace tracy {
14+
class VkCtxScope;
15+
}
16+
1317
namespace Vulkan {
1418

1519
class Instance;
src/video_core/renderer_vulkan/vk_shader_hle.cpp

Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#include "shader_recompiler/info.h"
5+
#include "video_core/renderer_vulkan/vk_scheduler.h"
6+
#include "video_core/renderer_vulkan/vk_shader_hle.h"
7+
8+
#include "vk_rasterizer.h"
9+
10+
namespace Vulkan {
11+
12+
/// Value of Shader::Info::pgm_hash that ExecuteShaderHLE routes to the copy shader HLE path.
static constexpr u64 COPY_SHADER_HASH = 0xfefebf9f;
13+
14+
/// High-level emulation of the copy compute shader: rather than running the shader, the copies
/// described by its control buffer are recorded as Vulkan buffer-to-buffer copies.
///
/// @param info       Shader info; texture_buffers[0], [1] and [2] supply the control, source and
///                   destination buffer sharps respectively.
/// @param regs       GPU registers; cs_program.dim_x is the number of control entries processed.
/// @param rasterizer Supplies the scheduler and buffer cache used to record the copies.
/// @return Always true, i.e. the dispatch has been handled here.
bool ExecuteCopyShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
                          Rasterizer& rasterizer) {
    auto& scheduler = rasterizer.GetScheduler();
    auto& buffer_cache = rasterizer.GetBufferCache();

    // Copy shader defines three formatted buffers as inputs: control, source, and destination.
    const auto ctl_buf_sharp = info.texture_buffers[0].GetSharp(info);
    const auto src_buf_sharp = info.texture_buffers[1].GetSharp(info);
    const auto dst_buf_sharp = info.texture_buffers[2].GetSharp(info);
    const auto buf_stride = src_buf_sharp.GetStride();
    ASSERT(buf_stride == dst_buf_sharp.GetStride());

    // Layout of one control entry; all fields are in units of buffer elements (buf_stride bytes).
    struct CopyShaderControl {
        u32 dst_idx;
        u32 src_idx;
        u32 end;
    };
    static_assert(sizeof(CopyShaderControl) == 12);
    ASSERT(ctl_buf_sharp.GetStride() == sizeof(CopyShaderControl));
    // Control entries are read on the CPU directly through the sharp's base address.
    const auto ctl_buf = reinterpret_cast<const CopyShaderControl*>(ctl_buf_sharp.base_address);

    // Kept static so the allocation is reused between calls; cleared before every use.
    static std::vector<vk::BufferCopy> copies;
    copies.clear();
    copies.reserve(regs.cs_program.dim_x);

    // Translate each control entry into a byte-granular copy relative to the sharp bases.
    for (u32 i = 0; i < regs.cs_program.dim_x; i++) {
        const auto& [dst_idx, src_idx, end] = ctl_buf[i];
        const u32 local_dst_offset = dst_idx * buf_stride;
        const u32 local_src_offset = src_idx * buf_stride;
        const u32 local_size = (end + 1) * buf_stride;
        copies.emplace_back(local_src_offset, local_dst_offset, local_size);
    }

    scheduler.EndRendering();

    static constexpr vk::MemoryBarrier READ_BARRIER{
        .srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
        .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
    };
    static constexpr vk::MemoryBarrier WRITE_BARRIER{
        .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
        .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
    };
    scheduler.CommandBuffer().pipelineBarrier(
        vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer,
        vk::DependencyFlagBits::eByRegion, READ_BARRIER, {}, {});

    static constexpr vk::DeviceSize MaxDistanceForMerge = 64_MB;

    // Loop on batch_start (not batch_end) so that a trailing batch holding a single copy,
    // including the case of exactly one copy in total, is still issued.
    u32 batch_start = 0;
    while (batch_start < copies.size()) {
        // Place first copy into the current batch
        u32 batch_end = batch_start + 1;
        const auto& copy = copies[batch_start];
        auto src_offset_min = copy.srcOffset;
        auto src_offset_max = copy.srcOffset + copy.size;
        auto dst_offset_min = copy.dstOffset;
        auto dst_offset_max = copy.dstOffset + copy.size;

        for (u32 i = batch_start + 1; i < copies.size(); i++) {
            // Compute new src and dst bounds if we were to batch this copy
            const auto [src_offset, dst_offset, size] = copies[i];
            auto new_src_offset_min = std::min(src_offset_min, src_offset);
            auto new_src_offset_max = std::max(src_offset_max, src_offset + size);
            if (new_src_offset_max - new_src_offset_min > MaxDistanceForMerge) {
                continue;
            }

            auto new_dst_offset_min = std::min(dst_offset_min, dst_offset);
            auto new_dst_offset_max = std::max(dst_offset_max, dst_offset + size);
            if (new_dst_offset_max - new_dst_offset_min > MaxDistanceForMerge) {
                continue;
            }

            // We can batch this copy
            src_offset_min = new_src_offset_min;
            src_offset_max = new_src_offset_max;
            dst_offset_min = new_dst_offset_min;
            dst_offset_max = new_dst_offset_max;
            // Move the accepted copy next to the batch so [batch_start, batch_end) stays
            // contiguous; rejected copies are left behind for a later batch.
            if (i != batch_end) {
                std::swap(copies[i], copies[batch_end]);
            }
            ++batch_end;
        }

        // Obtain buffers for the total source and destination ranges.
        const auto [src_buf, src_buf_offset] =
            buffer_cache.ObtainBuffer(src_buf_sharp.base_address + src_offset_min,
                                      src_offset_max - src_offset_min, false, false);
        const auto [dst_buf, dst_buf_offset] =
            buffer_cache.ObtainBuffer(dst_buf_sharp.base_address + dst_offset_min,
                                      dst_offset_max - dst_offset_min, true, false);

        // Apply found buffer base.
        const auto vk_copies = std::span{copies}.subspan(batch_start, batch_end - batch_start);
        for (auto& copy : vk_copies) {
            copy.srcOffset = copy.srcOffset - src_offset_min + src_buf_offset;
            copy.dstOffset = copy.dstOffset - dst_offset_min + dst_buf_offset;
        }

        // Execute buffer copies.
        LOG_TRACE(Render_Vulkan, "HLE buffer copy: src_size = {}, dst_size = {}",
                  src_offset_max - src_offset_min, dst_offset_max - dst_offset_min);
        scheduler.CommandBuffer().copyBuffer(src_buf->Handle(), dst_buf->Handle(), vk_copies);
        batch_start = batch_end;
    }

    scheduler.CommandBuffer().pipelineBarrier(
        vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands,
        vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});

    return true;
}
128+
129+
/// Dispatches to a host-side (HLE) implementation when the shader's program hash is recognised.
/// Returns false when no HLE implementation matches `info.pgm_hash`; otherwise returns the
/// result of the matching handler.
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
                      Rasterizer& rasterizer) {
    if (info.pgm_hash == COPY_SHADER_HASH) {
        return ExecuteCopyShaderHLE(info, regs, rasterizer);
    }
    return false;
}
138+
139+
} // namespace Vulkan
src/video_core/renderer_vulkan/vk_shader_hle.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#pragma once
5+
6+
#include "video_core/amdgpu/liverpool.h"
7+
8+
namespace Shader {
9+
struct Info;
10+
}
11+
12+
namespace Vulkan {
13+
14+
class Rasterizer;
15+
16+
/// Attempts to execute a shader using HLE if possible.
17+
bool ExecuteShaderHLE(const Shader::Info& info, const AmdGpu::Liverpool::Regs& regs,
18+
Rasterizer& rasterizer);
19+
20+
} // namespace Vulkan

0 commit comments

Comments
 (0)