Skip to content

Commit 9ec75c3

Browse files
authored
Implement shader resource tables (#1165)
* Implement shader resource tables * fix after rebase + squash * address some review comments * fix pipeline_common * cleanup debug stuff * switch to using single codegenerator
1 parent 7b16085 commit 9ec75c3

30 files changed

+740
-119
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
590590
src/shader_recompiler/frontend/structured_control_flow.h
591591
src/shader_recompiler/ir/passes/constant_propagation_pass.cpp
592592
src/shader_recompiler/ir/passes/dead_code_elimination_pass.cpp
593+
src/shader_recompiler/ir/passes/flatten_extended_userdata_pass.cpp
593594
src/shader_recompiler/ir/passes/identity_removal_pass.cpp
594595
src/shader_recompiler/ir/passes/ir_passes.h
595596
src/shader_recompiler/ir/passes/lower_shared_mem_to_registers.cpp

src/common/decoder.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ DecoderImpl::DecoderImpl() {
1313

1414
DecoderImpl::~DecoderImpl() = default;
1515

16+
/// Formats a decoded instruction as human-readable text.
///
/// @param inst     Decoded instruction to format.
/// @param operands Operand array for `inst`; `inst.operand_count_visible` entries are handed to
///                 the formatter.
/// @param address  Address value forwarded to the formatter.
/// @return The formatted text, or an empty string if the formatter reports failure.
std::string DecoderImpl::disassembleInst(ZydisDecodedInstruction& inst,
                                         ZydisDecodedOperand* operands, u64 address) {
    constexpr int bufLen = 256;
    // Zero-initialised so the buffer is always a valid C string, even when formatting fails.
    char szBuffer[bufLen] = {};
    const ZyanStatus status =
        ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible,
                                        szBuffer, sizeof(szBuffer), address, ZYAN_NULL);
    if (!ZYAN_SUCCESS(status)) {
        // Never hand back a partially written buffer.
        return {};
    }
    return szBuffer;
}
24+
1625
void DecoderImpl::printInstruction(void* code, u64 address) {
1726
ZydisDecodedInstruction instruction;
1827
ZydisDecodedOperand operands[ZYDIS_MAX_OPERAND_COUNT_VISIBLE];
@@ -27,11 +36,8 @@ void DecoderImpl::printInstruction(void* code, u64 address) {
2736

2837
void DecoderImpl::printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
2938
u64 address) {
30-
const int bufLen = 256;
31-
char szBuffer[bufLen];
32-
ZydisFormatterFormatInstruction(&m_formatter, &inst, operands, inst.operand_count_visible,
33-
szBuffer, sizeof(szBuffer), address, ZYAN_NULL);
34-
fmt::print("instruction: {}\n", szBuffer);
39+
std::string s = disassembleInst(inst, operands, address);
40+
fmt::print("instruction: {}\n", s);
3541
}
3642

3743
ZyanStatus DecoderImpl::decodeInstruction(ZydisDecodedInstruction& inst,

src/common/decoder.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ class DecoderImpl {
1414
DecoderImpl();
1515
~DecoderImpl();
1616

17+
std::string disassembleInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,
18+
u64 address);
1719
void printInst(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands, u64 address);
1820
void printInstruction(void* code, u64 address);
1921
ZyanStatus decodeInstruction(ZydisDecodedInstruction& inst, ZydisDecodedOperand* operands,

src/common/hash.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#pragma once
5+
6+
#include "common/types.h"
7+
8+
/// Mixes `hash` into `seed` and returns the combined 64-bit value.
/// All arithmetic is unsigned and wraps on overflow.
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
    const u64 shifted = (seed << 12) + (seed >> 4);
    const u64 mixed = hash + 0x9e3779b9 + shifted;
    return seed ^ mixed;
}
11+
12+
/// Mixes `hash` into `seed` and returns the combined 32-bit value.
/// All arithmetic is unsigned and wraps on overflow.
[[nodiscard]] inline u32 HashCombine(const u32 seed, const u32 hash) {
    const u32 shifted = (seed << 6) + (seed >> 2);
    const u32 mixed = hash + 0x9e3779b9 + shifted;
    return seed ^ mixed;
}

src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
22
// SPDX-License-Identifier: GPL-2.0-or-later
33

4+
#include "common/assert.h"
45
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
56
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
67

@@ -146,9 +147,14 @@ void EmitGetGotoVariable(EmitContext&) {
146147
UNREACHABLE_MSG("Unreachable instruction");
147148
}
148149

149-
Id EmitReadConst(EmitContext& ctx) {
150-
return ctx.u32_zero_value;
151-
UNREACHABLE_MSG("Unreachable instruction");
150+
/// Emits a load of one 32-bit word from the flattened SRT buffer (`ctx.srt_flatbuf`).
/// The dword index into the buffer is taken from the instruction's flags.
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst) {
    const u32 flatbuf_off_dw = inst->Flags<u32>();
    // NOTE(review): if `binding` is an unsigned field this check is always true — confirm.
    ASSERT(ctx.srt_flatbuf.binding >= 0);
    ASSERT(flatbuf_off_dw > 0);
    const Id dword_index = ctx.ConstU32(flatbuf_off_dw);
    auto& flatbuf = ctx.srt_flatbuf;
    // Member 0 of the block is the data array; then index the requested dword inside it.
    const Id element_ptr{
        ctx.OpAccessChain(flatbuf.pointer_type, flatbuf.id, ctx.u32_zero_value, dword_index)};
    return ctx.OpLoad(ctx.U32[1], element_ptr);
}
153159

154160
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {

src/shader_recompiler/backend/spirv/emit_spirv_instructions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ void EmitSetVectorRegister(EmitContext& ctx);
6262
void EmitSetGotoVariable(EmitContext& ctx);
6363
void EmitGetGotoVariable(EmitContext& ctx);
6464
void EmitSetScc(EmitContext& ctx);
65-
Id EmitReadConst(EmitContext& ctx);
65+
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst);
6666
Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index);
6767
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);
6868
Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address);

src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
#include "common/assert.h"
55
#include "common/div_ceil.h"
66
#include "shader_recompiler/backend/spirv/spirv_emit_context.h"
7+
#include "shader_recompiler/ir/passes/srt.h"
78
#include "video_core/amdgpu/types.h"
89

910
#include <boost/container/static_vector.hpp>
1011
#include <fmt/format.h>
1112

1213
#include <numbers>
14+
#include <string_view>
1315

1416
namespace Shader::Backend::SPIRV {
1517
namespace {
@@ -435,14 +437,16 @@ void EmitContext::DefinePushDataBlock() {
435437

436438
void EmitContext::DefineBuffers() {
437439
boost::container::small_vector<Id, 8> type_ids;
438-
const auto define_struct = [&](Id record_array_type, bool is_instance_data) {
440+
const auto define_struct = [&](Id record_array_type, bool is_instance_data,
441+
std::optional<std::string_view> explicit_name = {}) {
439442
const Id struct_type{TypeStruct(record_array_type)};
440443
if (std::ranges::find(type_ids, record_array_type.value, &Id::value) != type_ids.end()) {
441444
return struct_type;
442445
}
443446
Decorate(record_array_type, spv::Decoration::ArrayStride, 4);
444-
const auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
445-
: fmt::format("{}_cbuf_block_f32", stage);
447+
auto name = is_instance_data ? fmt::format("{}_instance_data_f32", stage)
448+
: fmt::format("{}_cbuf_block_f32", stage);
449+
name = explicit_name.value_or(name);
446450
Name(struct_type, name);
447451
Decorate(struct_type, spv::Decoration::Block);
448452
MemberName(struct_type, 0, "data");
@@ -451,6 +455,29 @@ void EmitContext::DefineBuffers() {
451455
return struct_type;
452456
};
453457

458+
if (info.has_readconst) {
459+
const Id data_type = U32[1];
460+
const auto storage_class = spv::StorageClass::Uniform;
461+
const Id pointer_type = TypePointer(storage_class, data_type);
462+
const Id record_array_type{
463+
TypeArray(U32[1], ConstU32(static_cast<u32>(info.flattened_ud_buf.size())))};
464+
465+
const Id struct_type{define_struct(record_array_type, false, "srt_flatbuf_ty")};
466+
467+
const Id struct_pointer_type{TypePointer(storage_class, struct_type)};
468+
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
469+
Decorate(id, spv::Decoration::Binding, binding.unified++);
470+
Decorate(id, spv::Decoration::DescriptorSet, 0U);
471+
Name(id, "srt_flatbuf_ubo");
472+
473+
srt_flatbuf = {
474+
.id = id,
475+
.binding = binding.buffer++,
476+
.pointer_type = pointer_type,
477+
};
478+
interfaces.push_back(id);
479+
}
480+
454481
for (const auto& desc : info.buffers) {
455482
const auto sharp = desc.GetSharp(info);
456483
const bool is_storage = desc.IsStorage(sharp);
@@ -471,7 +498,7 @@ void EmitContext::DefineBuffers() {
471498
if (is_storage && !desc.is_written) {
472499
Decorate(id, spv::Decoration::NonWritable);
473500
}
474-
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
501+
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sharp_idx));
475502

476503
buffers.push_back({
477504
.id = id,
@@ -495,7 +522,7 @@ void EmitContext::DefineTextureBuffers() {
495522
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
496523
Decorate(id, spv::Decoration::Binding, binding.unified++);
497524
Decorate(id, spv::Decoration::DescriptorSet, 0U);
498-
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sgpr_base));
525+
Name(id, fmt::format("{}_{}", desc.is_written ? "imgbuf" : "texbuf", desc.sharp_idx));
499526
texture_buffers.push_back({
500527
.id = id,
501528
.binding = binding.buffer++,
@@ -582,7 +609,7 @@ spv::ImageFormat GetFormat(const AmdGpu::Image& image) {
582609
}
583610

584611
Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
585-
const auto image = ctx.info.ReadUd<AmdGpu::Image>(desc.sgpr_base, desc.dword_offset);
612+
const auto image = ctx.info.ReadUdSharp<AmdGpu::Image>(desc.sharp_idx);
586613
const auto format = desc.is_atomic ? GetFormat(image) : spv::ImageFormat::Unknown;
587614
const u32 sampled = desc.is_storage ? 2 : 1;
588615
switch (desc.type) {
@@ -618,8 +645,7 @@ void EmitContext::DefineImagesAndSamplers() {
618645
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
619646
Decorate(id, spv::Decoration::Binding, binding.unified++);
620647
Decorate(id, spv::Decoration::DescriptorSet, 0U);
621-
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
622-
image_desc.dword_offset));
648+
Name(id, fmt::format("{}_{}{}", stage, "img", image_desc.sharp_idx));
623649
images.push_back({
624650
.data_types = &data_types,
625651
.id = id,
@@ -643,8 +669,7 @@ void EmitContext::DefineImagesAndSamplers() {
643669
const Id id{AddGlobalVariable(sampler_pointer_type, spv::StorageClass::UniformConstant)};
644670
Decorate(id, spv::Decoration::Binding, binding.unified++);
645671
Decorate(id, spv::Decoration::DescriptorSet, 0U);
646-
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "samp", samp_desc.sgpr_base,
647-
samp_desc.dword_offset));
672+
Name(id, fmt::format("{}_{}{}", stage, "samp", samp_desc.sharp_idx));
648673
samplers.push_back(id);
649674
interfaces.push_back(id);
650675
}

src/shader_recompiler/backend/spirv/spirv_emit_context.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -228,6 +228,7 @@ class EmitContext final : public Sirit::Module {
228228
Bindings& binding;
229229
boost::container::small_vector<BufferDefinition, 16> buffers;
230230
boost::container::small_vector<TextureBufferDefinition, 8> texture_buffers;
231+
BufferDefinition srt_flatbuf;
231232
boost::container::small_vector<TextureDefinition, 8> images;
232233
boost::container::small_vector<Id, 4> samplers;
233234

src/shader_recompiler/frontend/translate/scalar_memory.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ static constexpr u32 SQ_SRC_LITERAL = 0xFF;
1010
void Translator::EmitScalarMemory(const GcnInst& inst) {
1111
switch (inst.opcode) {
1212
// SMRD
13+
case Opcode::S_LOAD_DWORD:
14+
return S_LOAD_DWORD(1, inst);
15+
case Opcode::S_LOAD_DWORDX2:
16+
return S_LOAD_DWORD(2, inst);
1317
case Opcode::S_LOAD_DWORDX4:
1418
return S_LOAD_DWORD(4, inst);
1519
case Opcode::S_LOAD_DWORDX8:

src/shader_recompiler/frontend/translate/translate.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
388388
IR::VectorReg dst_reg{attrib.dest_vgpr};
389389

390390
// Read the V# of the attribute to figure out component number and type.
391-
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
391+
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
392392
for (u32 i = 0; i < 4; i++) {
393393
const IR::F32 comp = [&] {
394394
switch (buffer.GetSwizzle(i)) {
@@ -418,8 +418,7 @@ void Translator::EmitFetch(const GcnInst& inst) {
418418
if (step_rate == Info::VsInput::OverStepRate0 ||
419419
step_rate == Info::VsInput::OverStepRate1) {
420420
info.buffers.push_back({
421-
.sgpr_base = attrib.sgpr_base,
422-
.dword_offset = attrib.dword_offset,
421+
.sharp_idx = info.srt_info.ReserveSharp(attrib.sgpr_base, attrib.dword_offset, 4),
423422
.used_types = IR::Type::F32,
424423
.is_instance_data = true,
425424
});

src/shader_recompiler/info.h

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,17 @@
22
// SPDX-License-Identifier: GPL-2.0-or-later
33
#pragma once
44

5+
#include <algorithm>
56
#include <span>
7+
#include <vector>
68
#include <boost/container/small_vector.hpp>
79
#include <boost/container/static_vector.hpp>
810
#include "common/assert.h"
911
#include "common/types.h"
1012
#include "shader_recompiler/backend/bindings.h"
1113
#include "shader_recompiler/frontend/copy_shader.h"
1214
#include "shader_recompiler/ir/attribute.h"
15+
#include "shader_recompiler/ir/passes/srt.h"
1316
#include "shader_recompiler/ir/reg.h"
1417
#include "shader_recompiler/ir/type.h"
1518
#include "shader_recompiler/params.h"
@@ -36,8 +39,7 @@ constexpr u32 NUM_TEXTURE_TYPES = 7;
3639
struct Info;
3740

3841
struct BufferResource {
39-
u32 sgpr_base;
40-
u32 dword_offset;
42+
u32 sharp_idx;
4143
IR::Type used_types;
4244
AmdGpu::Buffer inline_cbuf;
4345
bool is_gds_buffer{};
@@ -53,8 +55,7 @@ struct BufferResource {
5355
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
5456

5557
struct TextureBufferResource {
56-
u32 sgpr_base;
57-
u32 dword_offset;
58+
u32 sharp_idx;
5859
AmdGpu::NumberFormat nfmt;
5960
bool is_written{};
6061

@@ -63,8 +64,7 @@ struct TextureBufferResource {
6364
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;
6465

6566
struct ImageResource {
66-
u32 sgpr_base;
67-
u32 dword_offset;
67+
u32 sharp_idx;
6868
AmdGpu::ImageType type;
6969
AmdGpu::NumberFormat nfmt;
7070
bool is_storage{};
@@ -77,8 +77,7 @@ struct ImageResource {
7777
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
7878

7979
struct SamplerResource {
80-
u32 sgpr_base;
81-
u32 dword_offset;
80+
u32 sharp_idx;
8281
AmdGpu::Sampler inline_sampler{};
8382
u32 associated_image : 4;
8483
u32 disable_aniso : 1;
@@ -180,6 +179,9 @@ struct Info {
180179
ImageResourceList images;
181180
SamplerResourceList samplers;
182181

182+
PersistentSrtInfo srt_info;
183+
std::vector<u32> flattened_ud_buf;
184+
183185
std::span<const u32> user_data;
184186
Stage stage;
185187

@@ -199,14 +201,20 @@ struct Info {
199201
bool uses_fp64{};
200202
bool uses_step_rates{};
201203
bool translation_failed{}; // indicates that shader has unsupported instructions
204+
bool has_readconst{};
202205
u8 mrt_mask{0u};
203206

204207
explicit Info(Stage stage_, ShaderParams params)
205208
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
206209
user_data{params.user_data} {}
207210

208211
template <typename T>
209-
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
212+
/// Reads a `T` from the flattened user-data buffer, starting at dword index `sharp_idx`.
///
/// The value is copied out with memcpy instead of dereferencing a casted pointer: the backing
/// storage is a `std::vector<u32>`, so viewing it through a `const T*` would violate aliasing
/// rules and is not guaranteed to be suitably aligned for `T`.
///
/// NOTE(review): there is no bounds check; callers are assumed to pass an index with at least
/// `sizeof(T)` bytes of buffer behind it — confirm.
inline T ReadUdSharp(u32 sharp_idx) const noexcept {
    T data;
    std::memcpy(&data, flattened_ud_buf.data() + sharp_idx, sizeof(T));
    return data;
}
215+
216+
template <typename T>
217+
T ReadUdReg(u32 ptr_index, u32 dword_offset) const noexcept {
210218
T data;
211219
const u32* base = user_data.data();
212220
if (ptr_index != IR::NumScalarRegs) {
@@ -228,7 +236,8 @@ struct Info {
228236
}
229237

230238
void AddBindings(Backend::Bindings& bnd) const {
231-
const auto total_buffers = buffers.size() + texture_buffers.size();
239+
const auto total_buffers =
240+
buffers.size() + texture_buffers.size() + (has_readconst ? 1 : 0);
232241
bnd.buffer += total_buffers;
233242
bnd.unified += total_buffers + images.size() + samplers.size();
234243
bnd.user_data += ud_mask.NumRegs();
@@ -245,22 +254,32 @@ struct Info {
245254
}
246255
return {vertex_offset, instance_offset};
247256
}
257+
258+
/// Rebuilds `flattened_ud_buf` from the current user data.
///
/// The buffer is resized to `srt_info.flattened_bufsize_dw` dwords, the raw user data is copied
/// to its start, and then `srt_info.walker_func` (when set) is invoked with the user data and
/// the flat buffer.
void RefreshFlatBuf() {
    flattened_ud_buf.resize(srt_info.flattened_bufsize_dw);
    ASSERT(user_data.size() <= NumUserDataRegs);
    // The copy below writes user_data.size() dwords; the destination must be able to hold them.
    ASSERT(user_data.size() <= flattened_ud_buf.size());
    if (!user_data.empty()) {
        // Skipped when empty so memcpy is never called with a possibly-null pointer.
        std::memcpy(flattened_ud_buf.data(), user_data.data(), user_data.size_bytes());
    }
    // Run the JIT program to walk the SRT and write the leaves to a flat buffer
    if (srt_info.walker_func) {
        srt_info.walker_func(user_data.data(), flattened_ud_buf.data());
    }
}
248267
};
249268

250269
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
251-
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
270+
return inline_cbuf ? inline_cbuf : info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
252271
}
253272

254273
constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
255-
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
274+
return info.ReadUdSharp<AmdGpu::Buffer>(sharp_idx);
256275
}
257276

258277
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
259-
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
278+
return info.ReadUdSharp<AmdGpu::Image>(sharp_idx);
260279
}
261280

262281
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
263-
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
282+
return inline_sampler ? inline_sampler : info.ReadUdSharp<AmdGpu::Sampler>(sharp_idx);
264283
}
265284

266285
} // namespace Shader

src/shader_recompiler/ir/basic_block.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,10 @@ std::string DumpBlock(const Block& block, const std::map<const Block*, size_t>&
118118
} else {
119119
ret += fmt::format(" {}", op); // '%00000 = ' -> 1 + 5 + 3 = 9 spaces
120120
}
121+
122+
if (op == Opcode::ReadConst) {
123+
ret += fmt::format(" (flags={}) ", inst.Flags<u32>());
124+
}
121125
const size_t arg_count{inst.NumArgs()};
122126
for (size_t arg_index = 0; arg_index < arg_count; ++arg_index) {
123127
const Value arg{inst.Arg(arg_index)};

0 commit comments

Comments
 (0)