Skip to content

Commit e96a6eb

Browse files
video_core: Attempt no2 for specialization
1 parent 914dbdc commit e96a6eb

20 files changed

+458
-356
lines changed

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -521,6 +521,8 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
521521
src/video_core/renderer_vulkan/vk_resource_pool.h
522522
src/video_core/renderer_vulkan/vk_scheduler.cpp
523523
src/video_core/renderer_vulkan/vk_scheduler.h
524+
src/video_core/renderer_vulkan/vk_shader_cache.cpp
525+
src/video_core/renderer_vulkan/vk_shader_cache.h
524526
src/video_core/renderer_vulkan/vk_shader_util.cpp
525527
src/video_core/renderer_vulkan/vk_shader_util.h
526528
src/video_core/renderer_vulkan/vk_swapchain.cpp

src/core/libraries/avplayer/avplayer.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle,
120120
}
121121

122122
SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) {
123-
return nullptr;
124123
LOG_TRACE(Lib_AvPlayer, "called");
125124
if (data == nullptr) {
126125
return nullptr;

src/core/libraries/kernel/thread_management.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,16 +1066,7 @@ ScePthread PThreadPool::Create() {
10661066
}
10671067
}
10681068

1069-
#ifdef _WIN64
10701069
auto* ret = new PthreadInternal{};
1071-
#else
1072-
// TODO: Linux specific hack
1073-
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
1074-
auto* ret = reinterpret_cast<PthreadInternal*>(
1075-
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
1076-
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
1077-
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
1078-
#endif
10791070
ret->is_free = false;
10801071
ret->is_detached = false;
10811072
ret->is_almost_done = false;

src/shader_recompiler/backend/spirv/emit_spirv.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,12 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
201201
if (info.has_image_query) {
202202
ctx.AddCapability(spv::Capability::ImageQuery);
203203
}
204+
if (info.uses_lane_id) {
205+
ctx.AddCapability(spv::Capability::GroupNonUniform);
206+
}
207+
if (info.uses_group_quad) {
208+
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
209+
}
204210
switch (program.info.stage) {
205211
case Stage::Compute: {
206212
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
@@ -219,10 +225,6 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
219225
} else {
220226
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
221227
}
222-
if (info.uses_group_quad) {
223-
ctx.AddCapability(spv::Capability::GroupNonUniform);
224-
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
225-
}
226228
if (info.has_discard) {
227229
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
228230
}

src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
132132
case AmdGpu::NumberFormat::SnormNz:
133133
case AmdGpu::NumberFormat::Sscaled:
134134
case AmdGpu::NumberFormat::Uscaled:
135+
case AmdGpu::NumberFormat::Srgb:
135136
return ctx.F32;
136137
case AmdGpu::NumberFormat::Sint:
137138
return ctx.S32;
@@ -140,7 +141,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
140141
default:
141142
break;
142143
}
143-
throw InvalidArgument("Invalid attribute type {}", fmt);
144+
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
144145
}
145146

146147
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
@@ -161,7 +162,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
161162
default:
162163
break;
163164
}
164-
throw InvalidArgument("Invalid attribute type {}", fmt);
165+
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
165166
}
166167

167168
void EmitContext::DefineBufferOffsets() {
@@ -204,6 +205,11 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
204205
}
205206

206207
void EmitContext::DefineInputs() {
208+
if (info.uses_lane_id) {
209+
subgroup_local_invocation_id = DefineVariable(
210+
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
211+
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
212+
}
207213
switch (stage) {
208214
case Stage::Vertex: {
209215
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
@@ -238,9 +244,6 @@ void EmitContext::DefineInputs() {
238244
break;
239245
}
240246
case Stage::Fragment:
241-
subgroup_local_invocation_id = DefineVariable(
242-
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
243-
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
244247
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
245248
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
246249
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
@@ -354,12 +357,12 @@ void EmitContext::DefineBuffers() {
354357
};
355358

356359
for (const auto& desc : info.buffers) {
357-
const auto sharp = desc.GetVsharp(info);
360+
const auto sharp = desc.GetSharp(info);
358361
const bool is_storage = desc.IsStorage(sharp);
359362
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
360363
const Id data_type = (*data_types)[1];
361364
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
362-
: TypeArray(data_type, ConstU32(desc.length))};
365+
: TypeArray(data_type, ConstU32(sharp.NumDwords()))};
363366
const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
364367

365368
const auto storage_class =
@@ -369,6 +372,9 @@ void EmitContext::DefineBuffers() {
369372
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
370373
Decorate(id, spv::Decoration::Binding, binding);
371374
Decorate(id, spv::Decoration::DescriptorSet, 0U);
375+
if (is_storage && !desc.is_written) {
376+
Decorate(id, spv::Decoration::NonWritable);
377+
}
372378
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
373379

374380
buffers.push_back({
@@ -503,17 +509,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
503509

504510
void EmitContext::DefineImagesAndSamplers() {
505511
for (const auto& image_desc : info.images) {
506-
const VectorIds* data_types = [&] {
507-
switch (image_desc.nfmt) {
508-
case AmdGpu::NumberFormat::Uint:
509-
return &U32;
510-
case AmdGpu::NumberFormat::Sint:
511-
return &S32;
512-
default:
513-
return &F32;
514-
}
515-
}();
516-
const Id sampled_type = data_types->Get(1);
512+
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
513+
const Id sampled_type = data_types[1];
517514
const Id image_type{ImageType(*this, image_desc, sampled_type)};
518515
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
519516
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
@@ -522,7 +519,7 @@ void EmitContext::DefineImagesAndSamplers() {
522519
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
523520
image_desc.dword_offset));
524521
images.push_back({
525-
.data_types = data_types,
522+
.data_types = &data_types,
526523
.id = id,
527524
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
528525
.pointer_type = pointer_type,
@@ -531,13 +528,12 @@ void EmitContext::DefineImagesAndSamplers() {
531528
interfaces.push_back(id);
532529
++binding;
533530
}
534-
535-
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
536-
531+
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
532+
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
533+
}
537534
if (info.samplers.empty()) {
538535
return;
539536
}
540-
541537
sampler_type = TypeSampler();
542538
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
543539
for (const auto& samp_desc : info.samplers) {
@@ -553,7 +549,7 @@ void EmitContext::DefineImagesAndSamplers() {
553549
}
554550

555551
void EmitContext::DefineSharedMemory() {
556-
static constexpr size_t DefaultSharedMemSize = 16_KB;
552+
static constexpr size_t DefaultSharedMemSize = 2_KB;
557553
if (!info.uses_shared) {
558554
return;
559555
}

src/shader_recompiler/frontend/translate/export.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
22
// SPDX-License-Identifier: GPL-2.0-or-later
33

4-
#include "common/logging/log.h"
54
#include "shader_recompiler/frontend/translate/translate.h"
65

76
namespace Shader::Gcn {
87

98
void Translator::EmitExport(const GcnInst& inst) {
109
if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
11-
LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
1210
ir.Discard(ir.LogicalNot(ir.GetExec()));
1311
}
1412

src/shader_recompiler/frontend/translate/translate.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
399399
info.buffers.push_back({
400400
.sgpr_base = attrib.sgpr_base,
401401
.dword_offset = attrib.dword_offset,
402-
.length = buffer.num_records,
403402
.used_types = IR::Type::F32,
404403
.is_instance_data = true,
405404
});

src/shader_recompiler/frontend/translate/vector_alu.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -415,14 +415,20 @@ void Translator::V_ADDC_U32(const GcnInst& inst) {
415415
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
416416
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
417417

418-
IR::U32 scarry;
418+
IR::U1 carry;
419419
if (inst.src_count == 3) { // VOP3
420-
IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
421-
scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
420+
if (inst.src[2].field == OperandField::VccLo) {
421+
carry = ir.GetVcc();
422+
} else if (inst.src[2].field == OperandField::ScalarGPR) {
423+
carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
424+
} else {
425+
UNREACHABLE();
426+
}
422427
} else { // VOP2
423-
scarry = ir.GetVccLo();
428+
carry = ir.GetVcc();
424429
}
425430

431+
const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
426432
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
427433

428434
const IR::VectorReg dst_reg{inst.dst[0].code};

src/shader_recompiler/ir/passes/resource_tracking_pass.cpp

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <algorithm>
55
#include <boost/container/small_vector.hpp>
6+
#include "common/alignment.h"
67
#include "shader_recompiler/ir/basic_block.h"
78
#include "shader_recompiler/ir/breadth_first_search.h"
89
#include "shader_recompiler/ir/ir_emitter.h"
@@ -195,7 +196,6 @@ class Descriptors {
195196
desc.inline_cbuf == existing.inline_cbuf;
196197
})};
197198
auto& buffer = buffer_resources[index];
198-
ASSERT(buffer.length == desc.length);
199199
buffer.used_types |= desc.used_types;
200200
buffer.is_written |= desc.is_written;
201201
return index;
@@ -227,7 +227,7 @@ class Descriptors {
227227
return true;
228228
}
229229
// Samplers with different bindings might still be the same.
230-
return existing.GetSsharp(info) == desc.GetSsharp(info);
230+
return existing.GetSharp(info) == desc.GetSharp(info);
231231
})};
232232
return index;
233233
}
@@ -342,19 +342,6 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
342342
};
343343
}
344344

345-
static u32 BufferLength(const AmdGpu::Buffer& buffer) {
346-
const auto stride = buffer.GetStride();
347-
if (stride < sizeof(f32)) {
348-
ASSERT(sizeof(f32) % stride == 0);
349-
return (((buffer.num_records - 1) / sizeof(f32)) + 1) * stride;
350-
} else if (stride == sizeof(f32)) {
351-
return buffer.num_records;
352-
} else {
353-
ASSERT(stride % sizeof(f32) == 0);
354-
return buffer.num_records * (stride / sizeof(f32));
355-
}
356-
}
357-
358345
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
359346
AmdGpu::Buffer& cbuf) {
360347

@@ -381,7 +368,6 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
381368
return descriptors.Add(BufferResource{
382369
.sgpr_base = std::numeric_limits<u32>::max(),
383370
.dword_offset = 0,
384-
.length = BufferLength(cbuf),
385371
.used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
386372
.inline_cbuf = cbuf,
387373
});
@@ -399,7 +385,6 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
399385
binding = descriptors.Add(BufferResource{
400386
.sgpr_base = sharp.sgpr_base,
401387
.dword_offset = sharp.dword_offset,
402-
.length = BufferLength(buffer),
403388
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
404389
.is_written = IsBufferStore(inst),
405390
});

src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ void Visit(Info& info, IR::Inst& inst) {
5050
case IR::Opcode::ImageQueryLod:
5151
info.has_image_query = true;
5252
break;
53+
case IR::Opcode::LaneId:
54+
info.uses_lane_id = true;
55+
break;
5356
default:
5457
break;
5558
}

0 commit comments

Comments
 (0)