Skip to content

Commit 77f10ce

Browse files
video_core: Attempt no2 for specialization
1 parent 31470d9 commit 77f10ce

19 files changed

+457
-356
lines changed

src/core/libraries/avplayer/avplayer.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,6 @@ bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle,
120120
}
121121

122122
SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) {
123-
return nullptr;
124123
LOG_TRACE(Lib_AvPlayer, "called");
125124
if (data == nullptr) {
126125
return nullptr;

src/core/libraries/kernel/thread_management.cpp

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,16 +1065,7 @@ ScePthread PThreadPool::Create() {
10651065
}
10661066
}
10671067

1068-
#ifdef _WIN64
10691068
auto* ret = new PthreadInternal{};
1070-
#else
1071-
// TODO: Linux specific hack
1072-
static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
1073-
auto* ret = reinterpret_cast<PthreadInternal*>(
1074-
mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
1075-
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
1076-
hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
1077-
#endif
10781069
ret->is_free = false;
10791070
ret->is_detached = false;
10801071
ret->is_almost_done = false;

src/shader_recompiler/backend/spirv/emit_spirv.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,12 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
200200
if (info.has_image_query) {
201201
ctx.AddCapability(spv::Capability::ImageQuery);
202202
}
203+
if (info.uses_lane_id) {
204+
ctx.AddCapability(spv::Capability::GroupNonUniform);
205+
}
206+
if (info.uses_group_quad) {
207+
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
208+
}
203209
switch (program.info.stage) {
204210
case Stage::Compute: {
205211
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
@@ -218,10 +224,6 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
218224
} else {
219225
ctx.AddExecutionMode(main, spv::ExecutionMode::OriginUpperLeft);
220226
}
221-
if (info.uses_group_quad) {
222-
ctx.AddCapability(spv::Capability::GroupNonUniform);
223-
ctx.AddCapability(spv::Capability::GroupNonUniformQuad);
224-
}
225227
if (info.has_discard) {
226228
ctx.AddCapability(spv::Capability::DemoteToHelperInvocationEXT);
227229
}

src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
132132
case AmdGpu::NumberFormat::SnormNz:
133133
case AmdGpu::NumberFormat::Sscaled:
134134
case AmdGpu::NumberFormat::Uscaled:
135+
case AmdGpu::NumberFormat::Srgb:
135136
return ctx.F32;
136137
case AmdGpu::NumberFormat::Sint:
137138
return ctx.S32;
@@ -140,7 +141,7 @@ const VectorIds& GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
140141
default:
141142
break;
142143
}
143-
throw InvalidArgument("Invalid attribute type {}", fmt);
144+
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
144145
}
145146

146147
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
@@ -161,7 +162,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
161162
default:
162163
break;
163164
}
164-
throw InvalidArgument("Invalid attribute type {}", fmt);
165+
UNREACHABLE_MSG("Invalid attribute type {}", fmt);
165166
}
166167

167168
void EmitContext::DefineBufferOffsets() {
@@ -204,6 +205,11 @@ Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
204205
}
205206

206207
void EmitContext::DefineInputs() {
208+
if (info.uses_lane_id) {
209+
subgroup_local_invocation_id = DefineVariable(
210+
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
211+
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
212+
}
207213
switch (stage) {
208214
case Stage::Vertex: {
209215
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
@@ -238,9 +244,6 @@ void EmitContext::DefineInputs() {
238244
break;
239245
}
240246
case Stage::Fragment:
241-
subgroup_local_invocation_id = DefineVariable(
242-
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
243-
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
244247
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
245248
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
246249
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
@@ -354,12 +357,12 @@ void EmitContext::DefineBuffers() {
354357
};
355358

356359
for (const auto& desc : info.buffers) {
357-
const auto sharp = desc.GetVsharp(info);
360+
const auto sharp = desc.GetSharp(info);
358361
const bool is_storage = desc.IsStorage(sharp);
359362
const auto* data_types = True(desc.used_types & IR::Type::F32) ? &F32 : &U32;
360363
const Id data_type = (*data_types)[1];
361364
const Id record_array_type{is_storage ? TypeRuntimeArray(data_type)
362-
: TypeArray(data_type, ConstU32(desc.length))};
365+
: TypeArray(data_type, ConstU32(sharp.NumDwords()))};
363366
const Id struct_type{define_struct(record_array_type, desc.is_instance_data)};
364367

365368
const auto storage_class =
@@ -369,6 +372,9 @@ void EmitContext::DefineBuffers() {
369372
const Id id{AddGlobalVariable(struct_pointer_type, storage_class)};
370373
Decorate(id, spv::Decoration::Binding, binding);
371374
Decorate(id, spv::Decoration::DescriptorSet, 0U);
375+
if (is_storage && !desc.is_written) {
376+
Decorate(id, spv::Decoration::NonWritable);
377+
}
372378
Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "cbuf", desc.sgpr_base));
373379

374380
buffers.push_back({
@@ -499,17 +505,8 @@ Id ImageType(EmitContext& ctx, const ImageResource& desc, Id sampled_type) {
499505

500506
void EmitContext::DefineImagesAndSamplers() {
501507
for (const auto& image_desc : info.images) {
502-
const VectorIds* data_types = [&] {
503-
switch (image_desc.nfmt) {
504-
case AmdGpu::NumberFormat::Uint:
505-
return &U32;
506-
case AmdGpu::NumberFormat::Sint:
507-
return &S32;
508-
default:
509-
return &F32;
510-
}
511-
}();
512-
const Id sampled_type = data_types->Get(1);
508+
const VectorIds& data_types = GetAttributeType(*this, image_desc.nfmt);
509+
const Id sampled_type = data_types[1];
513510
const Id image_type{ImageType(*this, image_desc, sampled_type)};
514511
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, image_type)};
515512
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
@@ -518,7 +515,7 @@ void EmitContext::DefineImagesAndSamplers() {
518515
Name(id, fmt::format("{}_{}{}_{:02x}", stage, "img", image_desc.sgpr_base,
519516
image_desc.dword_offset));
520517
images.push_back({
521-
.data_types = data_types,
518+
.data_types = &data_types,
522519
.id = id,
523520
.sampled_type = image_desc.is_storage ? sampled_type : TypeSampledImage(image_type),
524521
.pointer_type = pointer_type,
@@ -527,13 +524,12 @@ void EmitContext::DefineImagesAndSamplers() {
527524
interfaces.push_back(id);
528525
++binding;
529526
}
530-
531-
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
532-
527+
if (std::ranges::any_of(info.images, &ImageResource::is_atomic)) {
528+
image_u32 = TypePointer(spv::StorageClass::Image, U32[1]);
529+
}
533530
if (info.samplers.empty()) {
534531
return;
535532
}
536-
537533
sampler_type = TypeSampler();
538534
sampler_pointer_type = TypePointer(spv::StorageClass::UniformConstant, sampler_type);
539535
for (const auto& samp_desc : info.samplers) {
@@ -549,7 +545,7 @@ void EmitContext::DefineImagesAndSamplers() {
549545
}
550546

551547
void EmitContext::DefineSharedMemory() {
552-
static constexpr size_t DefaultSharedMemSize = 16_KB;
548+
static constexpr size_t DefaultSharedMemSize = 2_KB;
553549
if (!info.uses_shared) {
554550
return;
555551
}

src/shader_recompiler/frontend/translate/export.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,12 @@
11
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
22
// SPDX-License-Identifier: GPL-2.0-or-later
33

4-
#include "common/logging/log.h"
54
#include "shader_recompiler/frontend/translate/translate.h"
65

76
namespace Shader::Gcn {
87

98
void Translator::EmitExport(const GcnInst& inst) {
109
if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
11-
LOG_WARNING(Render_Recompiler, "An ambiguous export appeared in translation");
1210
ir.Discard(ir.LogicalNot(ir.GetExec()));
1311
}
1412

src/shader_recompiler/frontend/translate/translate.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,6 @@ void Translator::EmitFetch(const GcnInst& inst) {
399399
info.buffers.push_back({
400400
.sgpr_base = attrib.sgpr_base,
401401
.dword_offset = attrib.dword_offset,
402-
.length = buffer.num_records,
403402
.used_types = IR::Type::F32,
404403
.is_instance_data = true,
405404
});

src/shader_recompiler/frontend/translate/vector_alu.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -399,14 +399,20 @@ void Translator::V_ADDC_U32(const GcnInst& inst) {
399399
const auto src0 = GetSrc<IR::U32>(inst.src[0]);
400400
const auto src1 = GetSrc<IR::U32>(inst.src[1]);
401401

402-
IR::U32 scarry;
402+
IR::U1 carry;
403403
if (inst.src_count == 3) { // VOP3
404-
IR::U1 thread_bit{ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code))};
405-
scarry = IR::U32{ir.Select(thread_bit, ir.Imm32(1), ir.Imm32(0))};
404+
if (inst.src[2].field == OperandField::VccLo) {
405+
carry = ir.GetVcc();
406+
} else if (inst.src[2].field == OperandField::ScalarGPR) {
407+
carry = ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[2].code));
408+
} else {
409+
UNREACHABLE();
410+
}
406411
} else { // VOP2
407-
scarry = ir.GetVccLo();
412+
carry = ir.GetVcc();
408413
}
409414

415+
const IR::U32 scarry = IR::U32{ir.Select(carry, ir.Imm32(1), ir.Imm32(0))};
410416
const IR::U32 result = ir.IAdd(ir.IAdd(src0, src1), scarry);
411417

412418
const IR::VectorReg dst_reg{inst.dst[0].code};

src/shader_recompiler/ir/passes/resource_tracking_pass.cpp

Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <algorithm>
55
#include <boost/container/small_vector.hpp>
6+
#include "common/alignment.h"
67
#include "shader_recompiler/ir/basic_block.h"
78
#include "shader_recompiler/ir/breadth_first_search.h"
89
#include "shader_recompiler/ir/ir_emitter.h"
@@ -194,7 +195,6 @@ class Descriptors {
194195
desc.inline_cbuf == existing.inline_cbuf;
195196
})};
196197
auto& buffer = buffer_resources[index];
197-
ASSERT(buffer.length == desc.length);
198198
buffer.used_types |= desc.used_types;
199199
buffer.is_written |= desc.is_written;
200200
return index;
@@ -226,7 +226,7 @@ class Descriptors {
226226
return true;
227227
}
228228
// Samplers with different bindings might still be the same.
229-
return existing.GetSsharp(info) == desc.GetSsharp(info);
229+
return existing.GetSharp(info) == desc.GetSharp(info);
230230
})};
231231
return index;
232232
}
@@ -340,19 +340,6 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
340340
};
341341
}
342342

343-
static u32 BufferLength(const AmdGpu::Buffer& buffer) {
344-
const auto stride = buffer.GetStride();
345-
if (stride < sizeof(f32)) {
346-
ASSERT(sizeof(f32) % stride == 0);
347-
return (((buffer.num_records - 1) / sizeof(f32)) + 1) * stride;
348-
} else if (stride == sizeof(f32)) {
349-
return buffer.num_records;
350-
} else {
351-
ASSERT(stride % sizeof(f32) == 0);
352-
return buffer.num_records * (stride / sizeof(f32));
353-
}
354-
}
355-
356343
s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
357344
AmdGpu::Buffer& cbuf) {
358345

@@ -379,7 +366,6 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
379366
return descriptors.Add(BufferResource{
380367
.sgpr_base = std::numeric_limits<u32>::max(),
381368
.dword_offset = 0,
382-
.length = BufferLength(cbuf),
383369
.used_types = BufferDataType(inst, cbuf.GetNumberFmt()),
384370
.inline_cbuf = cbuf,
385371
});
@@ -397,7 +383,6 @@ void PatchBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
397383
binding = descriptors.Add(BufferResource{
398384
.sgpr_base = sharp.sgpr_base,
399385
.dword_offset = sharp.dword_offset,
400-
.length = BufferLength(buffer),
401386
.used_types = BufferDataType(inst, buffer.GetNumberFmt()),
402387
.is_written = IsBufferStore(inst),
403388
});

src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ void Visit(Info& info, IR::Inst& inst) {
5050
case IR::Opcode::ImageQueryLod:
5151
info.has_image_query = true;
5252
break;
53+
case IR::Opcode::LaneId:
54+
info.uses_lane_id = true;
55+
break;
5356
default:
5457
break;
5558
}

0 commit comments

Comments
 (0)