Skip to content

Commit 664e61f

Browse files
committed
WIP Tessellation partial implementation. Squash commits
1 parent 6065dd8 commit 664e61f

34 files changed

+1143
-180
lines changed

CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,10 @@ if (NOT CMAKE_BUILD_TYPE)
1515
set(CMAKE_BUILD_TYPE Release)
1616
endif()
1717

18+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
19+
add_compile_definitions(_DEBUG)
20+
endif()
21+
1822
project(shadPS4)
1923

2024
# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.

externals/ext-boost

Submodule ext-boost updated 1563 files

src/common/logging/backend.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,7 @@ class ColorConsoleBackend {
6262
class FileBackend {
6363
public:
6464
explicit FileBackend(const std::filesystem::path& filename)
65-
: file{std::filesystem::path("/dev/null"), FS::FileAccessMode::Write,
66-
FS::FileType::TextFile} {}
65+
: file{filename, FS::FileAccessMode::Write, FS::FileType::TextFile} {}
6766

6867
~FileBackend() = default;
6968

src/core/libraries/gnmdriver/gnmdriver.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1599,7 +1599,6 @@ s32 PS4_SYSV_ABI sceGnmSetGsShader(u32* cmdbuf, u32 size, const u32* gs_regs) {
15991599

16001600
s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u32 param4) {
16011601
LOG_TRACE(Lib_GnmDriver, "called");
1602-
16031602
if (!cmdbuf || size < 0x1E) {
16041603
return -1;
16051604
}
@@ -1617,11 +1616,19 @@ s32 PS4_SYSV_ABI sceGnmSetHsShader(u32* cmdbuf, u32 size, const u32* hs_regs, u3
16171616
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x108u, hs_regs[0], 0u); // SPI_SHADER_PGM_LO_HS
16181617
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x10au, hs_regs[2],
16191618
hs_regs[3]); // SPI_SHADER_PGM_RSRC1_HS/SPI_SHADER_PGM_RSRC2_HS
1619+
// This is wrong but just stash them here for now
1620+
// Should read the tess constants buffer instead, which is bound as V#, into runtime_info.
1621+
// HsConstants member of HsProgram is used to derive TessellationDataConstantBuffer, its members
1622+
// don't correspond to real registers
1623+
cmdbuf = PM4CmdSetData::SetShReg(cmdbuf, 0x11cu, hs_regs[4], hs_regs[5], hs_regs[6], hs_regs[7],
1624+
hs_regs[8], hs_regs[9], hs_regs[10], hs_regs[11], hs_regs[12],
1625+
hs_regs[13]); // TODO comment
16201626
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x286u, hs_regs[5],
1621-
hs_regs[5]); // VGT_HOS_MAX_TESS_LEVEL
1627+
hs_regs[6]); // VGT_HOS_MAX_TESS_LEVEL
16221628
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2dbu, hs_regs[4]); // VGT_TF_PARAM
16231629
cmdbuf = PM4CmdSetData::SetContextReg(cmdbuf, 0x2d6u, param4); // VGT_LS_HS_CONFIG
16241630

1631+
// right padding?
16251632
WriteTrailingNop<11>(cmdbuf);
16261633
return ORBIS_OK;
16271634
}

src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp

Lines changed: 40 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,15 +47,24 @@ Id VsOutputAttrPointer(EmitContext& ctx, VsOutput output) {
4747
}
4848
}
4949

50-
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
50+
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, Id array_index, u32 element) {
5151
if (IR::IsParam(attr)) {
5252
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
5353
const auto& info{ctx.output_params.at(index)};
5454
ASSERT(info.num_components > 0);
55-
if (info.num_components == 1) {
55+
Id base = info.id;
56+
boost::container::small_vector<Id, 2> indices;
57+
if (ctx.l_stage == LogicalStage::TessellationControl) {
58+
indices.push_back(array_index);
59+
}
60+
if (info.num_components > 1) {
61+
indices.push_back(ctx.ConstU32(element));
62+
}
63+
64+
if (indices.empty()) {
5665
return info.id;
5766
} else {
58-
return ctx.OpAccessChain(info.pointer_type, info.id, ctx.ConstU32(element));
67+
return ctx.OpAccessChain(info.pointer_type, info.id, indices);
5968
}
6069
}
6170
if (IR::IsMrt(attr)) {
@@ -84,6 +93,10 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
8493
}
8594
}
8695

96+
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
97+
return OutputAttrPointer(ctx, attr, {}, element);
98+
}
99+
87100
std::pair<Id, bool> OutputAttrComponentType(EmitContext& ctx, IR::Attribute attr) {
88101
if (IR::IsParam(attr)) {
89102
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
@@ -175,23 +188,31 @@ Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
175188
rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
176189
}
177190

178-
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index) {
179-
if (ctx.info.stage == Stage::Geometry) {
191+
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
192+
if (ctx.info.l_stage == LogicalStage::Geometry ||
193+
ctx.info.l_stage == LogicalStage::TessellationControl ||
194+
ctx.info.l_stage == LogicalStage::TessellationEval) {
180195
if (IR::IsPosition(attr)) {
196+
ASSERT(ctx.info.l_stage != LogicalStage::TessellationControl &&
197+
ctx.info.l_stage != LogicalStage::TessellationEval);
181198
ASSERT(attr == IR::Attribute::Position0);
182199
const auto position_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
183-
const auto pointer{ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, ctx.ConstU32(index),
184-
ctx.ConstU32(0u))};
200+
const auto pointer{
201+
ctx.OpAccessChain(position_arr_ptr, ctx.gl_in, index, ctx.ConstU32(0u))};
185202
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
186203
return ctx.OpLoad(ctx.F32[1],
187204
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
188-
}
189-
190-
if (IR::IsParam(attr)) {
205+
} else if (IR::IsTessCoord(attr)) {
206+
const u32 component = attr == IR::Attribute::TessellationEvaluationPointU ? 0 : 1;
207+
const auto component_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
208+
const auto pointer{
209+
ctx.OpAccessChain(component_ptr, ctx.tess_coord, ctx.ConstU32(component))};
210+
return ctx.OpLoad(ctx.F32[1], pointer);
211+
} else if (IR::IsParam(attr)) {
191212
const u32 param_id{u32(attr) - u32(IR::Attribute::Param0)};
192213
const auto param = ctx.input_params.at(param_id).id;
193214
const auto param_arr_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[4]);
194-
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, ctx.ConstU32(index))};
215+
const auto pointer{ctx.OpAccessChain(param_arr_ptr, param, index)};
195216
const auto position_comp_ptr = ctx.TypePointer(spv::StorageClass::Input, ctx.F32[1]);
196217
return ctx.OpLoad(ctx.F32[1],
197218
ctx.OpAccessChain(position_comp_ptr, pointer, ctx.ConstU32(comp)));
@@ -273,6 +294,7 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
273294
return ctx.OpSelect(ctx.U32[1], ctx.OpLoad(ctx.U1[1], ctx.front_facing), ctx.u32_one_value,
274295
ctx.u32_zero_value);
275296
case IR::Attribute::PrimitiveId:
297+
case IR::Attribute::TessPatchIdInVgt: // TODO see why this isnt DCEd
276298
ASSERT(ctx.info.l_stage == LogicalStage::Geometry ||
277299
ctx.info.l_stage == LogicalStage::TessellationControl ||
278300
ctx.info.l_stage == LogicalStage::TessellationEval);
@@ -298,7 +320,13 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 elemen
298320
LOG_WARNING(Render_Vulkan, "Ignoring pos1 export");
299321
return;
300322
}
301-
const Id pointer{OutputAttrPointer(ctx, attr, element)};
323+
324+
Id pointer;
325+
if (ctx.l_stage == LogicalStage::TessellationControl) {
326+
pointer = OutputAttrPointer(ctx, attr, ctx.OpLoad(ctx.U32[1], ctx.invocation_id), element);
327+
} else {
328+
pointer = OutputAttrPointer(ctx, attr, element);
329+
}
302330
const auto component_type{OutputAttrComponentType(ctx, attr)};
303331
if (component_type.second) {
304332
ctx.OpStore(pointer, ctx.OpBitcast(component_type.first, value));

src/shader_recompiler/backend/spirv/emit_spirv_instructions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ Id EmitBufferAtomicAnd32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addres
8585
Id EmitBufferAtomicOr32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
8686
Id EmitBufferAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
8787
Id EmitBufferAtomicSwap32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value);
88-
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, u32 index);
88+
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index);
8989
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
9090
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
9191
Id EmitGetPatch(EmitContext& ctx, IR::Patch patch);

src/shader_recompiler/backend/spirv/spirv_emit_context.cpp

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,13 +343,51 @@ void EmitContext::DefineInputs() {
343343
}
344344
case LogicalStage::TessellationControl: {
345345
invocation_id =
346-
DefineVariable(U32[3], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
346+
DefineVariable(U32[1], spv::BuiltIn::InvocationId, spv::StorageClass::Input);
347347
patch_vertices =
348348
DefineVariable(U32[1], spv::BuiltIn::PatchVertices, spv::StorageClass::Input);
349+
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
350+
351+
for (u32 i = 0; i < IR::NumParams; i++) {
352+
const IR::Attribute param{IR::Attribute::Param0 + i};
353+
if (!info.loads.GetAny(param)) {
354+
continue;
355+
}
356+
const u32 num_components = info.loads.NumComponents(param);
357+
// The input vertex count isn't statically known, so make length 32 (what glslang does)
358+
const Id type{TypeArray(F32[4], ConstU32(32u))};
359+
const Id id{DefineInput(type, i)};
360+
Name(id, fmt::format("in_attr{}", i));
361+
input_params[i] = {id, input_f32, F32[1], 4};
362+
}
349363
break;
350364
}
351365
case LogicalStage::TessellationEval: {
352366
tess_coord = DefineInput(F32[3], std::nullopt, spv::BuiltIn::TessCoord);
367+
primitive_id = DefineVariable(U32[1], spv::BuiltIn::PrimitiveId, spv::StorageClass::Input);
368+
369+
for (u32 i = 0; i < IR::NumParams; i++) {
370+
const IR::Attribute param{IR::Attribute::Param0 + i};
371+
if (!info.loads.GetAny(param)) {
372+
continue;
373+
}
374+
const u32 num_components = info.loads.NumComponents(param);
375+
// The input vertex count isn't statically known, so make length 32 (what glslang does)
376+
const Id type{TypeArray(F32[4], ConstU32(32u))};
377+
const Id id{DefineInput(type, i)};
378+
Name(id, fmt::format("in_attr{}", i));
379+
input_params[i] = {id, input_f32, F32[1], 4};
380+
}
381+
382+
for (size_t index = 0; index < 30; ++index) {
383+
if (!(info.uses_patches & (1U << index))) {
384+
continue;
385+
}
386+
const Id id{DefineInput(F32[4], index)};
387+
Decorate(id, spv::Decoration::Patch);
388+
Name(id, fmt::format("patch_in{}", index));
389+
patches[index] = id;
390+
}
353391
break;
354392
}
355393
default:
@@ -360,6 +398,9 @@ void EmitContext::DefineInputs() {
360398
void EmitContext::DefineOutputs() {
361399
switch (l_stage) {
362400
case LogicalStage::Vertex: {
401+
// No point in defining builtin outputs (i.e. position) unless next stage is fragment?
402+
// Might cause problems linking with tcs
403+
363404
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
364405
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
365406
info.stores.Get(IR::Attribute::Position2) ||
@@ -397,16 +438,60 @@ void EmitContext::DefineOutputs() {
397438
DefineOutput(type, std::nullopt, spv::BuiltIn::TessLevelInner);
398439
Decorate(output_tess_level_inner, spv::Decoration::Patch);
399440
}
441+
442+
for (u32 i = 0; i < IR::NumParams; i++) {
443+
const IR::Attribute param{IR::Attribute::Param0 + i};
444+
if (!info.stores.GetAny(param)) {
445+
continue;
446+
}
447+
const u32 num_components = info.stores.NumComponents(param);
448+
// Per-vertex outputs are arrayed by the number of output control points (from hs_info)
449+
const Id type{TypeArray(F32[4], ConstU32(runtime_info.hs_info.output_control_points))};
450+
const Id id{DefineOutput(type, i)};
451+
Name(id, fmt::format("out_attr{}", i));
452+
output_params[i] = {id, output_f32, F32[1], 4};
453+
}
454+
455+
// TODO is it ok to share output locations between patch consts and
456+
// per-vertex output attrs?
457+
// spirv-val doesn't complain so idk
400458
for (size_t index = 0; index < 30; ++index) {
401459
if (!(info.uses_patches & (1U << index))) {
402460
continue;
403461
}
404462
const Id id{DefineOutput(F32[4], index)};
405463
Decorate(id, spv::Decoration::Patch);
464+
Name(id, fmt::format("patch_out{}", index));
406465
patches[index] = id;
407466
}
408467
break;
409468
}
469+
case LogicalStage::TessellationEval: {
470+
// TODO copied from logical vertex, figure this out
471+
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
472+
const bool has_extra_pos_stores = info.stores.Get(IR::Attribute::Position1) ||
473+
info.stores.Get(IR::Attribute::Position2) ||
474+
info.stores.Get(IR::Attribute::Position3);
475+
if (has_extra_pos_stores) {
476+
const Id type{TypeArray(F32[1], ConstU32(8U))};
477+
clip_distances =
478+
DefineVariable(type, spv::BuiltIn::ClipDistance, spv::StorageClass::Output);
479+
cull_distances =
480+
DefineVariable(type, spv::BuiltIn::CullDistance, spv::StorageClass::Output);
481+
}
482+
for (u32 i = 0; i < IR::NumParams; i++) {
483+
const IR::Attribute param{IR::Attribute::Param0 + i};
484+
if (!info.stores.GetAny(param)) {
485+
continue;
486+
}
487+
const u32 num_components = info.stores.NumComponents(param);
488+
const Id id{DefineOutput(F32[num_components], i)};
489+
Name(id, fmt::format("out_attr{}", i));
490+
output_params[i] =
491+
GetAttributeInfo(AmdGpu::NumberFormat::Float, id, num_components, true);
492+
}
493+
break;
494+
}
410495
case LogicalStage::Fragment:
411496
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
412497
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#pragma once
5+
6+
#include "common/types.h"
7+
8+
namespace Shader {
9+
10+
struct TessellationDataConstantBuffer {
11+
u32 m_lsStride;
12+
u32 m_hsCpStride; // HullStateConstants::m_cpStride != 0 ? HullStateConstants::m_cpStride :
13+
// ls_stride
14+
u32 m_hsNumPatch; // num patches submitted in threadgroup
15+
u32 m_hsOutputBase; // HullStateConstants::m_numInputCP::m_cpStride != 0 ?
16+
// HullStateConstants::m_numInputCP * ls_stride * num_patches : 0
17+
u32 m_patchConstSize; // 16 * num_patch_attrs
18+
u32 m_patchConstBase; // hs_output_base + patch_output_size
19+
u32 m_patchOutputSize; // output_cp_stride * num_output_cp
20+
f32 m_offChipTessellationFactorThreshold;
21+
u32 m_firstEdgeTessFactorIndex;
22+
};
23+
24+
} // namespace Shader

src/shader_recompiler/frontend/translate/scalar_alu.cpp

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
7979
case Opcode::S_MUL_I32:
8080
return S_MUL_I32(inst);
8181
case Opcode::S_BFE_I32:
82-
return S_BFE_I32(inst);
82+
return S_BFE(inst, true);
8383
case Opcode::S_BFE_U32:
84-
return S_BFE_U32(inst);
84+
return S_BFE(inst, false);
8585
case Opcode::S_ABSDIFF_I32:
8686
return S_ABSDIFF_I32(inst);
8787

@@ -411,30 +411,12 @@ void Translator::S_MUL_I32(const GcnInst& inst) {
411411
SetDst(inst.dst[0], ir.IMul(GetSrc(inst.src[0]), GetSrc(inst.src[1])));
412412
}
413413

414-
void Translator::S_BFE_U32(const GcnInst& inst) {
414+
void Translator::S_BFE(const GcnInst& inst, bool is_signed) {
415415
const IR::U32 src0{GetSrc(inst.src[0])};
416416
const IR::U32 src1{GetSrc(inst.src[1])};
417417
const IR::U32 offset{ir.BitwiseAnd(src1, ir.Imm32(0x1F))};
418418
const IR::U32 count{ir.BitFieldExtract(src1, ir.Imm32(16), ir.Imm32(7))};
419-
const IR::U32 result{ir.BitFieldExtract(src0, offset, count)};
420-
SetDst(inst.dst[0], result);
421-
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
422-
}
423-
424-
void Translator::S_BFE_I32(const GcnInst& inst) {
425-
const IR::U32 src0{GetSrc(inst.src[0])};
426-
const IR::U32 src1{GetSrc(inst.src[1])};
427-
IR::U32 result;
428-
429-
ASSERT_MSG(src1.IsImmediate(), "Unhandled S_BFE_I32 with non-immediate mask");
430-
u32 mask = src1.U32();
431-
ASSERT(mask != 0);
432-
u32 offset = std::countr_zero(mask);
433-
u32 count = std::popcount(mask);
434-
mask = mask >> offset;
435-
ASSERT_MSG((mask & (mask + 1)) == 0, "mask {} has non-adjacent bits set");
436-
437-
result = ir.BitFieldExtract(src0, ir.Imm32(offset), ir.Imm32(count), true);
419+
const IR::U32 result{ir.BitFieldExtract(src0, offset, count, is_signed)};
438420
SetDst(inst.dst[0], result);
439421
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
440422
}

src/shader_recompiler/frontend/translate/scalar_flow.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,12 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
3535
}
3636

3737
void Translator::S_BARRIER() {
38-
ir.Barrier();
38+
if (info.l_stage == LogicalStage::TessellationControl) {
39+
// TODO: ASSERT that we're in uniform control flow
40+
ir.TcsOutputBarrier();
41+
} else {
42+
ir.Barrier();
43+
}
3944
}
4045

4146
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {

0 commit comments

Comments
 (0)