Skip to content

Commit 56f4b8a

Browse files
authored
shader_recompiler: Implement shader export formats. (#2226)
1 parent b3c573f commit 56f4b8a

File tree

14 files changed

+286
-57
lines changed

14 files changed

+286
-57
lines changed

src/shader_recompiler/backend/spirv/emit_spirv_bitwise_conversion.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,4 +58,48 @@ Id EmitUnpackHalf2x16(EmitContext& ctx, Id value) {
5858
return ctx.OpUnpackHalf2x16(ctx.F32[2], value);
5959
}
6060

61+
// Emits a PackUnorm2x16 operation on `value` with a scalar U32 result type.
// Presumably `value` is a two-component F32 vector -- confirm against the IR opcode table.
Id EmitPackUnorm2x16(EmitContext& ctx, Id value) {
62+
return ctx.OpPackUnorm2x16(ctx.U32[1], value);
63+
}
64+
65+
// Emits an UnpackUnorm2x16 operation on `value`; the result type is the two-component
// F32 vector type (ctx.F32[2]).
Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value) {
66+
return ctx.OpUnpackUnorm2x16(ctx.F32[2], value);
67+
}
68+
69+
// Emits a PackSnorm2x16 operation on `value` with a scalar U32 result type.
// Presumably `value` is a two-component F32 vector -- confirm against the IR opcode table.
Id EmitPackSnorm2x16(EmitContext& ctx, Id value) {
70+
return ctx.OpPackSnorm2x16(ctx.U32[1], value);
71+
}
72+
73+
// Emits an UnpackSnorm2x16 operation on `value`; the result type is the two-component
// F32 vector type (ctx.F32[2]).
Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value) {
74+
return ctx.OpUnpackSnorm2x16(ctx.F32[2], value);
75+
}
76+
77+
// Packs components 0 and 1 of the composite `value` into one U32:
// component 0 is used as the base and component 1 is inserted at bit offset 16, width 16.
// NOTE(review): neither component is clamped or masked before packing -- confirm that
// callers do not rely on saturation to 16 bits.
Id EmitPackUint2x16(EmitContext& ctx, Id value) {
78+
// No SPIR-V instruction for this, do it manually.
79+
const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
80+
const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
81+
// The two ConstU32(16U) operands are presumably (offset, count), as in SPIR-V OpBitFieldInsert.
return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
82+
}
83+
84+
// Splits the U32 `value` into a two-component U32 vector using unsigned bit-field
// extracts: component 0 from offset 0 and component 1 from offset 16, each 16 bits wide.
Id EmitUnpackUint2x16(EmitContext& ctx, Id value) {
85+
// No SPIR-V instruction for this, do it manually.
86+
const auto x{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
87+
const auto y{ctx.OpBitFieldUExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
88+
return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
89+
}
90+
91+
// Packs components 0 and 1 of the composite `value` into one U32:
// component 0 is used as the base and component 1 is inserted at bit offset 16, width 16.
// The body is identical to EmitPackUint2x16; the components are extracted as U32.
// NOTE(review): no signed clamping is applied before packing -- confirm that callers
// do not rely on saturation to the 16-bit signed range.
Id EmitPackSint2x16(EmitContext& ctx, Id value) {
92+
// No SPIR-V instruction for this, do it manually.
93+
const auto x{ctx.OpCompositeExtract(ctx.U32[1], value, 0)};
94+
const auto y{ctx.OpCompositeExtract(ctx.U32[1], value, 1)};
95+
// The two ConstU32(16U) operands are presumably (offset, count), as in SPIR-V OpBitFieldInsert.
return ctx.OpBitFieldInsert(ctx.U32[1], x, y, ctx.ConstU32(16U), ctx.ConstU32(16U));
96+
}
97+
98+
// Splits the U32 `value` into a two-component vector using signed (SExtract) bit-field
// extracts: component 0 from offset 0 and component 1 from offset 16, each 16 bits wide.
// The result is still typed with the U32 types (ctx.U32[1] / ctx.U32[2]).
Id EmitUnpackSint2x16(EmitContext& ctx, Id value) {
99+
// No SPIR-V instruction for this, do it manually.
100+
const auto x{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(0U), ctx.ConstU32(16U))};
101+
const auto y{ctx.OpBitFieldSExtract(ctx.U32[1], value, ctx.ConstU32(16U), ctx.ConstU32(16U))};
102+
return ctx.OpCompositeConstruct(ctx.U32[2], x, y);
103+
}
104+
61105
} // namespace Shader::Backend::SPIRV

src/shader_recompiler/backend/spirv/emit_spirv_instructions.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,14 @@ Id EmitPackFloat2x16(EmitContext& ctx, Id value);
197197
Id EmitUnpackFloat2x16(EmitContext& ctx, Id value);
198198
Id EmitPackHalf2x16(EmitContext& ctx, Id value);
199199
Id EmitUnpackHalf2x16(EmitContext& ctx, Id value);
200+
// 2x16 pack/unpack emitters for unorm, snorm, uint and sint formats.
// Each takes the emit context and a single operand Id and returns the result Id.
Id EmitPackUnorm2x16(EmitContext& ctx, Id value);
201+
Id EmitUnpackUnorm2x16(EmitContext& ctx, Id value);
202+
Id EmitPackSnorm2x16(EmitContext& ctx, Id value);
203+
Id EmitUnpackSnorm2x16(EmitContext& ctx, Id value);
204+
Id EmitPackUint2x16(EmitContext& ctx, Id value);
205+
Id EmitUnpackUint2x16(EmitContext& ctx, Id value);
206+
Id EmitPackSint2x16(EmitContext& ctx, Id value);
207+
Id EmitUnpackSint2x16(EmitContext& ctx, Id value);
200208
Id EmitFPAbs16(EmitContext& ctx, Id value);
201209
Id EmitFPAbs32(EmitContext& ctx, Id value);
202210
Id EmitFPAbs64(EmitContext& ctx, Id value);

src/shader_recompiler/frontend/translate/export.cpp

Lines changed: 122 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,125 @@
77

88
namespace Shader::Gcn {
99

10+
// Maps an export component index through the color buffer's swizzle.
// `comp` indexes a four-element array built from the swizzle and is not bounds-checked here.
// Returns the swizzle entry's offset from CompSwizzle::Red when the entry is at or above
// Red; otherwise returns `comp` unchanged.
u32 SwizzleMrtComponent(const FragmentRuntimeInfo::PsColorBuffer& color_buffer, u32 comp) {
11+
const auto [r, g, b, a] = color_buffer.swizzle;
12+
const std::array swizzle_array = {r, g, b, a};
13+
const auto swizzled_comp_type = static_cast<u32>(swizzle_array[comp]);
14+
constexpr auto min_comp_type = static_cast<u32>(AmdGpu::CompSwizzle::Red);
15+
// Entries below Red are presumably constant/non-channel selectors -- TODO confirm.
return swizzled_comp_type >= min_comp_type ? swizzled_comp_type - min_comp_type : comp;
16+
}
17+
18+
// Applies the color buffer's write number conversion to `value` and stores the result
// into component `comp` of `attribute`. `comp` is used as given; no swizzling happens here.
void Translator::ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
19+
const FragmentRuntimeInfo::PsColorBuffer& color_buffer) {
20+
const auto converted = ApplyWriteNumberConversion(ir, value, color_buffer.num_conversion);
21+
ir.SetAttribute(attribute, converted, comp);
22+
}
23+
24+
// Exports one packed 32-bit register to a render-target attribute.
// `value` is unpacked into two components according to the color buffer's export format;
// they are written to the swizzled positions of components idx*2 and idx*2+1.
// Returns without exporting for the Zero format; any format not listed hits UNREACHABLE_MSG.
void Translator::ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
25+
// Color buffer index is the attribute's offset from RenderTarget0.
const u32 color_buffer_idx =
26+
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
27+
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
28+
29+
IR::Value unpacked_value;
30+
// Set for the UINT16/SINT16 formats, whose unpacked components are bit-cast to F32 below.
bool is_integer = false;
31+
switch (color_buffer.export_format) {
32+
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
33+
// No export
34+
return;
35+
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_FP16:
36+
unpacked_value = ir.UnpackHalf2x16(value);
37+
break;
38+
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UNORM16:
39+
unpacked_value = ir.UnpackUnorm2x16(value);
40+
break;
41+
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SNORM16:
42+
unpacked_value = ir.UnpackSnorm2x16(value);
43+
break;
44+
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_UINT16:
45+
unpacked_value = ir.UnpackUint2x16(value);
46+
is_integer = true;
47+
break;
48+
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_SINT16:
49+
unpacked_value = ir.UnpackSint2x16(value);
50+
is_integer = true;
51+
break;
52+
default:
53+
UNREACHABLE_MSG("Unimplemented compressed MRT export format {}",
54+
static_cast<u32>(color_buffer.export_format));
55+
break;
56+
}
57+
58+
const auto r = ir.CompositeExtract(unpacked_value, 0);
59+
const auto g = ir.CompositeExtract(unpacked_value, 1);
60+
// Integer components are reinterpreted (not converted) so they fit the F32 export path.
const IR::F32 float_r = is_integer ? ir.BitCast<IR::F32>(IR::U32{r}) : IR::F32{r};
61+
const IR::F32 float_g = is_integer ? ir.BitCast<IR::F32>(IR::U32{g}) : IR::F32{g};
62+
63+
const auto swizzled_r = SwizzleMrtComponent(color_buffer, idx * 2);
64+
const auto swizzled_g = SwizzleMrtComponent(color_buffer, idx * 2 + 1);
65+
66+
ExportMrtValue(attribute, swizzled_r, float_r, color_buffer);
67+
ExportMrtValue(attribute, swizzled_g, float_g, color_buffer);
68+
}
69+
70+
// Exports one F32 component to a render-target attribute.
// The component index is swizzled first; the export format then decides whether that
// swizzled component is written at all. Any format not listed hits UNREACHABLE_MSG.
void Translator::ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
71+
// Color buffer index is the attribute's offset from RenderTarget0.
const u32 color_buffer_idx =
72+
static_cast<u32>(attribute) - static_cast<u32>(IR::Attribute::RenderTarget0);
73+
const auto color_buffer = runtime_info.fs_info.color_buffers[color_buffer_idx];
74+
const auto swizzled_comp = SwizzleMrtComponent(color_buffer, comp);
75+
76+
// The per-format filters below test the swizzled index, not the original `comp`.
switch (color_buffer.export_format) {
77+
case AmdGpu::Liverpool::ShaderExportFormat::Zero:
78+
// No export
79+
return;
80+
case AmdGpu::Liverpool::ShaderExportFormat::R_32:
81+
// Red only
82+
if (swizzled_comp != 0) {
83+
return;
84+
}
85+
break;
86+
case AmdGpu::Liverpool::ShaderExportFormat::GR_32:
87+
// Red and Green only
88+
if (swizzled_comp != 0 && swizzled_comp != 1) {
89+
return;
90+
}
91+
break;
92+
case AmdGpu::Liverpool::ShaderExportFormat::AR_32:
93+
// Red and Alpha only
94+
if (swizzled_comp != 0 && swizzled_comp != 3) {
95+
return;
96+
}
97+
break;
98+
case AmdGpu::Liverpool::ShaderExportFormat::ABGR_32:
99+
// All components
100+
break;
101+
default:
102+
UNREACHABLE_MSG("Unimplemented uncompressed MRT export format {}",
103+
static_cast<u32>(color_buffer.export_format));
104+
break;
105+
}
106+
ExportMrtValue(attribute, swizzled_comp, value, color_buffer);
107+
}
108+
109+
// Exports one packed 32-bit register to `attribute`.
// Render-target attributes are delegated to ExportMrtCompressed. All other attributes
// are unpacked as two half floats and written to components idx*2 and idx*2+1.
void Translator::ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value) {
110+
if (IsMrt(attribute)) {
111+
ExportMrtCompressed(attribute, idx, value);
112+
return;
113+
}
114+
const IR::Value unpacked_value = ir.UnpackHalf2x16(value);
115+
const IR::F32 r = IR::F32{ir.CompositeExtract(unpacked_value, 0)};
116+
const IR::F32 g = IR::F32{ir.CompositeExtract(unpacked_value, 1)};
117+
ir.SetAttribute(attribute, r, idx * 2);
118+
ir.SetAttribute(attribute, g, idx * 2 + 1);
119+
}
120+
121+
// Exports one F32 component to `attribute`.
// Render-target attributes are delegated to ExportMrtUncompressed; all other attributes
// are written directly to component `comp`.
void Translator::ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value) {
122+
if (IsMrt(attribute)) {
123+
ExportMrtUncompressed(attribute, comp, value);
124+
return;
125+
}
126+
ir.SetAttribute(attribute, value, comp);
127+
}
128+
10129
void Translator::EmitExport(const GcnInst& inst) {
11130
if (ir.block->has_multiple_predecessors && info.stage == Stage::Fragment) {
12131
ir.Discard(ir.LogicalNot(ir.GetExec()));
@@ -26,41 +145,15 @@ void Translator::EmitExport(const GcnInst& inst) {
26145
IR::VectorReg(inst.src[3].code),
27146
};
28147

29-
const auto set_attribute = [&](u32 comp, IR::F32 value) {
30-
if (!IR::IsMrt(attrib)) {
31-
ir.SetAttribute(attrib, value, comp);
32-
return;
33-
}
34-
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
35-
const auto col_buf = runtime_info.fs_info.color_buffers[index];
36-
const auto converted = IR::ApplyWriteNumberConversion(ir, value, col_buf.num_conversion);
37-
const auto [r, g, b, a] = col_buf.swizzle;
38-
const std::array swizzle_array = {r, g, b, a};
39-
const auto swizzled_comp = swizzle_array[comp];
40-
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
41-
ir.SetAttribute(attrib, converted, comp);
42-
return;
43-
}
44-
ir.SetAttribute(attrib, converted, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
45-
};
46-
47-
const auto unpack = [&](u32 idx) {
48-
const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx]));
49-
const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)};
50-
const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)};
51-
set_attribute(idx * 2, r);
52-
set_attribute(idx * 2 + 1, g);
53-
};
54-
55148
// Components are float16 packed into a VGPR
56149
if (exp.compr) {
57150
// Export R, G
58151
if (exp.en & 1) {
59-
unpack(0);
152+
ExportCompressed(attrib, 0, ir.GetVectorReg<IR::U32>(vsrc[0]));
60153
}
61154
// Export B, A
62155
if ((exp.en >> 2) & 1) {
63-
unpack(1);
156+
ExportCompressed(attrib, 1, ir.GetVectorReg<IR::U32>(vsrc[1]));
64157
}
65158
} else {
66159
// Components are float32 into separate VGPRS
@@ -69,8 +162,7 @@ void Translator::EmitExport(const GcnInst& inst) {
69162
if ((mask & 1) == 0) {
70163
continue;
71164
}
72-
const IR::F32 comp = ir.GetVectorReg<IR::F32>(vsrc[i]);
73-
set_attribute(i, comp);
165+
ExportUncompressed(attrib, i, ir.GetVectorReg<IR::F32>(vsrc[i]));
74166
}
75167
}
76168
if (IR::IsMrt(attrib)) {

src/shader_recompiler/frontend/translate/translate.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ class Translator {
170170
void V_SUBBREV_U32(const GcnInst& inst);
171171
void V_LDEXP_F32(const GcnInst& inst);
172172
void V_CVT_PKNORM_U16_F32(const GcnInst& inst);
173+
void V_CVT_PKNORM_I16_F32(const GcnInst& inst);
173174
void V_CVT_PKRTZ_F16_F32(const GcnInst& inst);
174175

175176
// VOP1
@@ -244,6 +245,7 @@ class Translator {
244245
void V_SAD(const GcnInst& inst);
245246
void V_SAD_U32(const GcnInst& inst);
246247
void V_CVT_PK_U16_U32(const GcnInst& inst);
248+
void V_CVT_PK_I16_I32(const GcnInst& inst);
247249
void V_CVT_PK_U8_F32(const GcnInst& inst);
248250
void V_LSHL_B64(const GcnInst& inst);
249251
void V_MUL_F64(const GcnInst& inst);
@@ -306,6 +308,13 @@ class Translator {
306308
IR::F32 SelectCubeResult(const IR::F32& x, const IR::F32& y, const IR::F32& z,
307309
const IR::F32& x_res, const IR::F32& y_res, const IR::F32& z_res);
308310

311+
// Export helpers. The Compressed variants take a packed U32 and a pair index `idx`;
// the Uncompressed variants take a single F32 and a component index `comp`.
void ExportMrtValue(IR::Attribute attribute, u32 comp, const IR::F32& value,
312+
const FragmentRuntimeInfo::PsColorBuffer& color_buffer);
313+
void ExportMrtCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
314+
void ExportMrtUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
315+
void ExportCompressed(IR::Attribute attribute, u32 idx, const IR::U32& value);
316+
void ExportUncompressed(IR::Attribute attribute, u32 comp, const IR::F32& value);
317+
309318
void LogMissingOpcode(const GcnInst& inst);
310319

311320
private:

src/shader_recompiler/frontend/translate/vector_alu.cpp

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
9696
return V_LDEXP_F32(inst);
9797
case Opcode::V_CVT_PKNORM_U16_F32:
9898
return V_CVT_PKNORM_U16_F32(inst);
99+
case Opcode::V_CVT_PKNORM_I16_F32:
100+
return V_CVT_PKNORM_I16_F32(inst);
99101
case Opcode::V_CVT_PKRTZ_F16_F32:
100102
return V_CVT_PKRTZ_F16_F32(inst);
101103

@@ -376,6 +378,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
376378
return V_SAD_U32(inst);
377379
case Opcode::V_CVT_PK_U16_U32:
378380
return V_CVT_PK_U16_U32(inst);
381+
case Opcode::V_CVT_PK_I16_I32:
382+
return V_CVT_PK_I16_I32(inst);
379383
case Opcode::V_CVT_PK_U8_F32:
380384
return V_CVT_PK_U8_F32(inst);
381385
case Opcode::V_LSHL_B64:
@@ -645,12 +649,15 @@ void Translator::V_LDEXP_F32(const GcnInst& inst) {
645649
}
646650

647651
void Translator::V_CVT_PKNORM_U16_F32(const GcnInst& inst) {
648-
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
649-
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
650-
const IR::U32 dst0 = ir.ConvertFToU(32, ir.FPMul(src0, ir.Imm32(65535.f)));
651-
const IR::U32 dst1 = ir.ConvertFToU(32, ir.FPMul(src1, ir.Imm32(65535.f)));
652-
const IR::VectorReg dst_reg{inst.dst[0].code};
653-
ir.SetVectorReg(dst_reg, ir.BitFieldInsert(dst0, dst1, ir.Imm32(16), ir.Imm32(16)));
652+
const IR::Value vec_f32 =
653+
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
654+
SetDst(inst.dst[0], ir.PackUnorm2x16(vec_f32));
655+
}
656+
657+
// Reads src[0] and src[1] as F32, combines them into a two-component composite,
// packs it with PackSnorm2x16 and writes the result to dst[0].
void Translator::V_CVT_PKNORM_I16_F32(const GcnInst& inst) {
658+
const IR::Value vec_f32 =
659+
ir.CompositeConstruct(GetSrc<IR::F32>(inst.src[0]), GetSrc<IR::F32>(inst.src[1]));
660+
SetDst(inst.dst[0], ir.PackSnorm2x16(vec_f32));
654661
}
655662

656663
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
@@ -1237,11 +1244,15 @@ void Translator::V_SAD_U32(const GcnInst& inst) {
12371244
}
12381245

12391246
void Translator::V_CVT_PK_U16_U32(const GcnInst& inst) {
1240-
const IR::U32 src0{GetSrc(inst.src[0])};
1241-
const IR::U32 src1{GetSrc(inst.src[1])};
1242-
const IR::U32 lo = ir.IMin(src0, ir.Imm32(0xFFFF), false);
1243-
const IR::U32 hi = ir.IMin(src1, ir.Imm32(0xFFFF), false);
1244-
SetDst(inst.dst[0], ir.BitFieldInsert(lo, hi, ir.Imm32(16), ir.Imm32(16)));
1247+
const IR::Value vec_u32 =
1248+
ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
1249+
SetDst(inst.dst[0], ir.PackUint2x16(vec_u32));
1250+
}
1251+
1252+
// Reads src[0] and src[1] as U32, combines them into a two-component composite,
// packs it with PackSint2x16 and writes the result to dst[0].
// NOTE(review): no clamping of the sources is visible here -- confirm whether the
// instruction is expected to saturate to the signed 16-bit range.
void Translator::V_CVT_PK_I16_I32(const GcnInst& inst) {
1253+
const IR::Value vec_u32 =
1254+
ir.CompositeConstruct(GetSrc<IR::U32>(inst.src[0]), GetSrc<IR::U32>(inst.src[1]));
1255+
SetDst(inst.dst[0], ir.PackSint2x16(vec_u32));
12451256
}
12461257

12471258
void Translator::V_CVT_PK_U8_F32(const GcnInst& inst) {

src/shader_recompiler/ir/ir_emitter.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,38 @@ Value IREmitter::UnpackHalf2x16(const U32& value) {
795795
return Inst(Opcode::UnpackHalf2x16, value);
796796
}
797797

798+
// Emits a PackUnorm2x16 IR instruction on `vector` and returns its U32 result.
U32 IREmitter::PackUnorm2x16(const Value& vector) {
799+
return Inst<U32>(Opcode::PackUnorm2x16, vector);
800+
}
801+
802+
// Emits an UnpackUnorm2x16 IR instruction on the U32 `value` and returns the result
// as an untyped Value.
Value IREmitter::UnpackUnorm2x16(const U32& value) {
803+
return Inst(Opcode::UnpackUnorm2x16, value);
804+
}
805+
806+
// Emits a PackSnorm2x16 IR instruction on `vector` and returns its U32 result.
U32 IREmitter::PackSnorm2x16(const Value& vector) {
807+
return Inst<U32>(Opcode::PackSnorm2x16, vector);
808+
}
809+
810+
// Emits an UnpackSnorm2x16 IR instruction on the U32 `value` and returns the result
// as an untyped Value.
Value IREmitter::UnpackSnorm2x16(const U32& value) {
811+
return Inst(Opcode::UnpackSnorm2x16, value);
812+
}
813+
814+
// Emits a PackUint2x16 IR instruction on `value` and returns its U32 result.
U32 IREmitter::PackUint2x16(const Value& value) {
815+
return Inst<U32>(Opcode::PackUint2x16, value);
816+
}
817+
818+
// Emits an UnpackUint2x16 IR instruction on the U32 `value` and returns the result
// as an untyped Value.
Value IREmitter::UnpackUint2x16(const U32& value) {
819+
return Inst(Opcode::UnpackUint2x16, value);
820+
}
821+
822+
// Emits a PackSint2x16 IR instruction on `value` and returns its U32 result.
U32 IREmitter::PackSint2x16(const Value& value) {
823+
return Inst<U32>(Opcode::PackSint2x16, value);
824+
}
825+
826+
// Emits an UnpackSint2x16 IR instruction on the U32 `value` and returns the result
// as an untyped Value.
Value IREmitter::UnpackSint2x16(const U32& value) {
827+
return Inst(Opcode::UnpackSint2x16, value);
828+
}
829+
798830
F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
799831
if (a.Type() != b.Type()) {
800832
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());

src/shader_recompiler/ir/ir_emitter.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,14 @@ class IREmitter {
175175

176176
[[nodiscard]] U32 PackHalf2x16(const Value& vector);
177177
[[nodiscard]] Value UnpackHalf2x16(const U32& value);
178+
// 2x16 pack/unpack helpers for unorm, snorm, uint and sint formats.
// Pack* take a Value and return a U32; Unpack* take a U32 and return a Value.
[[nodiscard]] U32 PackUnorm2x16(const Value& vector);
179+
[[nodiscard]] Value UnpackUnorm2x16(const U32& value);
180+
[[nodiscard]] U32 PackSnorm2x16(const Value& vector);
181+
[[nodiscard]] Value UnpackSnorm2x16(const U32& value);
182+
[[nodiscard]] U32 PackUint2x16(const Value& value);
183+
[[nodiscard]] Value UnpackUint2x16(const U32& value);
184+
[[nodiscard]] U32 PackSint2x16(const Value& value);
185+
[[nodiscard]] Value UnpackSint2x16(const U32& value);
178186

179187
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
180188
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);

0 commit comments

Comments
 (0)