Skip to content

Commit 070c4af

Browse files
committed
shader_recompiler: Clean up swizzle handling and handle ImageRead storage swizzle.
1 parent 4e6a7e5 commit 070c4af

File tree

11 files changed

+284
-56
lines changed

11 files changed

+284
-56
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
694694
src/shader_recompiler/ir/post_order.h
695695
src/shader_recompiler/ir/program.cpp
696696
src/shader_recompiler/ir/program.h
697+
src/shader_recompiler/ir/reinterpret.h
697698
src/shader_recompiler/ir/reg.h
698699
src/shader_recompiler/ir/type.cpp
699700
src/shader_recompiler/ir/type.h

src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,20 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index
4242
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
4343
}
4444

45+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the two-component
/// U32 vector; comp0 and comp1 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
    const Id result_type = ctx.U32[2];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1);
}
48+
49+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the three-component
/// U32 vector; comp0..comp2 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2) {
    const Id result_type = ctx.U32[3];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2);
}
53+
54+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the four-component
/// U32 vector; comp0..comp3 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2, u32 comp3) {
    const Id result_type = ctx.U32[4];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2, comp3);
}
58+
4559
/// Builds a two-component F16 vector from e1 and e2 through OpCompositeConstruct.
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
    const Id result_type = ctx.F16[2];
    return ctx.OpCompositeConstruct(result_type, e1, e2);
}
@@ -78,6 +92,20 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index
7892
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
7993
}
8094

95+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the two-component
/// F16 vector; comp0 and comp1 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
    const Id result_type = ctx.F16[2];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1);
}
98+
99+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the three-component
/// F16 vector; comp0..comp2 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2) {
    const Id result_type = ctx.F16[3];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2);
}
103+
104+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the four-component
/// F16 vector; comp0..comp3 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2, u32 comp3) {
    const Id result_type = ctx.F16[4];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2, comp3);
}
108+
81109
/// Builds a two-component F32 vector from e1 and e2 through OpCompositeConstruct.
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
    const Id result_type = ctx.F32[2];
    return ctx.OpCompositeConstruct(result_type, e1, e2);
}
@@ -114,6 +142,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
114142
return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
115143
}
116144

145+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the two-component
/// F32 vector; comp0 and comp1 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
    const Id result_type = ctx.F32[2];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1);
}
148+
149+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the three-component
/// F32 vector; comp0..comp2 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2) {
    const Id result_type = ctx.F32[3];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2);
}
153+
154+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the four-component
/// F32 vector; comp0..comp3 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2, u32 comp3) {
    const Id result_type = ctx.F32[4];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2, comp3);
}
158+
117159
void EmitCompositeConstructF64x2(EmitContext&) {
118160
UNREACHABLE_MSG("SPIR-V Instruction");
119161
}
@@ -150,4 +192,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index
150192
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
151193
}
152194

195+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the two-component
/// F64 vector; comp0 and comp1 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
    const Id result_type = ctx.F64[2];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1);
}
198+
199+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the three-component
/// F64 vector; comp0..comp2 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2) {
    const Id result_type = ctx.F64[3];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2);
}
203+
204+
/// Emits an OpVectorShuffle over composite1/composite2 whose result type is the four-component
/// F64 vector; comp0..comp3 are forwarded unchanged as the component selectors.
Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
                             u32 comp2, u32 comp3) {
    const Id result_type = ctx.F64[4];
    return ctx.OpVectorShuffle(result_type, composite1, composite2, comp0, comp1, comp2, comp3);
}
208+
153209
} // namespace Shader::Backend::SPIRV

src/shader_recompiler/backend/spirv/emit_spirv_instructions.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,11 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
127127
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
128128
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
129129
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
130+
Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
131+
Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
132+
u32 comp2);
133+
Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
134+
u32 comp2, u32 comp3);
130135
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
131136
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
132137
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
@@ -136,6 +141,11 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
136141
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
137142
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
138143
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
144+
Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
145+
Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
146+
u32 comp2);
147+
Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
148+
u32 comp2, u32 comp3);
139149
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
140150
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
141151
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
@@ -145,6 +155,11 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
145155
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
146156
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
147157
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
158+
Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
159+
Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
160+
u32 comp2);
161+
Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
162+
u32 comp2, u32 comp3);
148163
void EmitCompositeConstructF64x2(EmitContext& ctx);
149164
void EmitCompositeConstructF64x3(EmitContext& ctx);
150165
void EmitCompositeConstructF64x4(EmitContext& ctx);
@@ -154,6 +169,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx);
154169
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
155170
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
156171
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
172+
Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
173+
Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
174+
u32 comp2);
175+
Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
176+
u32 comp2, u32 comp3);
157177
Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
158178
Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
159179
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);

src/shader_recompiler/frontend/translate/translate.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include "shader_recompiler/info.h"
1111
#include "shader_recompiler/ir/attribute.h"
1212
#include "shader_recompiler/ir/reg.h"
13+
#include "shader_recompiler/ir/reinterpret.h"
1314
#include "shader_recompiler/runtime_info.h"
1415
#include "video_core/amdgpu/resource.h"
1516
#include "video_core/amdgpu/types.h"
@@ -475,11 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) {
475476

476477
// Read the V# of the attribute to figure out component number and type.
477478
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
478-
const std::array components = buffer.DstSelect().Apply<IR::F32>(
479-
[&](const u32 index) { return ir.GetAttribute(attr, index); },
480-
[&](const u32 imm) { return ir.Imm32(float(imm)); });
481-
for (u32 i = 0; i < components.size(); i++) {
482-
ir.SetVectorReg(dst_reg++, components[i]);
479+
const auto values =
480+
ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
481+
ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
482+
const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect());
483+
for (u32 i = 0; i < 4; i++) {
484+
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
483485
}
484486

485487
// In case of programmable step rates we need to fallback to instance data pulling in

src/shader_recompiler/ir/ir_emitter.cpp

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_
657657
}
658658
}
659659

660+
/// Emits a two-selector composite shuffle of vector1 and vector2.
///
/// Both operands must report the same Type(), and each selector must be below 4, i.e. the
/// combined component count of two 2-wide operands. The opcode is chosen from the operand
/// element type (U32/F16/F32/F64) and always belongs to the CompositeShuffle*x2 family; the
/// selectors are passed to the instruction as U32 immediates.
///
/// Two-component operand types are accepted because that is what the CompositeShuffle*x2 opcodes
/// declare in opcodes.inc; previously they fell through to ThrowInvalidType. Four-component
/// operand types are still accepted so existing behaviour does not change.
/// NOTE(review): confirm whether any caller relies on the 4-wide path; if not, drop those cases.
///
/// @param vector1 First source vector.
/// @param vector2 Second source vector; must have the same type as vector1.
/// @param comp0 Selector for result component 0.
/// @param comp1 Selector for result component 1.
/// @return The value produced by the emitted shuffle instruction.
Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
                                  size_t comp1) {
    if (vector1.Type() != vector2.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
    }
    if (comp0 >= 4 || comp1 >= 4) {
        UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1);
    }
    const auto shuffle{[&](Opcode opcode) -> Value {
        return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
                    Value{static_cast<u32>(comp1)});
    }};
    switch (vector1.Type()) {
    case Type::U32x2:
    case Type::U32x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleU32x2);
    case Type::F16x2:
    case Type::F16x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleF16x2);
    case Type::F32x2:
    case Type::F32x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleF32x2);
    case Type::F64x2:
    case Type::F64x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleF64x2);
    default:
        ThrowInvalidType(vector1.Type());
    }
}
685+
686+
/// Emits a three-selector composite shuffle of vector1 and vector2.
///
/// Both operands must report the same Type(), and each selector must be below 6, i.e. the
/// combined component count of two 3-wide operands. The opcode is chosen from the operand
/// element type (U32/F16/F32/F64) and always belongs to the CompositeShuffle*x3 family; the
/// selectors are passed to the instruction as U32 immediates.
///
/// Three-component operand types are accepted because that is what the CompositeShuffle*x3
/// opcodes declare in opcodes.inc; previously they fell through to ThrowInvalidType.
/// Four-component operand types are still accepted so existing behaviour does not change.
/// NOTE(review): confirm whether any caller relies on the 4-wide path; if not, drop those cases.
///
/// @param vector1 First source vector.
/// @param vector2 Second source vector; must have the same type as vector1.
/// @param comp0 Selector for result component 0.
/// @param comp1 Selector for result component 1.
/// @param comp2 Selector for result component 2.
/// @return The value produced by the emitted shuffle instruction.
Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
                                  size_t comp1, size_t comp2) {
    if (vector1.Type() != vector2.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
    }
    if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) {
        UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2);
    }
    const auto shuffle{[&](Opcode opcode) -> Value {
        return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
                    Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)});
    }};
    switch (vector1.Type()) {
    case Type::U32x3:
    case Type::U32x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleU32x3);
    case Type::F16x3:
    case Type::F16x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleF16x3);
    case Type::F32x3:
    case Type::F32x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleF32x3);
    case Type::F64x3:
    case Type::F64x4: // kept for compatibility with the previous dispatch
        return shuffle(Opcode::CompositeShuffleF64x3);
    default:
        ThrowInvalidType(vector1.Type());
    }
}
711+
712+
/// Emits a four-selector composite shuffle of vector1 and vector2.
///
/// Both operands must report the same Type(), and every selector must be below 8. The opcode is
/// picked from the operand type (U32x4/F16x4/F32x4/F64x4); any other type is reported through
/// ThrowInvalidType. Selectors are passed to the instruction as U32 immediates.
Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
                                  size_t comp1, size_t comp2, size_t comp3) {
    const bool same_type = vector1.Type() == vector2.Type();
    if (!same_type) {
        UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
    }
    const bool in_bounds = comp0 < 8 && comp1 < 8 && comp2 < 8 && comp3 < 8;
    if (!in_bounds) {
        UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2,
                        comp3);
    }
    // Builds the instruction for the opcode selected below; only called from the matching case.
    const auto emit = [&](Opcode op) -> Value {
        const Value sel0{static_cast<u32>(comp0)};
        const Value sel1{static_cast<u32>(comp1)};
        const Value sel2{static_cast<u32>(comp2)};
        const Value sel3{static_cast<u32>(comp3)};
        return Inst(op, vector1, vector2, sel0, sel1, sel2, sel3);
    };
    switch (vector1.Type()) {
    case Type::U32x4:
        return emit(Opcode::CompositeShuffleU32x4);
    case Type::F16x4:
        return emit(Opcode::CompositeShuffleF16x4);
    case Type::F32x4:
        return emit(Opcode::CompositeShuffleF32x4);
    case Type::F64x4:
        return emit(Opcode::CompositeShuffleF64x4);
    default:
        ThrowInvalidType(vector1.Type());
    }
}
739+
660740
Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
661741
if (true_value.Type() != false_value.Type()) {
662742
UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type());

src/shader_recompiler/ir/ir_emitter.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,13 @@ class IREmitter {
152152
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
153153
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
154154

155+
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
156+
size_t comp1);
157+
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
158+
size_t comp1, size_t comp2);
159+
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
160+
size_t comp1, size_t comp2, size_t comp3);
161+
155162
[[nodiscard]] Value Select(const U1& condition, const Value& true_value,
156163
const Value& false_value);
157164

src/shader_recompiler/ir/opcodes.inc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,9 @@ OPCODE(CompositeExtractU32x4, U32, U32x
122122
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
123123
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
124124
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
125+
OPCODE(CompositeShuffleU32x2, U32x2, U32x2, U32x2, U32, U32, )
126+
OPCODE(CompositeShuffleU32x3, U32x3, U32x3, U32x3, U32, U32, U32, )
127+
OPCODE(CompositeShuffleU32x4, U32x4, U32x4, U32x4, U32, U32, U32, U32, )
125128
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
126129
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
127130
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
@@ -131,6 +134,9 @@ OPCODE(CompositeExtractF16x4, F16, F16x
131134
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
132135
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
133136
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
137+
OPCODE(CompositeShuffleF16x2, F16x2, F16x2, F16x2, U32, U32, )
138+
OPCODE(CompositeShuffleF16x3, F16x3, F16x3, F16x3, U32, U32, U32, )
139+
OPCODE(CompositeShuffleF16x4, F16x4, F16x4, F16x4, U32, U32, U32, U32, )
134140
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
135141
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
136142
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
@@ -140,6 +146,9 @@ OPCODE(CompositeExtractF32x4, F32, F32x
140146
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
141147
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
142148
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
149+
OPCODE(CompositeShuffleF32x2, F32x2, F32x2, F32x2, U32, U32, )
150+
OPCODE(CompositeShuffleF32x3, F32x3, F32x3, F32x3, U32, U32, U32, )
151+
OPCODE(CompositeShuffleF32x4, F32x4, F32x4, F32x4, U32, U32, U32, U32, )
143152
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
144153
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
145154
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
@@ -149,6 +158,9 @@ OPCODE(CompositeExtractF64x4, F64, F64x
149158
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
150159
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
151160
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
161+
OPCODE(CompositeShuffleF64x2, F64x2, F64x2, F64x2, U32, U32, )
162+
OPCODE(CompositeShuffleF64x3, F64x3, F64x3, F64x3, U32, U32, U32, )
163+
OPCODE(CompositeShuffleF64x4, F64x4, F64x4, F64x4, U32, U32, U32, U32, )
152164

153165
// Select operations
154166
OPCODE(SelectU1, U1, U1, U1, U1, )

0 commit comments

Comments
 (0)