Skip to content

shader_recompiler: Add swizzle support for unsupported formats. #1869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 31, 2024
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/post_order.h
src/shader_recompiler/ir/program.cpp
src/shader_recompiler/ir/program.h
src/shader_recompiler/ir/reinterpret.h
src/shader_recompiler/ir/reg.h
src/shader_recompiler/ir/type.cpp
src/shader_recompiler/ir/type.h
Expand Down
56 changes: 56 additions & 0 deletions src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,20 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
}

Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.U32[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
}
Expand Down Expand Up @@ -78,6 +92,20 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
}

Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
}
Expand Down Expand Up @@ -114,6 +142,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
}

Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

void EmitCompositeConstructF64x2(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
Expand Down Expand Up @@ -150,4 +192,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
}

Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

} // namespace Shader::Backend::SPIRV
20 changes: 20 additions & 0 deletions src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Expand All @@ -136,6 +141,11 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Expand All @@ -145,6 +155,11 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx);
Expand All @@ -154,6 +169,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx);
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Expand Down
32 changes: 13 additions & 19 deletions src/shader_recompiler/frontend/translate/export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,28 @@ void Translator::EmitExport(const GcnInst& inst) {
IR::VectorReg(inst.src[3].code),
};

const auto swizzle = [&](u32 comp) {
const auto set_attribute = [&](u32 comp, IR::F32 value) {
if (!IR::IsMrt(attrib)) {
return comp;
ir.SetAttribute(attrib, value, comp);
return;
}
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) {
case MrtSwizzle::Identity:
return comp;
case MrtSwizzle::Alt:
static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3};
return AltSwizzle[comp];
case MrtSwizzle::Reverse:
static constexpr std::array<u32, 4> RevSwizzle = {3, 2, 1, 0};
return RevSwizzle[comp];
case MrtSwizzle::ReverseAlt:
static constexpr std::array<u32, 4> AltRevSwizzle = {3, 0, 1, 2};
return AltRevSwizzle[comp];
default:
UNREACHABLE();
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle;
const std::array swizzle_array = {r, g, b, a};
const auto swizzled_comp = swizzle_array[comp];
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
ir.SetAttribute(attrib, value, comp);
return;
}
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
};

const auto unpack = [&](u32 idx) {
const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx]));
const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)};
const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)};
ir.SetAttribute(attrib, r, swizzle(idx * 2));
ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1));
set_attribute(idx * 2, r);
set_attribute(idx * 2 + 1, g);
};

// Components are float16 packed into a VGPR
Expand All @@ -73,7 +67,7 @@ void Translator::EmitExport(const GcnInst& inst) {
continue;
}
const IR::F32 comp = ir.GetVectorReg<IR::F32>(vsrc[i]);
ir.SetAttribute(attrib, comp, swizzle(i));
set_attribute(i, comp);
}
}
if (IR::IsMrt(attrib)) {
Expand Down
25 changes: 6 additions & 19 deletions src/shader_recompiler/frontend/translate/translate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
Expand Down Expand Up @@ -475,26 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) {

// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const auto values =
ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect());
for (u32 i = 0; i < 4; i++) {
const IR::F32 comp = [&] {
switch (buffer.GetSwizzle(i)) {
case AmdGpu::CompSwizzle::One:
return ir.Imm32(1.f);
case AmdGpu::CompSwizzle::Zero:
return ir.Imm32(0.f);
case AmdGpu::CompSwizzle::Red:
return ir.GetAttribute(attr, 0);
case AmdGpu::CompSwizzle::Green:
return ir.GetAttribute(attr, 1);
case AmdGpu::CompSwizzle::Blue:
return ir.GetAttribute(attr, 2);
case AmdGpu::CompSwizzle::Alpha:
return ir.GetAttribute(attr, 3);
default:
UNREACHABLE();
}
}();
ir.SetVectorReg(dst_reg++, comp);
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
}

// In case of programmable step rates we need to fallback to instance data pulling in
Expand Down
80 changes: 80 additions & 0 deletions src/shader_recompiler/ir/ir_emitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,86 @@ Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_
}
}

Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1) {
if (vector1.Type() != vector2.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
}
if (comp0 >= 4 || comp1 >= 4) {
UNREACHABLE_MSG("One or more out of bounds elements {}, {}", comp0, comp1);
}
const auto shuffle{[&](Opcode opcode) -> Value {
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
Value{static_cast<u32>(comp1)});
}};
switch (vector1.Type()) {
case Type::U32x4:
return shuffle(Opcode::CompositeShuffleU32x2);
case Type::F16x4:
return shuffle(Opcode::CompositeShuffleF16x2);
case Type::F32x4:
return shuffle(Opcode::CompositeShuffleF32x2);
case Type::F64x4:
return shuffle(Opcode::CompositeShuffleF64x2);
default:
ThrowInvalidType(vector1.Type());
}
}

Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2) {
if (vector1.Type() != vector2.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
}
if (comp0 >= 6 || comp1 >= 6 || comp2 >= 6) {
UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}", comp0, comp1, comp2);
}
const auto shuffle{[&](Opcode opcode) -> Value {
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)});
}};
switch (vector1.Type()) {
case Type::U32x4:
return shuffle(Opcode::CompositeShuffleU32x3);
case Type::F16x4:
return shuffle(Opcode::CompositeShuffleF16x3);
case Type::F32x4:
return shuffle(Opcode::CompositeShuffleF32x3);
case Type::F64x4:
return shuffle(Opcode::CompositeShuffleF64x3);
default:
ThrowInvalidType(vector1.Type());
}
}

Value IREmitter::CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2, size_t comp3) {
if (vector1.Type() != vector2.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", vector1.Type(), vector2.Type());
}
if (comp0 >= 8 || comp1 >= 8 || comp2 >= 8 || comp3 >= 8) {
UNREACHABLE_MSG("One or more out of bounds elements {}, {}, {}, {}", comp0, comp1, comp2,
comp3);
}
const auto shuffle{[&](Opcode opcode) -> Value {
return Inst(opcode, vector1, vector2, Value{static_cast<u32>(comp0)},
Value{static_cast<u32>(comp1)}, Value{static_cast<u32>(comp2)},
Value{static_cast<u32>(comp3)});
}};
switch (vector1.Type()) {
case Type::U32x4:
return shuffle(Opcode::CompositeShuffleU32x4);
case Type::F16x4:
return shuffle(Opcode::CompositeShuffleF16x4);
case Type::F32x4:
return shuffle(Opcode::CompositeShuffleF32x4);
case Type::F64x4:
return shuffle(Opcode::CompositeShuffleF64x4);
default:
ThrowInvalidType(vector1.Type());
}
}

Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
if (true_value.Type() != false_value.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", true_value.Type(), false_value.Type());
Expand Down
7 changes: 7 additions & 0 deletions src/shader_recompiler/ir/ir_emitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,13 @@ class IREmitter {
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);

[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1);
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2);
[[nodiscard]] Value CompositeShuffle(const Value& vector1, const Value& vector2, size_t comp0,
size_t comp1, size_t comp2, size_t comp3);

[[nodiscard]] Value Select(const U1& condition, const Value& true_value,
const Value& false_value);

Expand Down
Loading
Loading