Skip to content

shader_recompiler: Add swizzle support for unsupported formats. #1869

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 31, 2024
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/post_order.h
src/shader_recompiler/ir/program.cpp
src/shader_recompiler/ir/program.h
src/shader_recompiler/ir/reinterpret.h
src/shader_recompiler/ir/reg.h
src/shader_recompiler/ir/type.cpp
src/shader_recompiler/ir/type.h
Expand Down
98 changes: 80 additions & 18 deletions src/shader_recompiler/backend/spirv/emit_spirv_composite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,22 @@

namespace Shader::Backend::SPIRV {

Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
template <typename... Args>
Id EmitCompositeConstruct(EmitContext& ctx, IR::Inst* inst, Args&&... args) {
return inst->AreAllArgsImmediates() ? ctx.ConstantComposite(args...)
: ctx.OpCompositeConstruct(args...);
}

Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
return ctx.OpCompositeConstruct(ctx.U32[3], e1, e2, e3);
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
return EmitCompositeConstruct(ctx, inst, ctx.U32[2], e1, e2);
}

Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
return ctx.OpCompositeConstruct(ctx.U32[4], e1, e2, e3, e4);
Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
return EmitCompositeConstruct(ctx, inst, ctx.U32[3], e1, e2, e3);
}

Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
return EmitCompositeConstruct(ctx, inst, ctx.U32[4], e1, e2, e3, e4);
}

Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index) {
Expand All @@ -42,16 +48,30 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
}

Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.U32[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.U32[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
return ctx.OpCompositeConstruct(ctx.F16[3], e1, e2, e3);
Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.U32[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
return EmitCompositeConstruct(ctx, inst, ctx.F16[2], e1, e2);
}

Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
return EmitCompositeConstruct(ctx, inst, ctx.F16[3], e1, e2, e3);
}

Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
return EmitCompositeConstruct(ctx, inst, ctx.F16[4], e1, e2, e3, e4);
}

Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
Expand All @@ -78,16 +98,30 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
}

Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F16[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F16[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F16[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
return EmitCompositeConstruct(ctx, inst, ctx.F32[2], e1, e2);
}

Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
return ctx.OpCompositeConstruct(ctx.F32[3], e1, e2, e3);
Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3) {
return EmitCompositeConstruct(ctx, inst, ctx.F32[3], e1, e2, e3);
}

Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
return EmitCompositeConstruct(ctx, inst, ctx.F32[4], e1, e2, e3, e4);
}

Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
Expand All @@ -114,6 +148,20 @@ Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F32[4], object, composite, index);
}

Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F32[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F32[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F32[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

void EmitCompositeConstructF64x2(EmitContext&) {
UNREACHABLE_MSG("SPIR-V Instruction");
}
Expand Down Expand Up @@ -150,4 +198,18 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
}

Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1) {
return ctx.OpVectorShuffle(ctx.F64[2], composite1, composite2, comp0, comp1);
}

Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2) {
return ctx.OpVectorShuffle(ctx.F64[3], composite1, composite2, comp0, comp1, comp2);
}

Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3) {
return ctx.OpVectorShuffle(ctx.F64[4], composite1, composite2, comp0, comp1, comp2, comp3);
}

} // namespace Shader::Backend::SPIRV
6 changes: 3 additions & 3 deletions src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod
}
texel = ctx.OpImageRead(color_type, image, coords, operands.mask, operands.operands);
}
return !texture.is_integer ? ctx.OpBitcast(ctx.U32[4], texel) : texel;
return texture.is_integer ? ctx.OpBitcast(ctx.F32[4], texel) : texel;
}

void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod, Id ms,
Expand All @@ -253,8 +253,8 @@ void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id
} else if (Sirit::ValidId(lod)) {
LOG_WARNING(Render, "Image write with LOD not supported by driver");
}
ctx.OpImageWrite(image, coords, ctx.OpBitcast(color_type, color), operands.mask,
operands.operands);
const Id texel = texture.is_integer ? ctx.OpBitcast(color_type, color) : color;
ctx.OpImageWrite(image, coords, texel, operands.mask, operands.operands);
}

} // namespace Shader::Backend::SPIRV
38 changes: 29 additions & 9 deletions src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,33 +118,48 @@ Id EmitSharedAtomicSMin32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicAnd32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicOr32(EmitContext& ctx, Id offset, Id value);
Id EmitSharedAtomicXor32(EmitContext& ctx, Id offset, Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeShuffleU32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleU32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleU32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeShuffleF16x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF16x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF16x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF32x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF32x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF32x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx);
Expand All @@ -154,6 +169,11 @@ void EmitCompositeExtractF64x4(EmitContext& ctx);
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeShuffleF64x2(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1);
Id EmitCompositeShuffleF64x3(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2);
Id EmitCompositeShuffleF64x4(EmitContext& ctx, Id composite1, Id composite2, u32 comp0, u32 comp1,
u32 comp2, u32 comp3);
Id EmitSelectU1(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Expand Down
32 changes: 13 additions & 19 deletions src/shader_recompiler/frontend/translate/export.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,34 +25,28 @@ void Translator::EmitExport(const GcnInst& inst) {
IR::VectorReg(inst.src[3].code),
};

const auto swizzle = [&](u32 comp) {
const auto set_attribute = [&](u32 comp, IR::F32 value) {
if (!IR::IsMrt(attrib)) {
return comp;
ir.SetAttribute(attrib, value, comp);
return;
}
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
switch (runtime_info.fs_info.color_buffers[index].mrt_swizzle) {
case MrtSwizzle::Identity:
return comp;
case MrtSwizzle::Alt:
static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3};
return AltSwizzle[comp];
case MrtSwizzle::Reverse:
static constexpr std::array<u32, 4> RevSwizzle = {3, 2, 1, 0};
return RevSwizzle[comp];
case MrtSwizzle::ReverseAlt:
static constexpr std::array<u32, 4> AltRevSwizzle = {3, 0, 1, 2};
return AltRevSwizzle[comp];
default:
UNREACHABLE();
const auto [r, g, b, a] = runtime_info.fs_info.color_buffers[index].swizzle;
const std::array swizzle_array = {r, g, b, a};
const auto swizzled_comp = swizzle_array[comp];
if (u32(swizzled_comp) < u32(AmdGpu::CompSwizzle::Red)) {
ir.SetAttribute(attrib, value, comp);
return;
}
ir.SetAttribute(attrib, value, u32(swizzled_comp) - u32(AmdGpu::CompSwizzle::Red));
};

const auto unpack = [&](u32 idx) {
const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx]));
const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)};
const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)};
ir.SetAttribute(attrib, r, swizzle(idx * 2));
ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1));
set_attribute(idx * 2, r);
set_attribute(idx * 2 + 1, g);
};

// Components are float16 packed into a VGPR
Expand All @@ -73,7 +67,7 @@ void Translator::EmitExport(const GcnInst& inst) {
continue;
}
const IR::F32 comp = ir.GetVectorReg<IR::F32>(vsrc[i]);
ir.SetAttribute(attrib, comp, swizzle(i));
set_attribute(i, comp);
}
}
if (IR::IsMrt(attrib)) {
Expand Down
25 changes: 6 additions & 19 deletions src/shader_recompiler/frontend/translate/translate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "shader_recompiler/info.h"
#include "shader_recompiler/ir/attribute.h"
#include "shader_recompiler/ir/reg.h"
#include "shader_recompiler/ir/reinterpret.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
#include "video_core/amdgpu/types.h"
Expand Down Expand Up @@ -475,26 +476,12 @@ void Translator::EmitFetch(const GcnInst& inst) {

// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUdReg<AmdGpu::Buffer>(attrib.sgpr_base, attrib.dword_offset);
const auto values =
ir.CompositeConstruct(ir.GetAttribute(attr, 0), ir.GetAttribute(attr, 1),
ir.GetAttribute(attr, 2), ir.GetAttribute(attr, 3));
const auto swizzled = ApplySwizzle(ir, values, buffer.DstSelect());
for (u32 i = 0; i < 4; i++) {
const IR::F32 comp = [&] {
switch (buffer.GetSwizzle(i)) {
case AmdGpu::CompSwizzle::One:
return ir.Imm32(1.f);
case AmdGpu::CompSwizzle::Zero:
return ir.Imm32(0.f);
case AmdGpu::CompSwizzle::Red:
return ir.GetAttribute(attr, 0);
case AmdGpu::CompSwizzle::Green:
return ir.GetAttribute(attr, 1);
case AmdGpu::CompSwizzle::Blue:
return ir.GetAttribute(attr, 2);
case AmdGpu::CompSwizzle::Alpha:
return ir.GetAttribute(attr, 3);
default:
UNREACHABLE();
}
}();
ir.SetVectorReg(dst_reg++, comp);
ir.SetVectorReg(dst_reg++, IR::F32{ir.CompositeExtract(swizzled, i)});
}

// In case of programmable step rates we need to fallback to instance data pulling in
Expand Down
4 changes: 2 additions & 2 deletions src/shader_recompiler/frontend/translate/vector_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {

const IR::VectorReg src_reg{inst.src[1].code};

std::array<IR::Value, 4> comps{};
std::array<IR::F32, 4> comps{};
for (u32 i = 0; i < num_dwords; i++) {
comps[i] = ir.GetVectorReg<IR::F32>(src_reg + i);
}
Expand Down Expand Up @@ -426,7 +426,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
if (((mimg.dmask >> i) & 1) == 0) {
continue;
}
IR::U32 value = IR::U32{ir.CompositeExtract(texel, i)};
IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
ir.SetVectorReg(dest_reg++, value);
}
}
Expand Down
Loading
Loading