Skip to content

Commit 028be3b

Browse files
authored
shader_recompiler: Emulate unnormalized sampler coordinates in shader. (#1762)
* shader_recompiler: Emulate unnormalized sampler coordinates in shader. * Address review comments.
1 parent 3062799 commit 028be3b

File tree

10 files changed

+78
-12
lines changed

10 files changed

+78
-12
lines changed

src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,14 @@ Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
8787
return Decorate(ctx, inst, ctx.OpFMul(ctx.F64[1], a, b));
8888
}
8989

90+
// Emits a SPIR-V OpFDiv for the 32-bit float division IR opcode, with `a` and `b`
// as its two operands in that order. The result type is ctx.F32[1] (presumably the
// scalar F32 type id -- confirm against EmitContext). The result is passed through
// Decorate() together with the originating IR instruction, as EmitFPMul64 does.
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
91+
return Decorate(ctx, inst, ctx.OpFDiv(ctx.F32[1], a, b));
92+
}
93+
94+
// 64-bit counterpart of the 32-bit division emitter: emits a SPIR-V OpFDiv with
// `a` and `b` as operands, using ctx.F64[1] as the result type (presumably the
// scalar F64 type id -- confirm against EmitContext), and passes the result
// through Decorate() together with the originating IR instruction.
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b) {
95+
return Decorate(ctx, inst, ctx.OpFDiv(ctx.F64[1], a, b));
96+
}
97+
9098
Id EmitFPNeg16(EmitContext& ctx, Id value) {
9199
return ctx.OpFNegate(ctx.F16[1], value);
92100
}

src/shader_recompiler/backend/spirv/emit_spirv_instructions.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,8 @@ Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
189189
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
190190
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
191191
Id EmitFPMul64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
192+
Id EmitFPDiv32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
193+
Id EmitFPDiv64(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
192194
Id EmitFPNeg16(EmitContext& ctx, Id value);
193195
Id EmitFPNeg32(EmitContext& ctx, Id value);
194196
Id EmitFPNeg64(EmitContext& ctx, Id value);

src/shader_recompiler/frontend/translate/vector_memory.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,7 @@ IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::Scal
527527
info.has_offset.Assign(flags.test(MimgModifier::Offset));
528528
info.has_lod.Assign(flags.any(MimgModifier::Lod));
529529
info.is_array.Assign(mimg.da);
530+
info.is_unnormalized.Assign(mimg.unrm);
530531

531532
if (gather) {
532533
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);

src/shader_recompiler/ir/ir_emitter.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -692,6 +692,20 @@ F32F64 IREmitter::FPMul(const F32F64& a, const F32F64& b) {
692692
}
693693
}
694694

695+
// Emits a floating-point division IR instruction with operands (a, b).
//
// Both operands must have the same IR type; a mismatch is reported through
// UNREACHABLE_MSG. F32 operands produce an Opcode::FPDiv32 instruction and F64
// operands produce Opcode::FPDiv64. Any other operand type is rejected via
// ThrowInvalidType.
F32F64 IREmitter::FPDiv(const F32F64& a, const F32F64& b) {
696+
if (a.Type() != b.Type()) {
697+
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
698+
}
699+
// Dispatch on the (shared) operand type to pick the matching opcode width.
switch (a.Type()) {
700+
case Type::F32:
701+
return Inst<F32>(Opcode::FPDiv32, a, b);
702+
case Type::F64:
703+
return Inst<F64>(Opcode::FPDiv64, a, b);
704+
default:
705+
ThrowInvalidType(a.Type());
706+
}
707+
}
708+
695709
F32F64 IREmitter::FPFma(const F32F64& a, const F32F64& b, const F32F64& c) {
696710
if (a.Type() != b.Type() || a.Type() != c.Type()) {
697711
UNREACHABLE_MSG("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());

src/shader_recompiler/ir/ir_emitter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ class IREmitter {
158158
[[nodiscard]] F32F64 FPAdd(const F32F64& a, const F32F64& b);
159159
[[nodiscard]] F32F64 FPSub(const F32F64& a, const F32F64& b);
160160
[[nodiscard]] F32F64 FPMul(const F32F64& a, const F32F64& b);
161+
[[nodiscard]] F32F64 FPDiv(const F32F64& a, const F32F64& b);
161162
[[nodiscard]] F32F64 FPFma(const F32F64& a, const F32F64& b, const F32F64& c);
162163

163164
[[nodiscard]] F32F64 FPAbs(const F32F64& value);

src/shader_recompiler/ir/opcodes.inc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ OPCODE(FPMin32, F32, F32,
184184
OPCODE(FPMin64, F64, F64, F64, )
185185
OPCODE(FPMul32, F32, F32, F32, )
186186
OPCODE(FPMul64, F64, F64, F64, )
187+
OPCODE(FPDiv32, F32, F32, F32, )
188+
OPCODE(FPDiv64, F64, F64, F64, )
187189
OPCODE(FPNeg32, F32, F32, )
188190
OPCODE(FPNeg64, F64, F64, )
189191
OPCODE(FPRecip32, F32, F32, )

src/shader_recompiler/ir/passes/resource_tracking_pass.cpp

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -420,26 +420,29 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
420420
Descriptors& descriptors, const IR::Inst* producer,
421421
const u32 image_binding, const AmdGpu::Image& image) {
422422
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
423-
const u32 sampler_binding = [&] {
423+
const auto [sampler_binding, sampler] = [&] -> std::pair<u32, AmdGpu::Sampler> {
424424
ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
425425
const IR::Value& handle = producer->Arg(1);
426426
// Inline sampler resource.
427427
if (handle.IsImmediate()) {
428428
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
429-
return descriptors.Add(SamplerResource{
429+
const auto inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()};
430+
const auto binding = descriptors.Add(SamplerResource{
430431
.sharp_idx = std::numeric_limits<u32>::max(),
431-
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
432+
.inline_sampler = inline_sampler,
432433
});
434+
return {binding, inline_sampler};
433435
}
434436
// Normal sampler resource.
435437
const auto ssharp_handle = handle.InstRecursive();
436438
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
437439
const auto ssharp = TrackSharp(ssharp_ud, info);
438-
return descriptors.Add(SamplerResource{
440+
const auto binding = descriptors.Add(SamplerResource{
439441
.sharp_idx = ssharp,
440442
.associated_image = image_binding,
441443
.disable_aniso = disable_aniso,
442444
});
445+
return {binding, info.ReadUdSharp<AmdGpu::Sampler>(ssharp)};
443446
}();
444447

445448
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
@@ -539,28 +542,46 @@ void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
539542
}
540543
}();
541544

545+
const auto unnormalized = sampler.force_unnormalized || inst_info.is_unnormalized;
546+
// Query dimensions of image if needed for normalization.
547+
// We can't use the image sharp because it could be bound to a different image later.
548+
const auto dimensions =
549+
unnormalized ? ir.ImageQueryDimension(ir.Imm32(image_binding), ir.Imm32(0u), ir.Imm1(false))
550+
: IR::Value{};
551+
// Fetches the coordinate held in address register `idx`. When the sample uses
// unnormalized coordinates, the value is divided by component `dim_idx` of the
// queried image dimensions so that the sampler (which is created with
// normalized coordinates) receives a normalized coordinate. Otherwise the
// register value is returned unchanged.
const auto get_coord = [&](u32 idx, u32 dim_idx) -> IR::Value {
552+
const auto coord = get_addr_reg(idx);
553+
if (unnormalized) {
554+
// Normalize the coordinate for sampling, dividing by its corresponding dimension.
// The queried dimension is an unsigned integer extent, so it has to be converted
// numerically to float. Bit-casting it would reinterpret the integer bits as a
// (denormal) float and produce wildly wrong coordinates.
555+
return ir.FPDiv(coord, ir.ConvertUToF(32, 32,
556+
IR::U32{ir.CompositeExtract(dimensions, dim_idx)}));
557+
}
558+
return coord;
559+
};
560+
542561
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
543562
const IR::Value coords = [&] -> IR::Value {
544563
switch (image.GetType()) {
545564
case AmdGpu::ImageType::Color1D: // x
546565
addr_reg = addr_reg + 1;
547-
return get_addr_reg(addr_reg - 1);
566+
return get_coord(addr_reg - 1, 0);
548567
case AmdGpu::ImageType::Color1DArray: // x, slice
549568
[[fallthrough]];
550569
case AmdGpu::ImageType::Color2D: // x, y
551570
addr_reg = addr_reg + 2;
552-
return ir.CompositeConstruct(get_addr_reg(addr_reg - 2), get_addr_reg(addr_reg - 1));
571+
return ir.CompositeConstruct(get_coord(addr_reg - 2, 0), get_coord(addr_reg - 1, 1));
553572
case AmdGpu::ImageType::Color2DArray: // x, y, slice
554573
[[fallthrough]];
555574
case AmdGpu::ImageType::Color2DMsaa: // x, y, frag
556-
[[fallthrough]];
557-
case AmdGpu::ImageType::Color3D: // x, y, z
558575
addr_reg = addr_reg + 3;
559-
return ir.CompositeConstruct(get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
576+
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
560577
get_addr_reg(addr_reg - 1));
578+
case AmdGpu::ImageType::Color3D: // x, y, z
579+
addr_reg = addr_reg + 3;
580+
return ir.CompositeConstruct(get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
581+
get_coord(addr_reg - 1, 2));
561582
case AmdGpu::ImageType::Cube: // x, y, face
562583
addr_reg = addr_reg + 3;
563-
return PatchCubeCoord(ir, get_addr_reg(addr_reg - 3), get_addr_reg(addr_reg - 2),
584+
return PatchCubeCoord(ir, get_coord(addr_reg - 3, 0), get_coord(addr_reg - 2, 1),
564585
get_addr_reg(addr_reg - 1), false, inst_info.is_array);
565586
default:
566587
UNREACHABLE();

src/shader_recompiler/ir/reg.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ union TextureInstInfo {
4040
BitField<6, 2, u32> gather_comp;
4141
BitField<8, 1, u32> has_derivatives;
4242
BitField<9, 1, u32> is_array;
43-
BitField<10, 1, u32> is_gather;
43+
BitField<10, 1, u32> is_unnormalized;
44+
BitField<11, 1, u32> is_gather;
4445
};
4546

4647
union BufferInstInfo {

src/shader_recompiler/specialization.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,12 @@ struct FMaskSpecialization {
4949
auto operator<=>(const FMaskSpecialization&) const = default;
5050
};
5151

52+
// Per-sampler state that takes part in shader specialization.
//
// StageSpecialization fills one entry per sampler descriptor from the bound
// sampler sharp and compares the entries when checking two specializations for
// equality. The resource tracking pass reads the sampler's force_unnormalized
// bit to decide whether to emit coordinate normalization into the shader, so
// that bit is recorded here.
struct SamplerSpecialization {
53+
// Copied from the sampler sharp's force_unnormalized field.
bool force_unnormalized = false;
54+
55+
auto operator<=>(const SamplerSpecialization&) const = default;
56+
};
57+
5258
/**
5359
* Alongside runtime information, this structure also checks bound resources
5460
* for compatibility. Can be used as a key for storing shader permutations.
@@ -67,6 +73,7 @@ struct StageSpecialization {
6773
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
6874
boost::container::small_vector<ImageSpecialization, 16> images;
6975
boost::container::small_vector<FMaskSpecialization, 8> fmasks;
76+
boost::container::small_vector<SamplerSpecialization, 16> samplers;
7077
Backend::Bindings start{};
7178

7279
explicit StageSpecialization(const Info& info_, RuntimeInfo runtime_info_,
@@ -107,6 +114,10 @@ struct StageSpecialization {
107114
spec.width = sharp.width;
108115
spec.height = sharp.height;
109116
});
117+
ForEachSharp(samplers, info->samplers,
118+
[](auto& spec, const auto& desc, AmdGpu::Sampler sharp) {
119+
spec.force_unnormalized = sharp.force_unnormalized;
120+
});
110121
}
111122

112123
void ForEachSharp(auto& spec_list, auto& desc_list, auto&& func) {
@@ -175,6 +186,11 @@ struct StageSpecialization {
175186
return false;
176187
}
177188
}
189+
for (u32 i = 0; i < samplers.size(); i++) {
190+
if (samplers[i] != other.samplers[i]) {
191+
return false;
192+
}
193+
}
178194
return true;
179195
}
180196
};

src/video_core/texture_cache/sampler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Sampler::Sampler(const Vulkan::Instance& instance, const AmdGpu::Sampler& sample
2525
.minLod = sampler.MinLod(),
2626
.maxLod = sampler.MaxLod(),
2727
.borderColor = LiverpoolToVK::BorderColor(sampler.border_color_type),
28-
.unnormalizedCoordinates = bool(sampler.force_unnormalized),
28+
.unnormalizedCoordinates = false, // Handled in shader due to Vulkan limitations.
2929
};
3030
auto [sampler_result, smplr] = instance.GetDevice().createSamplerUnique(sampler_ci);
3131
ASSERT_MSG(sampler_result == vk::Result::eSuccess, "Failed to create sampler: {}",

0 commit comments

Comments
 (0)