
Commit f118dc7

video_core: Use texture buffers for untyped format load/store
1 parent 833a366 commit f118dc7

20 files changed, +399 -394 lines
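
Note: the "texture buffers" in the commit title are Vulkan texel buffers, i.e. a VkBufferView over the guest buffer bound through a uniform or storage texel buffer descriptor. Instead of the shader decoding a packed, "untyped" data format by hand, the buffer is viewed with an explicit VkFormat and the texture unit performs the conversion on load/store; the shader-side counterpart is visible in emit_spirv_context_get_set.cpp below (ctx.texture_buffers, OpImageFetch, OpImageWrite). The video_core half of the change is not part of this excerpt, so the following is only a hedged sketch of the general mechanism, and MakeTexelBufferView is a hypothetical helper name, not code from the repository.

#include <vulkan/vulkan.h>

// Hypothetical helper: wrap a buffer range in a typed view so shaders can access
// it as a texel buffer, letting the GPU handle e.g. UNORM8 -> float conversion.
VkBufferView MakeTexelBufferView(VkDevice device, VkBuffer buffer, VkFormat format,
                                 VkDeviceSize offset, VkDeviceSize range) {
    const VkBufferViewCreateInfo info{
        .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
        .buffer = buffer,
        .format = format, // e.g. VK_FORMAT_R8G8B8A8_UNORM for an 8_8_8_8 unorm buffer
        .offset = offset, // must respect minTexelBufferOffsetAlignment
        .range = range,
    };
    VkBufferView view{};
    // A real implementation would check the VkResult here.
    vkCreateBufferView(device, &info, nullptr, &view);
    return view;
}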

src/core/libraries/avplayer/avplayer.cpp

Lines changed: 2 additions & 1 deletion
@@ -120,6 +120,7 @@ bool PS4_SYSV_ABI sceAvPlayerGetVideoDataEx(SceAvPlayerHandle handle,
 }
 
 SceAvPlayerHandle PS4_SYSV_ABI sceAvPlayerInit(SceAvPlayerInitData* data) {
+    return nullptr;
     LOG_TRACE(Lib_AvPlayer, "called");
     if (data == nullptr) {
         return nullptr;
@@ -325,4 +326,4 @@ void RegisterlibSceAvPlayer(Core::Loader::SymbolsResolver* sym) {
     LIB_FUNCTION("yN7Jhuv8g24", "libSceAvPlayer", 1, "libSceAvPlayer", 1, 0, sceAvPlayerVprintf);
 };
 
-} // namespace Libraries::AvPlayer
+} // namespace Libraries::AvPlayer

src/core/libraries/kernel/thread_management.cpp

Lines changed: 9 additions & 0 deletions
@@ -1066,7 +1066,16 @@ ScePthread PThreadPool::Create() {
         }
     }
 
+#ifdef _WIN64
     auto* ret = new PthreadInternal{};
+#else
+    // TODO: Linux specific hack
+    static u8* hint_address = reinterpret_cast<u8*>(0x7FFFFC000ULL);
+    auto* ret = reinterpret_cast<PthreadInternal*>(
+        mmap(hint_address, sizeof(PthreadInternal), PROT_READ | PROT_WRITE,
+             MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0));
+    hint_address += Common::AlignUp(sizeof(PthreadInternal), 4_KB);
+#endif
     ret->is_free = false;
     ret->is_detached = false;
     ret->is_almost_done = false;
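
The non-Windows branch above swaps the plain heap allocation for an mmap at a fixed hint address, presumably so the resulting pointer stays in a low range the emulated guest can handle; the code itself labels it a Linux-specific hack. A standalone sketch of the same pattern (the struct, names, and hint value are illustrative, not taken from the repository):

#include <sys/mman.h>
#include <cstdint>
#include <cstdio>

struct ThreadBlock {
    std::uint64_t data[64]; // stand-in for PthreadInternal
};

int main() {
    // Page-aligned hint just below the 32 GiB mark, mirroring the value in the diff.
    static std::uint8_t* hint = reinterpret_cast<std::uint8_t*>(0x7FFFFC000ULL);
    void* mem = mmap(hint, sizeof(ThreadBlock), PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    if (mem == MAP_FAILED) {
        std::perror("mmap");
        return 1;
    }
    // Bump the hint by a whole 4 KiB page so the next mapping does not overlap.
    // Note that MAP_FIXED silently replaces anything already mapped there, which
    // is part of why the original comment calls this a hack.
    hint += (sizeof(ThreadBlock) + 0xFFFu) & ~std::uintptr_t{0xFFF};
    std::printf("mapped at %p\n", mem);
    munmap(mem, sizeof(ThreadBlock));
    return 0;
}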

src/shader_recompiler/backend/spirv/emit_spirv.cpp

Lines changed: 3 additions & 0 deletions
@@ -189,6 +189,9 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
         ctx.AddCapability(spv::Capability::StorageImageExtendedFormats);
         ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
     }
+    if (info.has_texel_buffers) {
+        ctx.AddCapability(spv::Capability::SampledBuffer);
+    }
     switch (program.info.stage) {
     case Stage::Compute: {
         const std::array<u32, 3> workgroup_size{program.info.workgroup_size};

src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp

Lines changed: 18 additions & 180 deletions
@@ -262,171 +262,15 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
     return EmitLoadBufferF32xN<4>(ctx, handle, address);
 }
 
-static bool IsSignedInteger(AmdGpu::NumberFormat format) {
-    switch (format) {
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Uint:
-        return false;
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Sint:
-    case AmdGpu::NumberFormat::SnormNz:
-        return true;
-    case AmdGpu::NumberFormat::Float:
-    default:
-        UNREACHABLE();
-    }
-}
-
-static u32 UXBitsMax(u32 bit_width) {
-    return (1u << bit_width) - 1u;
-}
-
-static u32 SXBitsMax(u32 bit_width) {
-    return (1u << (bit_width - 1u)) - 1u;
-}
-
-static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, u32 bit_width) {
-    switch (format) {
-    case AmdGpu::NumberFormat::Unorm:
-        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))));
-    case AmdGpu::NumberFormat::Snorm:
-        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width))));
-    case AmdGpu::NumberFormat::SnormNz:
-        // (x * 2 + 1) / (Format::SMAX * 2)
-        value = ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(2.f));
-        value = ctx.OpFAdd(ctx.F32[1], value, ctx.ConstF32(1.f));
-        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width) * 2)));
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uint:
-    case AmdGpu::NumberFormat::Sint:
-    case AmdGpu::NumberFormat::Float:
-        return value;
-    default:
-        UNREACHABLE_MSG("Unsupported number format for conversion: {}",
-                        magic_enum::enum_name(format));
-    }
-}
-
-static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offset) {
-    Id comp_offset = ctx.ConstU32(bit_offset);
-    if (stride < 4) {
-        // comp_offset += (address % 4) * 8;
-        const Id byte_offset = ctx.OpUMod(ctx.U32[1], address, ctx.ConstU32(4u));
-        const Id bit_offset = ctx.OpShiftLeftLogical(ctx.U32[1], byte_offset, ctx.ConstU32(3u));
-        comp_offset = ctx.OpIAdd(ctx.U32[1], comp_offset, bit_offset);
-    }
-    return comp_offset;
-}
-
-static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
-    auto& buffer = ctx.buffers[handle];
-    const auto format = buffer.dfmt;
-    switch (format) {
-    case AmdGpu::DataFormat::FormatInvalid:
-        return ctx.f32_zero_value;
-    case AmdGpu::DataFormat::Format8:
-    case AmdGpu::DataFormat::Format16:
-    case AmdGpu::DataFormat::Format32:
-    case AmdGpu::DataFormat::Format8_8:
-    case AmdGpu::DataFormat::Format16_16:
-    case AmdGpu::DataFormat::Format10_11_11:
-    case AmdGpu::DataFormat::Format11_11_10:
-    case AmdGpu::DataFormat::Format10_10_10_2:
-    case AmdGpu::DataFormat::Format2_10_10_10:
-    case AmdGpu::DataFormat::Format8_8_8_8:
-    case AmdGpu::DataFormat::Format32_32:
-    case AmdGpu::DataFormat::Format16_16_16_16:
-    case AmdGpu::DataFormat::Format32_32_32:
-    case AmdGpu::DataFormat::Format32_32_32_32: {
-        const u32 num_components = AmdGpu::NumComponents(format);
-        if (comp >= num_components) {
-            return ctx.f32_zero_value;
-        }
-
-        // uint index = address / 4;
-        Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-        const u32 stride = buffer.stride;
-        if (stride > 4) {
-            const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
-            if (index_offset > 0) {
-                // index += index_offset;
-                index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(index_offset));
-            }
-        }
-        const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
-
-        const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
-        const u32 bit_width = AmdGpu::ComponentBits(format, comp);
-        const auto num_format = buffer.nfmt;
-        if (num_format == AmdGpu::NumberFormat::Float) {
-            if (bit_width == 32) {
-                return ctx.OpLoad(ctx.F32[1], ptr);
-            } else if (bit_width == 16) {
-                const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
-                Id value = ctx.OpLoad(ctx.U32[1], ptr);
-                value =
-                    ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width));
-                value = ctx.OpSConvert(ctx.U16, value);
-                value = ctx.OpBitcast(ctx.F16[1], value);
-                return ctx.OpFConvert(ctx.F32[1], value);
-            } else {
-                UNREACHABLE_MSG("Invalid float bit width {}", bit_width);
-            }
-        } else {
-            Id value = ctx.OpLoad(ctx.U32[1], ptr);
-            const bool is_signed = IsSignedInteger(num_format);
-            if (bit_width < 32) {
-                const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
-                if (is_signed) {
-                    value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
-                                                   ctx.ConstU32(bit_width));
-                } else {
-                    value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
-                                                   ctx.ConstU32(bit_width));
-                }
-            }
-            value = ctx.OpBitcast(ctx.F32[1], value);
-            return ConvertValue(ctx, value, num_format, bit_width);
-        }
-        break;
-    }
-    default:
-        UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
-    }
-}
-
-template <u32 N>
-static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    auto& buffer = ctx.buffers[handle];
-    address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
-    if constexpr (N == 1) {
-        return GetBufferFormatValue(ctx, handle, address, 0);
-    } else {
-        boost::container::static_vector<Id, N> ids;
-        for (u32 i = 0; i < N; i++) {
-            ids.push_back(GetBufferFormatValue(ctx, handle, address, i));
-        }
-        return ctx.OpCompositeConstruct(ctx.F32[N], ids);
-    }
-}
-
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<1>(ctx, inst, handle, address);
-}
-
-Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<2>(ctx, inst, handle, address);
-}
-
-Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<3>(ctx, inst, handle, address);
-}
-
-Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
+    const auto& buffer = ctx.texture_buffers[handle];
+    const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
+    const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
+    Id texel = ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
+    if (buffer.is_integer) {
+        texel = ctx.OpBitcast(ctx.F32[4], texel);
+    }
+    return texel;
 }
 
 template <u32 N>
@@ -467,6 +311,7 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
     EmitStoreBufferF32xN<1>(ctx, handle, address, value);
 }
 
+<<<<<<< HEAD
 static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
                              u32 bit_width) {
     switch (format) {
@@ -541,23 +386,16 @@ static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address,
     }
 }
 
+=======
+>>>>>>> 8b824588 (video_core: Use texture buffers for untyped format load/store)
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                Id value) {
-    EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                Id value) {
-    EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                Id value) {
-    EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value);
+    const auto& buffer = ctx.texture_buffers[handle];
+    const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
+    const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
+    if (buffer.is_integer) {
+        value = ctx.OpBitcast(ctx.U32[4], value);
+    }
+    ctx.OpImageWrite(tex_buffer, coord, value);
 }
 
 } // namespace Shader::Backend::SPIRV
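
For reference, the removed ConvertValue/GetBufferFormatValue path extracted each component with bitfield ops and normalized it manually (Unorm divides by UXBitsMax, Snorm by SXBitsMax); the texel-buffer path leaves that conversion to the GPU's fixed-function format hardware. A rough plain-C++ illustration of the arithmetic the old path performed for an 8-bit channel (hypothetical helpers, not code from the repository):

#include <cstdint>

// Decode component 'comp' of a packed 8_8_8_8 value the way the removed Unorm
// path did: extract 8 bits, then divide by UXBitsMax(8) == 255.
float DecodeUnorm8(std::uint32_t packed, std::uint32_t comp) {
    const std::uint32_t bits = (packed >> (comp * 8u)) & 0xFFu;
    return static_cast<float>(bits) / 255.0f;
}

// Snorm counterpart: sign-extend the 8-bit field, then divide by SXBitsMax(8) == 127.
float DecodeSnorm8(std::uint32_t packed, std::uint32_t comp) {
    const auto bits = static_cast<std::int8_t>((packed >> (comp * 8u)) & 0xFFu);
    return static_cast<float>(bits) / 127.0f;
}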
