@@ -262,171 +262,15 @@ Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
     return EmitLoadBufferF32xN<4>(ctx, handle, address);
 }
 
-static bool IsSignedInteger(AmdGpu::NumberFormat format) {
-    switch (format) {
-    case AmdGpu::NumberFormat::Unorm:
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Uint:
-        return false;
-    case AmdGpu::NumberFormat::Snorm:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Sint:
-    case AmdGpu::NumberFormat::SnormNz:
-        return true;
-    case AmdGpu::NumberFormat::Float:
-    default:
-        UNREACHABLE();
-    }
-}
-
-static u32 UXBitsMax(u32 bit_width) {
-    return (1u << bit_width) - 1u;
-}
-
-static u32 SXBitsMax(u32 bit_width) {
-    return (1u << (bit_width - 1u)) - 1u;
-}
-
-static Id ConvertValue(EmitContext& ctx, Id value, AmdGpu::NumberFormat format, u32 bit_width) {
-    switch (format) {
-    case AmdGpu::NumberFormat::Unorm:
-        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(UXBitsMax(bit_width))));
-    case AmdGpu::NumberFormat::Snorm:
-        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width))));
-    case AmdGpu::NumberFormat::SnormNz:
-        // (x * 2 + 1) / (Format::SMAX * 2)
-        value = ctx.OpFMul(ctx.F32[1], value, ctx.ConstF32(2.f));
-        value = ctx.OpFAdd(ctx.F32[1], value, ctx.ConstF32(1.f));
-        return ctx.OpFDiv(ctx.F32[1], value, ctx.ConstF32(float(SXBitsMax(bit_width) * 2)));
-    case AmdGpu::NumberFormat::Uscaled:
-    case AmdGpu::NumberFormat::Sscaled:
-    case AmdGpu::NumberFormat::Uint:
-    case AmdGpu::NumberFormat::Sint:
-    case AmdGpu::NumberFormat::Float:
-        return value;
-    default:
-        UNREACHABLE_MSG("Unsupported number format for conversion: {}",
-                        magic_enum::enum_name(format));
-    }
-}
-
-static Id ComponentOffset(EmitContext& ctx, Id address, u32 stride, u32 bit_offset) {
-    Id comp_offset = ctx.ConstU32(bit_offset);
-    if (stride < 4) {
-        // comp_offset += (address % 4) * 8;
-        const Id byte_offset = ctx.OpUMod(ctx.U32[1], address, ctx.ConstU32(4u));
-        const Id bit_offset = ctx.OpShiftLeftLogical(ctx.U32[1], byte_offset, ctx.ConstU32(3u));
-        comp_offset = ctx.OpIAdd(ctx.U32[1], comp_offset, bit_offset);
-    }
-    return comp_offset;
-}
-
-static Id GetBufferFormatValue(EmitContext& ctx, u32 handle, Id address, u32 comp) {
-    auto& buffer = ctx.buffers[handle];
-    const auto format = buffer.dfmt;
-    switch (format) {
-    case AmdGpu::DataFormat::FormatInvalid:
-        return ctx.f32_zero_value;
-    case AmdGpu::DataFormat::Format8:
-    case AmdGpu::DataFormat::Format16:
-    case AmdGpu::DataFormat::Format32:
-    case AmdGpu::DataFormat::Format8_8:
-    case AmdGpu::DataFormat::Format16_16:
-    case AmdGpu::DataFormat::Format10_11_11:
-    case AmdGpu::DataFormat::Format11_11_10:
-    case AmdGpu::DataFormat::Format10_10_10_2:
-    case AmdGpu::DataFormat::Format2_10_10_10:
-    case AmdGpu::DataFormat::Format8_8_8_8:
-    case AmdGpu::DataFormat::Format32_32:
-    case AmdGpu::DataFormat::Format16_16_16_16:
-    case AmdGpu::DataFormat::Format32_32_32:
-    case AmdGpu::DataFormat::Format32_32_32_32: {
-        const u32 num_components = AmdGpu::NumComponents(format);
-        if (comp >= num_components) {
-            return ctx.f32_zero_value;
-        }
-
-        // uint index = address / 4;
-        Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-        const u32 stride = buffer.stride;
-        if (stride > 4) {
-            const u32 index_offset = u32(AmdGpu::ComponentOffset(format, comp) / 32);
-            if (index_offset > 0) {
-                // index += index_offset;
-                index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(index_offset));
-            }
-        }
-        const Id ptr = ctx.OpAccessChain(buffer.pointer_type, buffer.id, ctx.u32_zero_value, index);
-
-        const u32 bit_offset = AmdGpu::ComponentOffset(format, comp) % 32;
-        const u32 bit_width = AmdGpu::ComponentBits(format, comp);
-        const auto num_format = buffer.nfmt;
-        if (num_format == AmdGpu::NumberFormat::Float) {
-            if (bit_width == 32) {
-                return ctx.OpLoad(ctx.F32[1], ptr);
-            } else if (bit_width == 16) {
-                const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
-                Id value = ctx.OpLoad(ctx.U32[1], ptr);
-                value =
-                    ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset, ctx.ConstU32(bit_width));
-                value = ctx.OpSConvert(ctx.U16, value);
-                value = ctx.OpBitcast(ctx.F16[1], value);
-                return ctx.OpFConvert(ctx.F32[1], value);
-            } else {
-                UNREACHABLE_MSG("Invalid float bit width {}", bit_width);
-            }
-        } else {
-            Id value = ctx.OpLoad(ctx.U32[1], ptr);
-            const bool is_signed = IsSignedInteger(num_format);
-            if (bit_width < 32) {
-                const Id comp_offset = ComponentOffset(ctx, address, stride, bit_offset);
-                if (is_signed) {
-                    value = ctx.OpBitFieldSExtract(ctx.S32[1], value, comp_offset,
-                                                   ctx.ConstU32(bit_width));
-                } else {
-                    value = ctx.OpBitFieldUExtract(ctx.U32[1], value, comp_offset,
-                                                   ctx.ConstU32(bit_width));
-                }
-            }
-            value = ctx.OpBitcast(ctx.F32[1], value);
-            return ConvertValue(ctx, value, num_format, bit_width);
-        }
-        break;
-    }
-    default:
-        UNREACHABLE_MSG("Invalid format for conversion: {}", magic_enum::enum_name(format));
-    }
-}
-
-template <u32 N>
-static Id EmitLoadBufferFormatF32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    auto& buffer = ctx.buffers[handle];
-    address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
-    if constexpr (N == 1) {
-        return GetBufferFormatValue(ctx, handle, address, 0);
-    } else {
-        boost::container::static_vector<Id, N> ids;
-        for (u32 i = 0; i < N; i++) {
-            ids.push_back(GetBufferFormatValue(ctx, handle, address, i));
-        }
-        return ctx.OpCompositeConstruct(ctx.F32[N], ids);
-    }
-}
-
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<1>(ctx, inst, handle, address);
-}
-
-Id EmitLoadBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<2>(ctx, inst, handle, address);
-}
-
-Id EmitLoadBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<3>(ctx, inst, handle, address);
-}
-
-Id EmitLoadBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferFormatF32xN<4>(ctx, inst, handle, address);
+    const auto& buffer = ctx.texture_buffers[handle];
+    const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
+    const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
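+    // The typed buffer view performs the dfmt/nfmt conversion in fixed function,
+    // replacing the manual bit extraction the deleted helpers above emulated.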
+    Id texel = ctx.OpImageFetch(buffer.result_type, tex_buffer, coord);
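+    // Fetches from integer-format views yield uint/sint texels; bitcast so the
+    // IR keeps seeing four f32 lanes regardless of the view format.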
+    if (buffer.is_integer) {
+        texel = ctx.OpBitcast(ctx.F32[4], texel);
+    }
+    return texel;
 }
 
 template <u32 N>
@@ -467,6 +311,7 @@ void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address
     EmitStoreBufferF32xN<1>(ctx, handle, address, value);
 }
 
-static Id ConvertF32ToFormat(EmitContext& ctx, Id value, AmdGpu::NumberFormat format,
-                             u32 bit_width) {
-    switch (format) {
@@ -541,23 +386,16 @@ static void EmitStoreBufferFormatF32xN(EmitContext& ctx, u32 handle, Id address,
-    }
-}
-
 void EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) {
-    EmitStoreBufferFormatF32xN<1>(ctx, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                Id value) {
-    EmitStoreBufferFormatF32xN<2>(ctx, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                Id value) {
-    EmitStoreBufferFormatF32xN<3>(ctx, handle, address, value);
-}
-
-void EmitStoreBufferFormatF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
-                                Id value) {
-    EmitStoreBufferFormatF32xN<4>(ctx, handle, address, value);
+    const auto& buffer = ctx.texture_buffers[handle];
+    const Id tex_buffer = ctx.OpLoad(buffer.image_type, buffer.id);
+    const Id coord = ctx.OpIAdd(ctx.U32[1], address, buffer.coord_offset);
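+    // Mirror of the load path: the IR hands the value over as f32x4, so
+    // reinterpret it as u32x4 before writing through an integer-format view.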
+    if (buffer.is_integer) {
+        value = ctx.OpBitcast(ctx.U32[4], value);
+    }
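+    // OpImageWrite applies the inverse conversion (e.g. float -> unorm) on store.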
+    ctx.OpImageWrite(tex_buffer, coord, value);
 }
 
 } // namespace Shader::Backend::SPIRV