@@ -164,8 +164,8 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
 }
 
 void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) {
-    const auto& mtbuf = inst.control.mtbuf;
-    const bool is_ring = mtbuf.glc && mtbuf.slc;
+    const auto& mubuf = inst.control.mubuf;
+    const bool is_ring = mubuf.glc && mubuf.slc;
     const IR::VectorReg vaddr{inst.src[0].code};
     const IR::ScalarReg sharp{inst.src[2].code * 4};
     const IR::Value soffset{GetSrc(inst.src[3])};
@@ -178,22 +178,23 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst)
         if (is_ring) {
             return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
         }
-        if (mtbuf.idxen && mtbuf.offen) {
+        if (mubuf.idxen && mubuf.offen) {
             return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
         }
-        if (mtbuf.idxen || mtbuf.offen) {
+        if (mubuf.idxen || mubuf.offen) {
             return ir.GetVectorReg(vaddr);
         }
         return {};
     }();
 
     IR::BufferInstInfo buffer_info{};
-    buffer_info.index_enable.Assign(mtbuf.idxen);
-    buffer_info.offset_enable.Assign(mtbuf.offen);
-    buffer_info.inst_offset.Assign(mtbuf.offset);
-    buffer_info.globally_coherent.Assign(mtbuf.glc);
-    buffer_info.system_coherent.Assign(mtbuf.slc);
+    buffer_info.index_enable.Assign(mubuf.idxen);
+    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.inst_offset.Assign(mubuf.offset);
+    buffer_info.globally_coherent.Assign(mubuf.glc);
+    buffer_info.system_coherent.Assign(mubuf.slc);
     if (is_typed) {
+        const auto& mtbuf = inst.control.mtbuf;
         const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
         const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
         ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
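For reference, the address operand in this lambda is chosen from the MUBUF addressing flags: with both idxen and offen set, vaddr and vaddr + 1 supply index and offset; with exactly one of them set, a single VGPR is read; the ring path instead pairs the vector register with the scalar soffset. A minimal standalone sketch of that selection (Flags and AddressRegs are illustrative names, not types from this codebase):

    #include <vector>

    // Illustrative stand-in for the MUBUF addressing bits used above.
    struct Flags {
        bool is_ring{};
        bool idxen{};
        bool offen{};
    };

    // Mirrors the address lambda: which VGPR slots (relative to vaddr) feed the
    // buffer address. The ring path returns just the first register, which the
    // caller combines with the scalar soffset.
    std::vector<int> AddressRegs(const Flags& f) {
        if (f.is_ring) {
            return {0};
        }
        if (f.idxen && f.offen) {
            return {0, 1};
        }
        if (f.idxen || f.offen) {
            return {0};
        }
        return {};
    }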
@@ -220,32 +221,38 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) {
     const auto& mubuf = inst.control.mubuf;
     const IR::VectorReg vaddr{inst.src[0].code};
     const IR::ScalarReg sharp{inst.src[2].code * 4};
-    ASSERT_MSG(!mubuf.offen && mubuf.offset == 0, "Offsets for image buffers are not supported");
     const IR::Value address = [&] -> IR::Value {
-        if (mubuf.idxen) {
+        if (mubuf.idxen && mubuf.offen) {
+            return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
+        }
+        if (mubuf.idxen || mubuf.offen) {
             return ir.GetVectorReg(vaddr);
         }
         return {};
     }();
     const IR::Value soffset{GetSrc(inst.src[3])};
     ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
 
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mubuf.idxen);
+    IR::BufferInstInfo buffer_info{};
+    buffer_info.index_enable.Assign(mubuf.idxen);
+    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.inst_offset.Assign(mubuf.offset);
+    buffer_info.globally_coherent.Assign(mubuf.glc);
+    buffer_info.system_coherent.Assign(mubuf.slc);
 
     const IR::Value handle =
         ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                               ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    const IR::Value value = ir.LoadBufferFormat(handle, address, info);
+    const IR::Value value = ir.LoadBufferFormat(handle, address, buffer_info);
     const IR::VectorReg dst_reg{inst.src[1].code};
     for (u32 i = 0; i < num_dwords; i++) {
         ir.SetVectorReg(dst_reg + i, IR::F32{ir.CompositeExtract(value, i)});
     }
 }
 
 void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst) {
-    const auto& mtbuf = inst.control.mtbuf;
-    const bool is_ring = mtbuf.glc && mtbuf.slc;
+    const auto& mubuf = inst.control.mubuf;
+    const bool is_ring = mubuf.glc && mubuf.slc;
     const IR::VectorReg vaddr{inst.src[0].code};
     const IR::ScalarReg sharp{inst.src[2].code * 4};
     const IR::Value soffset{GetSrc(inst.src[3])};
@@ -259,22 +266,23 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst
         if (is_ring) {
             return ir.CompositeConstruct(ir.GetVectorReg(vaddr), soffset);
         }
-        if (mtbuf.idxen && mtbuf.offen) {
+        if (mubuf.idxen && mubuf.offen) {
             return ir.CompositeConstruct(ir.GetVectorReg(vaddr), ir.GetVectorReg(vaddr + 1));
         }
-        if (mtbuf.idxen || mtbuf.offen) {
+        if (mubuf.idxen || mubuf.offen) {
             return ir.GetVectorReg(vaddr);
         }
         return {};
     }();
 
     IR::BufferInstInfo buffer_info{};
-    buffer_info.index_enable.Assign(mtbuf.idxen);
-    buffer_info.offset_enable.Assign(mtbuf.offen);
-    buffer_info.inst_offset.Assign(mtbuf.offset);
-    buffer_info.globally_coherent.Assign(mtbuf.glc);
-    buffer_info.system_coherent.Assign(mtbuf.slc);
+    buffer_info.index_enable.Assign(mubuf.idxen);
+    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.inst_offset.Assign(mubuf.offset);
+    buffer_info.globally_coherent.Assign(mubuf.glc);
+    buffer_info.system_coherent.Assign(mubuf.slc);
     if (is_typed) {
+        const auto& mtbuf = inst.control.mtbuf;
         const auto dmft = static_cast<AmdGpu::DataFormat>(mtbuf.dfmt);
         const auto nfmt = static_cast<AmdGpu::NumberFormat>(mtbuf.nfmt);
         ASSERT(nfmt == AmdGpu::NumberFormat::Float &&
@@ -321,8 +329,12 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
     const IR::Value soffset{GetSrc(inst.src[3])};
     ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
 
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mubuf.idxen);
+    IR::BufferInstInfo buffer_info{};
+    buffer_info.index_enable.Assign(mubuf.idxen);
+    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.inst_offset.Assign(mubuf.offset);
+    buffer_info.globally_coherent.Assign(mubuf.glc);
+    buffer_info.system_coherent.Assign(mubuf.slc);
 
     const IR::VectorReg src_reg{inst.src[1].code};
 
@@ -338,7 +350,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) {
     const IR::Value handle =
         ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1),
                               ir.GetScalarReg(sharp + 2), ir.GetScalarReg(sharp + 3));
-    ir.StoreBufferFormat(handle, address, value, info);
+    ir.StoreBufferFormat(handle, address, value, buffer_info);
 }
 
 void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
@@ -358,10 +370,12 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     const IR::U32 soffset{GetSrc(inst.src[3])};
     ASSERT_MSG(soffset.IsImmediate() && soffset.U32() == 0, "Non immediate offset not supported");
 
-    IR::BufferInstInfo info{};
-    info.index_enable.Assign(mubuf.idxen);
-    info.inst_offset.Assign(mubuf.offset);
-    info.offset_enable.Assign(mubuf.offen);
+    IR::BufferInstInfo buffer_info{};
+    buffer_info.index_enable.Assign(mubuf.idxen);
+    buffer_info.offset_enable.Assign(mubuf.offen);
+    buffer_info.inst_offset.Assign(mubuf.offset);
+    buffer_info.globally_coherent.Assign(mubuf.glc);
+    buffer_info.system_coherent.Assign(mubuf.slc);
 
     IR::Value vdata_val = ir.GetVectorReg<Shader::IR::U32>(vdata);
     const IR::Value handle =
@@ -371,27 +385,27 @@ void Translator::BUFFER_ATOMIC(AtomicOp op, const GcnInst& inst) {
     const IR::Value original_val = [&] {
         switch (op) {
         case AtomicOp::Swap:
-            return ir.BufferAtomicSwap(handle, address, vdata_val, info);
+            return ir.BufferAtomicSwap(handle, address, vdata_val, buffer_info);
         case AtomicOp::Add:
-            return ir.BufferAtomicIAdd(handle, address, vdata_val, info);
+            return ir.BufferAtomicIAdd(handle, address, vdata_val, buffer_info);
         case AtomicOp::Smin:
-            return ir.BufferAtomicIMin(handle, address, vdata_val, true, info);
+            return ir.BufferAtomicIMin(handle, address, vdata_val, true, buffer_info);
         case AtomicOp::Umin:
-            return ir.BufferAtomicIMin(handle, address, vdata_val, false, info);
+            return ir.BufferAtomicIMin(handle, address, vdata_val, false, buffer_info);
         case AtomicOp::Smax:
-            return ir.BufferAtomicIMax(handle, address, vdata_val, true, info);
+            return ir.BufferAtomicIMax(handle, address, vdata_val, true, buffer_info);
         case AtomicOp::Umax:
-            return ir.BufferAtomicIMax(handle, address, vdata_val, false, info);
+            return ir.BufferAtomicIMax(handle, address, vdata_val, false, buffer_info);
         case AtomicOp::And:
-            return ir.BufferAtomicAnd(handle, address, vdata_val, info);
+            return ir.BufferAtomicAnd(handle, address, vdata_val, buffer_info);
         case AtomicOp::Or:
-            return ir.BufferAtomicOr(handle, address, vdata_val, info);
+            return ir.BufferAtomicOr(handle, address, vdata_val, buffer_info);
         case AtomicOp::Xor:
-            return ir.BufferAtomicXor(handle, address, vdata_val, info);
+            return ir.BufferAtomicXor(handle, address, vdata_val, buffer_info);
         case AtomicOp::Inc:
-            return ir.BufferAtomicInc(handle, address, vdata_val, info);
+            return ir.BufferAtomicInc(handle, address, vdata_val, buffer_info);
         case AtomicOp::Dec:
-            return ir.BufferAtomicDec(handle, address, vdata_val, info);
+            return ir.BufferAtomicDec(handle, address, vdata_val, buffer_info);
         default:
             UNREACHABLE();
         }
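After this change, every buffer handler in the file fills the same five IR::BufferInstInfo fields from the MUBUF control bits. Purely as an illustration of that shared pattern, not something the diff introduces, the repeated block could be collected into one helper; the member and field names below come from the diff, while the helper itself and its name are invented:

    // Illustrative only; not part of this diff. Assumes the IR::BufferInstInfo
    // bitfield interface and the MUBUF control bits shown above.
    template <typename MubufControl>
    IR::BufferInstInfo MakeBufferInstInfo(const MubufControl& mubuf) {
        IR::BufferInstInfo buffer_info{};
        buffer_info.index_enable.Assign(mubuf.idxen);
        buffer_info.offset_enable.Assign(mubuf.offen);
        buffer_info.inst_offset.Assign(mubuf.offset);
        buffer_info.globally_coherent.Assign(mubuf.glc);
        buffer_info.system_coherent.Assign(mubuf.slc);
        return buffer_info;
    }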