Skip to content

Commit 77d2172

Browse files
renderer_vulkan: Cleanup and improve barriers in caches (#1865)
* texture_cache: Stricter barriers on image upload
* buffer_cache: Stricter barrier for vkCmdUpdateBuffer
* vk_rasterizer: Barrier also normal buffers and make it apply to all stages
* texture_cache: Minor barrier cleanup
* Batch image and buffer barriers in a single command
* clang format
1 parent f7a8e24 commit 77d2172

File tree

5 files changed

+190
-76
lines changed

5 files changed

+190
-76
lines changed

src/video_core/buffer_cache/buffer_cache.cpp

Lines changed: 117 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -259,7 +259,16 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
259259
const BufferId buffer_id = FindBuffer(address, num_bytes);
260260
return &slot_buffers[buffer_id];
261261
}();
262-
const vk::BufferMemoryBarrier2 buf_barrier = {
262+
const vk::BufferMemoryBarrier2 pre_barrier = {
263+
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
264+
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
265+
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
266+
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
267+
.buffer = buffer->Handle(),
268+
.offset = buffer->Offset(address),
269+
.size = num_bytes,
270+
};
271+
const vk::BufferMemoryBarrier2 post_barrier = {
263272
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
264273
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
265274
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
@@ -271,9 +280,14 @@ void BufferCache::InlineData(VAddr address, const void* value, u32 num_bytes, bo
271280
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
272281
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
273282
.bufferMemoryBarrierCount = 1,
274-
.pBufferMemoryBarriers = &buf_barrier,
283+
.pBufferMemoryBarriers = &pre_barrier,
284+
});
285+
cmdbuf.updateBuffer(buffer->Handle(), buffer->Offset(address), num_bytes, value);
286+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
287+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
288+
.bufferMemoryBarrierCount = 1,
289+
.pBufferMemoryBarriers = &post_barrier,
275290
});
276-
cmdbuf.updateBuffer(buffer->Handle(), buf_barrier.offset, num_bytes, value);
277291
}
278292

279293
std::pair<Buffer*, u32> BufferCache::ObtainHostUBO(std::span<const u32> data) {
@@ -465,21 +479,48 @@ void BufferCache::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
465479
};
466480
scheduler.EndRendering();
467481
const auto cmdbuf = scheduler.CommandBuffer();
468-
static constexpr vk::MemoryBarrier READ_BARRIER{
469-
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
470-
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
482+
const std::array pre_barriers = {
483+
vk::BufferMemoryBarrier2{
484+
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
485+
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
486+
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
487+
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
488+
.buffer = overlap.Handle(),
489+
.offset = 0,
490+
.size = overlap.SizeBytes(),
491+
},
471492
};
472-
static constexpr vk::MemoryBarrier WRITE_BARRIER{
473-
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
474-
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
493+
const std::array post_barriers = {
494+
vk::BufferMemoryBarrier2{
495+
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
496+
.srcAccessMask = vk::AccessFlagBits2::eTransferRead,
497+
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
498+
.dstAccessMask = vk::AccessFlagBits2::eMemoryWrite,
499+
.buffer = overlap.Handle(),
500+
.offset = 0,
501+
.size = overlap.SizeBytes(),
502+
},
503+
vk::BufferMemoryBarrier2{
504+
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
505+
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
506+
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
507+
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
508+
.buffer = new_buffer.Handle(),
509+
.offset = dst_base_offset,
510+
.size = overlap.SizeBytes(),
511+
},
475512
};
476-
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
477-
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
478-
READ_BARRIER, {}, {});
479-
cmdbuf.copyBuffer(overlap.buffer, new_buffer.buffer, copy);
480-
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
481-
vk::PipelineStageFlagBits::eAllCommands,
482-
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
513+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
514+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
515+
.bufferMemoryBarrierCount = 1,
516+
.pBufferMemoryBarriers = pre_barriers.data(),
517+
});
518+
cmdbuf.copyBuffer(overlap.Handle(), new_buffer.Handle(), copy);
519+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
520+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
521+
.bufferMemoryBarrierCount = static_cast<u32>(post_barriers.size()),
522+
.pBufferMemoryBarriers = post_barriers.data(),
523+
});
483524
DeleteBuffer(overlap_id);
484525
}
485526

@@ -583,21 +624,35 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
583624
}
584625
scheduler.EndRendering();
585626
const auto cmdbuf = scheduler.CommandBuffer();
586-
static constexpr vk::MemoryBarrier READ_BARRIER{
587-
.srcAccessMask = vk::AccessFlagBits::eMemoryWrite,
588-
.dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite,
627+
const vk::BufferMemoryBarrier2 pre_barrier = {
628+
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
629+
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
630+
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
631+
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
632+
.buffer = buffer.Handle(),
633+
.offset = 0,
634+
.size = buffer.SizeBytes(),
589635
};
590-
static constexpr vk::MemoryBarrier WRITE_BARRIER{
591-
.srcAccessMask = vk::AccessFlagBits::eTransferWrite,
592-
.dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
636+
const vk::BufferMemoryBarrier2 post_barrier = {
637+
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
638+
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
639+
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
640+
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
641+
.buffer = buffer.Handle(),
642+
.offset = 0,
643+
.size = buffer.SizeBytes(),
593644
};
594-
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands,
595-
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
596-
READ_BARRIER, {}, {});
645+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
646+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
647+
.bufferMemoryBarrierCount = 1,
648+
.pBufferMemoryBarriers = &pre_barrier,
649+
});
597650
cmdbuf.copyBuffer(src_buffer, buffer.buffer, copies);
598-
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer,
599-
vk::PipelineStageFlagBits::eAllCommands,
600-
vk::DependencyFlagBits::eByRegion, WRITE_BARRIER, {}, {});
651+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
652+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
653+
.bufferMemoryBarrierCount = 1,
654+
.pBufferMemoryBarriers = &post_barrier,
655+
});
601656
}
602657

603658
bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, u32 size) {
@@ -647,10 +702,42 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr,
647702
}
648703
if (!copies.empty()) {
649704
scheduler.EndRendering();
650-
image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead, {});
705+
const vk::BufferMemoryBarrier2 pre_barrier = {
706+
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
707+
.srcAccessMask = vk::AccessFlagBits2::eMemoryRead,
708+
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
709+
.dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
710+
.buffer = buffer.Handle(),
711+
.offset = max_offset - size,
712+
.size = size,
713+
};
714+
const vk::BufferMemoryBarrier2 post_barrier = {
715+
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
716+
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
717+
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
718+
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead,
719+
.buffer = buffer.Handle(),
720+
.offset = max_offset - size,
721+
.size = size,
722+
};
723+
auto barriers = image.GetBarriers(vk::ImageLayout::eTransferSrcOptimal,
724+
vk::AccessFlagBits2::eTransferRead,
725+
vk::PipelineStageFlagBits2::eTransfer, {});
651726
const auto cmdbuf = scheduler.CommandBuffer();
652-
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.buffer,
727+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
728+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
729+
.bufferMemoryBarrierCount = 1,
730+
.pBufferMemoryBarriers = &pre_barrier,
731+
.imageMemoryBarrierCount = static_cast<u32>(barriers.size()),
732+
.pImageMemoryBarriers = barriers.data(),
733+
});
734+
cmdbuf.copyImageToBuffer(image.image, vk::ImageLayout::eTransferSrcOptimal, buffer.Handle(),
653735
copies);
736+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
737+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
738+
.bufferMemoryBarrierCount = 1,
739+
.pBufferMemoryBarriers = &post_barrier,
740+
});
654741
}
655742
return true;
656743
}

src/video_core/renderer_vulkan/vk_rasterizer.cpp

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -562,6 +562,12 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
562562
push_data.AddOffset(binding.buffer, adjust);
563563
buffer_infos.emplace_back(vk_buffer->Handle(), offset_aligned,
564564
vsharp.GetSize() + adjust);
565+
if (auto barrier =
566+
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
567+
: vk::AccessFlagBits2::eShaderRead,
568+
vk::PipelineStageFlagBits2::eAllCommands)) {
569+
buffer_barriers.emplace_back(*barrier);
570+
}
565571
}
566572

567573
set_writes.push_back({
@@ -600,7 +606,7 @@ void Rasterizer::BindBuffers(const Shader::Info& stage, Shader::Backend::Binding
600606
if (auto barrier =
601607
vk_buffer->GetBarrier(desc.is_written ? vk::AccessFlagBits2::eShaderWrite
602608
: vk::AccessFlagBits2::eShaderRead,
603-
vk::PipelineStageFlagBits2::eComputeShader)) {
609+
vk::PipelineStageFlagBits2::eAllCommands)) {
604610
buffer_barriers.emplace_back(*barrier);
605611
}
606612
if (desc.is_written) {

src/video_core/texture_cache/texture_cache.cpp

Lines changed: 38 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -542,31 +542,62 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
542542
sched_ptr->EndRendering();
543543

544544
const auto cmdbuf = sched_ptr->CommandBuffer();
545-
image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite, {},
546-
cmdbuf);
547-
548545
const VAddr image_addr = image.info.guest_address;
549546
const size_t image_size = image.info.guest_size_bytes;
550547
const auto [vk_buffer, buf_offset] =
551548
buffer_cache.ObtainViewBuffer(image_addr, image_size, is_gpu_dirty);
549+
552550
// The obtained buffer may be written by a shader so we need to emit a barrier to prevent RAW
553551
// hazard
554552
if (auto barrier = vk_buffer->GetBarrier(vk::AccessFlagBits2::eTransferRead,
555553
vk::PipelineStageFlagBits2::eTransfer)) {
556-
const auto dependencies = vk::DependencyInfo{
554+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
557555
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
558556
.bufferMemoryBarrierCount = 1,
559557
.pBufferMemoryBarriers = &barrier.value(),
560-
};
561-
cmdbuf.pipelineBarrier2(dependencies);
558+
});
562559
}
563560

564-
const auto [buffer, offset] = tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image);
561+
const auto [buffer, offset] =
562+
tile_manager.TryDetile(vk_buffer->Handle(), buf_offset, image.info);
565563
for (auto& copy : image_copy) {
566564
copy.bufferOffset += offset;
567565
}
568566

567+
const vk::BufferMemoryBarrier2 pre_barrier{
568+
.srcStageMask = vk::PipelineStageFlagBits2::eAllCommands,
569+
.srcAccessMask = vk::AccessFlagBits2::eMemoryWrite,
570+
.dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
571+
.dstAccessMask = vk::AccessFlagBits2::eTransferRead,
572+
.buffer = buffer,
573+
.offset = offset,
574+
.size = image_size,
575+
};
576+
const vk::BufferMemoryBarrier2 post_barrier{
577+
.srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
578+
.srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
579+
.dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
580+
.dstAccessMask = vk::AccessFlagBits2::eMemoryRead | vk::AccessFlagBits2::eMemoryWrite,
581+
.buffer = buffer,
582+
.offset = offset,
583+
.size = image_size,
584+
};
585+
const auto image_barriers =
586+
image.GetBarriers(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
587+
vk::PipelineStageFlagBits2::eTransfer, {});
588+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
589+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
590+
.bufferMemoryBarrierCount = 1,
591+
.pBufferMemoryBarriers = &pre_barrier,
592+
.imageMemoryBarrierCount = static_cast<u32>(image_barriers.size()),
593+
.pImageMemoryBarriers = image_barriers.data(),
594+
});
569595
cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
596+
cmdbuf.pipelineBarrier2(vk::DependencyInfo{
597+
.dependencyFlags = vk::DependencyFlagBits::eByRegion,
598+
.bufferMemoryBarrierCount = 1,
599+
.pBufferMemoryBarriers = &post_barrier,
600+
});
570601
image.flags &= ~ImageFlagBits::Dirty;
571602
}
572603

src/video_core/texture_cache/tile_manager.cpp

Lines changed: 24 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
#include "video_core/renderer_vulkan/vk_instance.h"
55
#include "video_core/renderer_vulkan/vk_scheduler.h"
66
#include "video_core/renderer_vulkan/vk_shader_util.h"
7+
#include "video_core/texture_cache/image_info.h"
78
#include "video_core/texture_cache/image_view.h"
89
#include "video_core/texture_cache/tile_manager.h"
910

@@ -86,10 +87,10 @@ static vk::Format DemoteImageFormatForDetiling(vk::Format format) {
8687
return format;
8788
}
8889

89-
const DetilerContext* TileManager::GetDetiler(const Image& image) const {
90-
const auto format = DemoteImageFormatForDetiling(image.info.pixel_format);
90+
const DetilerContext* TileManager::GetDetiler(const ImageInfo& info) const {
91+
const auto format = DemoteImageFormatForDetiling(info.pixel_format);
9192

92-
switch (image.info.tiling_mode) {
93+
switch (info.tiling_mode) {
9394
case AmdGpu::TilingMode::Texture_MicroTiled:
9495
switch (format) {
9596
case vk::Format::eR8Uint:
@@ -258,23 +259,23 @@ void TileManager::FreeBuffer(ScratchBuffer buffer) {
258259
}
259260

260261
std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_offset,
261-
Image& image) {
262-
if (!image.info.props.is_tiled) {
262+
const ImageInfo& info) {
263+
if (!info.props.is_tiled) {
263264
return {in_buffer, in_offset};
264265
}
265266

266-
const auto* detiler = GetDetiler(image);
267+
const auto* detiler = GetDetiler(info);
267268
if (!detiler) {
268-
if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
269-
image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
270-
image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
269+
if (info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
270+
info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
271+
info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
271272
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
272-
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
273+
vk::to_string(info.pixel_format), NameOf(info.tiling_mode));
273274
}
274275
return {in_buffer, in_offset};
275276
}
276277

277-
const u32 image_size = image.info.guest_size_bytes;
278+
const u32 image_size = info.guest_size_bytes;
278279

279280
// Prepare output buffer
280281
auto out_buffer = AllocBuffer(image_size, true);
@@ -317,22 +318,21 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
317318
set_writes);
318319

319320
DetilerParams params;
320-
params.num_levels = image.info.resources.levels;
321-
params.pitch0 = image.info.pitch >> (image.info.props.is_block ? 2u : 0u);
322-
params.height = image.info.size.height;
323-
if (image.info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
324-
ASSERT(image.info.resources.levels == 1);
325-
ASSERT(image.info.num_bits >= 32);
326-
const auto tiles_per_row = image.info.pitch / 8u;
327-
const auto tiles_per_slice = tiles_per_row * ((image.info.size.height + 7u) / 8u);
321+
params.num_levels = info.resources.levels;
322+
params.pitch0 = info.pitch >> (info.props.is_block ? 2u : 0u);
323+
params.height = info.size.height;
324+
if (info.tiling_mode == AmdGpu::TilingMode::Texture_Volume) {
325+
ASSERT(info.resources.levels == 1);
326+
ASSERT(info.num_bits >= 32);
327+
const auto tiles_per_row = info.pitch / 8u;
328+
const auto tiles_per_slice = tiles_per_row * ((info.size.height + 7u) / 8u);
328329
params.sizes[0] = tiles_per_row;
329330
params.sizes[1] = tiles_per_slice;
330331
} else {
331-
332-
ASSERT(image.info.resources.levels <= 14);
332+
ASSERT(info.resources.levels <= 14);
333333
std::memset(&params.sizes, 0, sizeof(params.sizes));
334-
for (int m = 0; m < image.info.resources.levels; ++m) {
335-
params.sizes[m] = image.info.mips_layout[m].size * image.info.resources.layers +
334+
for (int m = 0; m < info.resources.levels; ++m) {
335+
params.sizes[m] = info.mips_layout[m].size * info.resources.layers +
336336
(m > 0 ? params.sizes[m - 1] : 0);
337337
}
338338
}
@@ -341,20 +341,9 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
341341
&params);
342342

343343
ASSERT((image_size % 64) == 0);
344-
const auto bpp = image.info.num_bits * (image.info.props.is_block ? 16u : 1u);
344+
const auto bpp = info.num_bits * (info.props.is_block ? 16u : 1u);
345345
const auto num_tiles = image_size / (64 * (bpp / 8));
346346
cmdbuf.dispatch(num_tiles, 1, 1);
347-
348-
const vk::BufferMemoryBarrier post_barrier{
349-
.srcAccessMask = vk::AccessFlagBits::eShaderWrite,
350-
.dstAccessMask = vk::AccessFlagBits::eTransferRead,
351-
.buffer = out_buffer.first,
352-
.size = image_size,
353-
};
354-
cmdbuf.pipelineBarrier(vk::PipelineStageFlagBits::eComputeShader,
355-
vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlagBits::eByRegion,
356-
{}, post_barrier, {});
357-
358347
return {out_buffer.first, 0};
359348
}
360349

0 commit comments

Comments
 (0)