Skip to content

Commit 5040be1

Browse files
authored
renderer_vulkan: Handle depth-stencil copies through depth render overrides. (shadps4-emu#2134)
1 parent d94abff commit 5040be1

File tree

8 files changed

+146
-13
lines changed

8 files changed

+146
-13
lines changed

src/core/devtools/widget/reg_popup.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ void RegPopup::DrawDepthBuffer(const DepthBuffer& depth_data) {
105105
"DEPTH_SLICE.TILE_MAX", depth_buffer.depth_slice.tile_max,
106106
"Pitch()", depth_buffer.Pitch(),
107107
"Height()", depth_buffer.Height(),
108-
"Address()", depth_buffer.Address(),
108+
"DepthAddress()", depth_buffer.DepthAddress(),
109+
"StencilAddress()", depth_buffer.StencilAddress(),
109110
"NumSamples()", depth_buffer.NumSamples(),
110111
"NumBits()", depth_buffer.NumBits(),
111112
"GetDepthSliceSize()", depth_buffer.GetDepthSliceSize()

src/core/devtools/widget/reg_view.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ void RegView::DrawGraphicsRegs() {
155155
TableNextColumn();
156156
TextUnformatted("Depth buffer");
157157
TableNextColumn();
158-
if (regs.depth_buffer.Address() == 0 || !regs.depth_control.depth_enable) {
158+
if (regs.depth_buffer.DepthAddress() == 0 || !regs.depth_control.depth_enable) {
159159
TextUnformatted("N/A");
160160
} else {
161161
const char* text = last_selected_cb == depth_id && default_reg_popup.open ? "x" : "->";
@@ -241,7 +241,7 @@ void RegView::SetData(DebugStateType::RegDump _data, const std::string& base_tit
241241
default_reg_popup.open = false;
242242
if (last_selected_cb == depth_id) {
243243
const auto& has_depth =
244-
regs.depth_buffer.Address() != 0 && regs.depth_control.depth_enable;
244+
regs.depth_buffer.DepthAddress() != 0 && regs.depth_control.depth_enable;
245245
if (has_depth) {
246246
default_reg_popup.SetData(title, regs.depth_buffer, regs.depth_control);
247247
default_reg_popup.open = true;

src/video_core/amdgpu/liverpool.h

Lines changed: 61 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -429,11 +429,19 @@ struct Liverpool {
429429
} depth_slice;
430430

431431
bool DepthValid() const {
432-
return Address() != 0 && z_info.format != ZFormat::Invalid;
432+
return DepthAddress() != 0 && z_info.format != ZFormat::Invalid;
433433
}
434434

435435
bool StencilValid() const {
436-
return Address() != 0 && stencil_info.format != StencilFormat::Invalid;
436+
return StencilAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
437+
}
438+
439+
bool DepthWriteValid() const {
440+
return DepthWriteAddress() != 0 && z_info.format != ZFormat::Invalid;
441+
}
442+
443+
bool StencilWriteValid() const {
444+
return StencilWriteAddress() != 0 && stencil_info.format != StencilFormat::Invalid;
437445
}
438446

439447
u32 Pitch() const {
@@ -444,14 +452,22 @@ struct Liverpool {
444452
return (depth_size.height_tile_max + 1) << 3;
445453
}
446454

447-
u64 Address() const {
455+
/// Returns the depth read address: z_read_base widened to 64 bits and shifted left by 8
/// (i.e. the register value multiplied by 256).
u64 DepthAddress() const {
    const auto widened_base = static_cast<u64>(z_read_base);
    return widened_base << 8;
}
450458

451459
/// Returns the stencil read address: stencil_read_base widened to 64 bits and shifted left
/// by 8 (i.e. the register value multiplied by 256).
u64 StencilAddress() const {
    const auto widened_base = static_cast<u64>(stencil_read_base);
    return widened_base << 8;
}
454462

463+
/// Returns the depth write address: z_write_base widened to 64 bits and shifted left by 8
/// (i.e. the register value multiplied by 256).
u64 DepthWriteAddress() const {
    const auto widened_base = static_cast<u64>(z_write_base);
    return widened_base << 8;
}
466+
467+
/// Returns the stencil write address: stencil_write_base widened to 64 bits and shifted
/// left by 8 (i.e. the register value multiplied by 256).
u64 StencilWriteAddress() const {
    const auto widened_base = static_cast<u64>(stencil_write_base);
    return widened_base << 8;
}
470+
455471
/// Returns the sample count, computed as two to the power of z_info.num_samples.
u32 NumSamples() const {
    // The field is interpreted as a log2 exponent even though the spec doesn't say it is one.
    const u32 sample_count = 1u << z_info.num_samples;
    return sample_count;
}
@@ -1008,6 +1024,46 @@ struct Liverpool {
10081024
}
10091025
};
10101026

1027+
/// Selector stored in the two-bit force_* fields of DepthRenderOverride
/// (force_hiz_enable, force_his_enable0/1, force_full_z_range).
/// NOTE(review): presumably Off means "no override" and Enable/Disable force the feature
/// on/off; the encoding 0b11 has no enumerator here - confirm against the register docs.
enum class ForceEnable : u32 {
    Off = 0b00,
    Enable = 0b01,
    Disable = 0b10,
};
1032+
1033+
/// Selector stored in the two-bit DepthRenderOverride::force_z_limit_summ field.
/// The encodings form a bit pair: Both has the MinZ and MaxZ bits set together.
enum class ForceSumm : u32 {
    Off = 0b00,
    MinZ = 0b01,
    MaxZ = 0b10,
    Both = 0b11,
};
1039+
1040+
union DepthRenderOverride {
1041+
u32 raw;
1042+
BitField<0, 2, ForceEnable> force_hiz_enable;
1043+
BitField<2, 2, ForceEnable> force_his_enable0;
1044+
BitField<4, 2, ForceEnable> force_his_enable1;
1045+
BitField<6, 1, u32> force_shader_z_order;
1046+
BitField<7, 1, u32> fast_z_disable;
1047+
BitField<8, 1, u32> fast_stencil_disable;
1048+
BitField<9, 1, u32> noop_cull_disable;
1049+
BitField<10, 1, u32> force_color_kill;
1050+
BitField<11, 1, u32> force_z_read;
1051+
BitField<12, 1, u32> force_stencil_read;
1052+
BitField<13, 2, ForceEnable> force_full_z_range;
1053+
BitField<15, 1, u32> force_qc_smask_conflict;
1054+
BitField<16, 1, u32> disable_viewport_clamp;
1055+
BitField<17, 1, u32> ignore_sc_zrange;
1056+
BitField<18, 1, u32> disable_fully_covered;
1057+
BitField<19, 2, ForceSumm> force_z_limit_summ;
1058+
BitField<21, 5, u32> max_tiles_in_dtt;
1059+
BitField<26, 1, u32> disable_tile_rate_tiles;
1060+
BitField<27, 1, u32> force_z_dirty;
1061+
BitField<28, 1, u32> force_stencil_dirty;
1062+
BitField<29, 1, u32> force_z_valid;
1063+
BitField<30, 1, u32> force_stencil_valid;
1064+
BitField<31, 1, u32> preserve_compression;
1065+
};
1066+
10111067
union AaConfig {
10121068
BitField<0, 3, u32> msaa_num_samples;
10131069
BitField<4, 1, u32> aa_mask_centroid_dtmn;
@@ -1209,7 +1265,8 @@ struct Liverpool {
12091265
DepthRenderControl depth_render_control;
12101266
INSERT_PADDING_WORDS(1);
12111267
DepthView depth_view;
1212-
INSERT_PADDING_WORDS(2);
1268+
DepthRenderOverride depth_render_override;
1269+
INSERT_PADDING_WORDS(1);
12131270
Address depth_htile_data_base;
12141271
INSERT_PADDING_WORDS(2);
12151272
float depth_bounds_min;

src/video_core/renderer_vulkan/vk_rasterizer.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,26 @@ bool Rasterizer::FilterDraw() {
7070
return false;
7171
}
7272

73+
const bool cb_disabled =
74+
regs.color_control.mode == AmdGpu::Liverpool::ColorControl::OperationMode::Disable;
75+
const auto depth_copy =
76+
regs.depth_render_override.force_z_dirty && regs.depth_render_override.force_z_valid &&
77+
regs.depth_buffer.DepthValid() && regs.depth_buffer.DepthWriteValid() &&
78+
regs.depth_buffer.DepthAddress() != regs.depth_buffer.DepthWriteAddress();
79+
const auto stencil_copy =
80+
regs.depth_render_override.force_stencil_dirty &&
81+
regs.depth_render_override.force_stencil_valid && regs.depth_buffer.StencilValid() &&
82+
regs.depth_buffer.StencilWriteValid() &&
83+
regs.depth_buffer.StencilAddress() != regs.depth_buffer.StencilWriteAddress();
84+
if (cb_disabled && (depth_copy || stencil_copy)) {
85+
// Games may disable color buffer and enable force depth/stencil dirty and valid to
86+
// do a copy from one depth-stencil surface to another, without a pixel shader.
87+
// We need to detect this case and perform the copy, otherwise it will have no effect.
88+
LOG_TRACE(Render_Vulkan, "Performing depth-stencil override copy");
89+
DepthStencilCopy(depth_copy, stencil_copy);
90+
return false;
91+
}
92+
7393
return true;
7494
}
7595

@@ -899,6 +919,59 @@ void Rasterizer::Resolve() {
899919
}
900920
}
901921

922+
void Rasterizer::DepthStencilCopy(bool is_depth, bool is_stencil) {
923+
auto& regs = liverpool->regs;
924+
925+
auto read_desc = VideoCore::TextureCache::DepthTargetDesc(
926+
regs.depth_buffer, regs.depth_view, regs.depth_control,
927+
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, false);
928+
auto write_desc = VideoCore::TextureCache::DepthTargetDesc(
929+
regs.depth_buffer, regs.depth_view, regs.depth_control,
930+
regs.depth_htile_data_base.GetAddress(), liverpool->last_db_extent, true);
931+
932+
auto& read_image = texture_cache.GetImage(texture_cache.FindImage(read_desc));
933+
auto& write_image = texture_cache.GetImage(texture_cache.FindImage(write_desc));
934+
935+
VideoCore::SubresourceRange sub_range;
936+
sub_range.base.layer = liverpool->regs.depth_view.slice_start;
937+
sub_range.extent.layers = liverpool->regs.depth_view.NumSlices() - sub_range.base.layer;
938+
939+
read_image.Transit(vk::ImageLayout::eTransferSrcOptimal, vk::AccessFlagBits2::eTransferRead,
940+
sub_range);
941+
write_image.Transit(vk::ImageLayout::eTransferDstOptimal, vk::AccessFlagBits2::eTransferWrite,
942+
sub_range);
943+
944+
auto aspect_mask = vk::ImageAspectFlags(0);
945+
if (is_depth) {
946+
aspect_mask |= vk::ImageAspectFlagBits::eDepth;
947+
}
948+
if (is_stencil) {
949+
aspect_mask |= vk::ImageAspectFlagBits::eStencil;
950+
}
951+
vk::ImageCopy region = {
952+
.srcSubresource =
953+
{
954+
.aspectMask = aspect_mask,
955+
.mipLevel = 0,
956+
.baseArrayLayer = sub_range.base.layer,
957+
.layerCount = sub_range.extent.layers,
958+
},
959+
.srcOffset = {0, 0, 0},
960+
.dstSubresource =
961+
{
962+
.aspectMask = aspect_mask,
963+
.mipLevel = 0,
964+
.baseArrayLayer = sub_range.base.layer,
965+
.layerCount = sub_range.extent.layers,
966+
},
967+
.dstOffset = {0, 0, 0},
968+
.extent = {write_image.info.size.width, write_image.info.size.height, 1},
969+
};
970+
const auto cmdbuf = scheduler.CommandBuffer();
971+
cmdbuf.copyImage(read_image.image, vk::ImageLayout::eTransferSrcOptimal, write_image.image,
972+
vk::ImageLayout::eTransferDstOptimal, region);
973+
}
974+
902975
/// Forwards an inline data write to the buffer cache with all arguments unchanged.
///
/// @param address   Passed through as the first argument of the buffer cache call.
/// @param value     Pointer to the data, passed through as-is.
/// @param num_bytes Byte count, passed through as-is.
/// @param is_gds    Passed through as-is.
void Rasterizer::InlineData(VAddr address, const void* value, u32 num_bytes, bool is_gds) {
    // No rasterizer-side processing happens here; the buffer cache does the work.
    buffer_cache.InlineData(address, value, num_bytes, is_gds);
}

src/video_core/renderer_vulkan/vk_rasterizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class Rasterizer {
7171
RenderState PrepareRenderState(u32 mrt_mask);
7272
void BeginRendering(const GraphicsPipeline& pipeline, RenderState& state);
7373
void Resolve();
74+
void DepthStencilCopy(bool is_depth, bool is_stencil);
7475
void EliminateFastClear();
7576

7677
void UpdateDynamicState(const GraphicsPipeline& pipeline);

src/video_core/texture_cache/image_info.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
9898
}
9999

100100
ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices,
101-
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint) noexcept {
101+
VAddr htile_address, const AmdGpu::Liverpool::CbDbExtent& hint,
102+
bool write_buffer) noexcept {
102103
props.is_tiled = false;
103104
pixel_format = LiverpoolToVK::DepthFormat(buffer.z_info.format, buffer.stencil_info.format);
104105
type = vk::ImageType::e2D;
@@ -111,10 +112,10 @@ ImageInfo::ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slice
111112
resources.layers = num_slices;
112113
meta_info.htile_addr = buffer.z_info.tile_surface_en ? htile_address : 0;
113114

114-
stencil_addr = buffer.StencilAddress();
115+
stencil_addr = write_buffer ? buffer.StencilWriteAddress() : buffer.StencilAddress();
115116
stencil_size = pitch * size.height * sizeof(u8);
116117

117-
guest_address = buffer.Address();
118+
guest_address = write_buffer ? buffer.DepthWriteAddress() : buffer.DepthAddress();
118119
const auto depth_slice_sz = buffer.GetDepthSliceSize();
119120
guest_size = depth_slice_sz * num_slices;
120121
mips_layout.emplace_back(depth_slice_sz, pitch, 0);

src/video_core/texture_cache/image_info.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ struct ImageInfo {
1919
ImageInfo(const AmdGpu::Liverpool::ColorBuffer& buffer,
2020
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
2121
ImageInfo(const AmdGpu::Liverpool::DepthBuffer& buffer, u32 num_slices, VAddr htile_address,
22-
const AmdGpu::Liverpool::CbDbExtent& hint = {}) noexcept;
22+
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false) noexcept;
2323
ImageInfo(const AmdGpu::Image& image, const Shader::ImageResource& desc) noexcept;
2424

2525
bool IsTiled() const {

src/video_core/texture_cache/texture_cache.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ class TextureCache {
7979
DepthTargetDesc(const AmdGpu::Liverpool::DepthBuffer& buffer,
8080
const AmdGpu::Liverpool::DepthView& view,
8181
const AmdGpu::Liverpool::DepthControl& ctl, VAddr htile_address,
82-
const AmdGpu::Liverpool::CbDbExtent& hint = {})
82+
const AmdGpu::Liverpool::CbDbExtent& hint = {}, bool write_buffer = false)
8383
: BaseDesc{BindingType::DepthTarget,
84-
ImageInfo{buffer, view.NumSlices(), htile_address, hint},
84+
ImageInfo{buffer, view.NumSlices(), htile_address, hint, write_buffer},
8585
ImageViewInfo{buffer, view, ctl}} {}
8686
};
8787

0 commit comments

Comments
 (0)