Skip to content

Mali: Turn off any depth writes in the shader if depth test == NEVER #18813

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Common/GPU/Vulkan/thin3d_vulkan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -973,7 +973,7 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
// See: https://github.com/hrydgard/ppsspp/pull/11684
if (deviceProps.deviceID >= 0x05000000 && deviceProps.deviceID < 0x06000000) {
if (deviceProps.driverVersion < 0x80180000) {
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO);
}
}
// Color write mask not masking write in certain scenarios with a depth test, see #10421.
Expand Down Expand Up @@ -1006,8 +1006,9 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
bugs_.Infest(Bugs::EQUAL_WZ_CORRUPTS_DEPTH);

// Nearly identical to the the Adreno bug, see #13833 (Midnight Club map broken) and other issues.
// It has the additional caveat that combining depth writes with NEVER depth tests crashes the driver.
// Reported fixed in major version 40 - let's add a check once confirmed.
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL);
bugs_.Infest(Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI);

// This started in driver 31 or 32, fixed in 40 - let's add a check once confirmed.
if (majorVersion >= 32) {
Expand Down Expand Up @@ -1042,6 +1043,8 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
INFO_LOG(G3D, "KHR_depth_stencil_resolve not supported, disabling multisampling");
}

bugs_.Infest(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI);

// We limit multisampling functionality to reasonably recent and known-good tiling GPUs.
if (multisampleAllowed) {
// Check for depth stencil resolve. Without it, depth textures won't work, and we don't want that mess
Expand Down
3 changes: 2 additions & 1 deletion Common/GPU/thin3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,8 @@ void ConvertToD16(uint8_t *dst, const uint8_t *src, uint32_t dstStride, uint32_t

const char *Bugs::GetBugName(uint32_t bug) {
switch (bug) {
case NO_DEPTH_CANNOT_DISCARD_STENCIL: return "NO_DEPTH_CANNOT_DISCARD_STENCIL";
case NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI: return "NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI";
case NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO: return "NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO";
case DUAL_SOURCE_BLENDING_BROKEN: return "DUAL_SOURCE_BLENDING_BROKEN";
case ANY_MAP_BUFFER_RANGE_SLOW: return "ANY_MAP_BUFFER_RANGE_SLOW";
case PVR_GENMIPMAP_HEIGHT_GREATER: return "PVR_GENMIPMAP_HEIGHT_GREATER";
Expand Down
3 changes: 2 additions & 1 deletion Common/GPU/thin3d.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ class Bugs {
const char *GetBugName(uint32_t bug);

enum : uint32_t {
NO_DEPTH_CANNOT_DISCARD_STENCIL = 0,
NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO = 0,
DUAL_SOURCE_BLENDING_BROKEN = 1,
ANY_MAP_BUFFER_RANGE_SLOW = 2,
PVR_GENMIPMAP_HEIGHT_GREATER = 3,
Expand All @@ -351,6 +351,7 @@ class Bugs {
ADRENO_RESOURCE_DEADLOCK = 12,
UNIFORM_INDEXING_BROKEN = 13, // not a properly diagnosed issue, a workaround attempt: #17386
PVR_BAD_16BIT_TEXFORMATS = 14,
NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI = 15,
MAX_BUG,
};

Expand Down
4 changes: 2 additions & 2 deletions Core/Util/GameManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -625,7 +625,7 @@ bool GameManager::InstallMemstickGame(struct zip *z, const Path &zipfile, const
g_OSD.SetProgressBar("install", di->T("Installing..."), 0.0f, 1.0f, 0.1f + (i + 1) / (float)info.numFiles * 0.9f, 0.1f);
}

INFO_LOG(HLE, "Extracted %d files from zip (%d bytes / %d).", info.numFiles, (int)bytesCopied, (int)allBytes);
INFO_LOG(HLE, "Unzipped %d files (%d bytes / %d).", info.numFiles, (int)bytesCopied, (int)allBytes);
zip_close(z);
z = nullptr;
installProgress_ = 1.0f;
Expand Down Expand Up @@ -733,7 +733,7 @@ bool GameManager::InstallZippedISO(struct zip *z, int isoFileIndex, const Path &
auto di = GetI18NCategory(I18NCat::DIALOG);
g_OSD.SetProgressBar("install", di->T("Installing..."), 0.0f, 0.0f, 0.0f, 0.1f);
if (ExtractFile(z, isoFileIndex, outputISOFilename, &bytesCopied, allBytes)) {
INFO_LOG(IO, "Successfully extracted ISO file to '%s'", outputISOFilename.c_str());
INFO_LOG(IO, "Successfully unzipped ISO file to '%s'", outputISOFilename.c_str());
success = true;
}
zip_close(z);
Expand Down
8 changes: 5 additions & 3 deletions GPU/Common/FragmentShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
shading = doFlatShading ? "flat" : "";
}

bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);
bool forceDepthWritesOff = id.Bit(FS_BIT_DEPTH_TEST_NEVER);

bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL) && !forceDepthWritesOff;

GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
Expand All @@ -177,7 +179,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}

bool needFragCoord = readFramebufferTex || gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool writeDepth = gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) && !forceDepthWritesOff;

// TODO: We could have a separate mechanism to support more ops using the shader blending mechanism,
// on hardware that can do proper bit math in fragment shaders.
Expand All @@ -192,7 +194,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
std::vector<SamplerDef> samplers;

if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
if (useDiscardStencilBugWorkaround && !gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
if (useDiscardStencilBugWorkaround && !writeDepth) {
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}

Expand Down
20 changes: 16 additions & 4 deletions GPU/Common/ShaderId.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA ";
if (id.Bit(FS_BIT_UBERSHADER)) desc << "FragUber ";
if (id.Bit(FS_BIT_DEPTH_TEST_NEVER)) desc << "DepthNever ";
switch ((ShaderDepalMode)id.Bits(FS_BIT_SHADER_DEPAL_MODE, 2)) {
case ShaderDepalMode::OFF: break;
case ShaderDepalMode::NORMAL: desc << "Depal "; break;
Expand Down Expand Up @@ -387,13 +388,24 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
id.SetBit(FS_BIT_STEREO);
}

if (g_Config.bVendorBugChecksEnabled && bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
bool stencilWithoutDepth = !IsStencilTestOutputDisabled() && (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled());
if (stencilWithoutDepth) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
if (g_Config.bVendorBugChecksEnabled) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI)) {
// On Adreno, the workaround is safe, so we do simple checks.
bool stencilWithoutDepth = (!gstate.isDepthTestEnabled() || !gstate.isDepthWriteEnabled()) && !IsStencilTestOutputDisabled();
if (stencilWithoutDepth) {
id.SetBit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL, stencilWithoutDepth);
}
}
}

// Forcibly disable NEVER + depth-write on Mali.
// TODO: Take this from computed depth test instead of directly from the gstate.
// That will take more refactoring though.
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) &&
gstate.getDepthTestFunction() == GE_COMP_NEVER && gstate.isDepthTestEnabled()) {
id.SetBit(FS_BIT_DEPTH_TEST_NEVER);
}

// In case the USE flag changes (for example, in multisampling we might disable input attachments),
// we don't want to accidentally use the wrong cached shader here. So moved it to a bit.
if (FragmentIdNeedsFramebufferRead(id)) {
Expand Down
1 change: 1 addition & 0 deletions GPU/Common/ShaderId.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ enum FShaderBit : uint8_t {
FS_BIT_STEREO = 58,
FS_BIT_USE_FRAMEBUFFER_FETCH = 59,
FS_BIT_UBERSHADER = 60,
FS_BIT_DEPTH_TEST_NEVER = 61, // Only used on Mali. Set when depth == NEVER. We forcibly avoid writing to depth in this case, since it crashes the driver.
};

static inline FShaderBit operator +(FShaderBit bit, int i) {
Expand Down
4 changes: 2 additions & 2 deletions GPU/Common/StencilCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
writer.HighPrecisionFloat();
writer.DeclareSamplers(samplers);

if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO)) {
writer.C("layout (depth_unchanged) out float gl_FragDepth;\n");
}

Expand All @@ -137,7 +137,7 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
writer.C(" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n");
}

if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO)) {
writer.C(" gl_FragDepth = gl_FragCoord.z;\n");
}

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE },
{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_LOGICOP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
Expand Down