Skip to content

Commit 804220a

Browse files
authored
Merge pull request #16198 from unknownbrackets/gles-stencil
Readback stencil buffer for debugger on GLES
2 parents f9e736b + fc68cd9 commit 804220a

8 files changed

+197
-50
lines changed

Common/GPU/OpenGL/GLFeatures.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -544,8 +544,8 @@ void CheckGLExtensions() {
544544
}
545545
if (gl_extensions.VersionGEThan(4, 3)) {
546546
gl_extensions.ARB_copy_image = true;
547+
gl_extensions.ARB_stencil_texturing = true;
547548
// ARB_explicit_uniform_location = true;
548-
// ARB_stencil_texturing = true;
549549
// ARB_texture_view = true;
550550
// ARB_vertex_attrib_binding = true;
551551
}

Common/GPU/OpenGL/GLFeatures.h

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ struct GLExtensions {
7171
bool ARB_depth_clamp;
7272
bool ARB_uniform_buffer_object;
7373
bool ARB_texture_non_power_of_two;
74+
bool ARB_stencil_texturing;
7475

7576
// EXT
7677
bool EXT_swap_control_tear;

Common/GPU/OpenGL/GLQueueRunner.cpp

+15-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@
2424
#elif !defined(GL_CLIP_DISTANCE0)
2525
#define GL_CLIP_DISTANCE0 0x3000
2626
#endif
27+
// Supply these GL enum values ourselves when the included GL headers do not define them.
#if !defined(GL_DEPTH_STENCIL_TEXTURE_MODE)
#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA
#endif

#if !defined(GL_STENCIL_INDEX)
#define GL_STENCIL_INDEX 0x1901
#endif
2733

2834
static constexpr int TEXCACHE_NAME_CACHE_SIZE = 16;
2935

@@ -1114,8 +1120,16 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
11141120
glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->z_stencil_texture.texture);
11151121
curTex[slot] = &c.bind_fb_texture.framebuffer->z_stencil_texture;
11161122
}
1123+
// This should be uncommon, so always set the mode.
1124+
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT);
1125+
} else if (c.bind_fb_texture.aspect == GL_STENCIL_BUFFER_BIT) {
1126+
if (curTex[slot] != &c.bind_fb_texture.framebuffer->z_stencil_texture) {
1127+
glBindTexture(GL_TEXTURE_2D, c.bind_fb_texture.framebuffer->z_stencil_texture.texture);
1128+
curTex[slot] = &c.bind_fb_texture.framebuffer->z_stencil_texture;
1129+
}
1130+
// This should be uncommon, so always set the mode.
1131+
glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX);
11171132
} else {
1118-
// Can't texture from stencil buffers.
11191133
curTex[slot] = nullptr;
11201134
}
11211135
CHECK_GL_ERROR_IF_DEBUG();

GPU/Common/FramebufferManagerCommon.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -2579,6 +2579,10 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G
25792579
// No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway.
25802580
buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);
25812581
bool retval = draw_->CopyFramebufferToMemorySync(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w,h, Draw::DataFormat::S8, buffer.GetData(), w, "GetStencilbuffer");
2582+
if (!retval) {
2583+
// Try ReadbackStencilbufferSync, in case GLES.
2584+
retval = ReadbackStencilbufferSync(vfb->fbo, 0, 0, w, h, buffer.GetData(), w);
2585+
}
25822586
// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
25832587
RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
25842588
return retval;
@@ -2653,6 +2657,10 @@ bool FramebufferManagerCommon::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, i
26532657
return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, pixels, pixelsStride, "ReadbackDepthbufferSync");
26542658
}
26552659

2660+
// Default stencil readback: asks the draw context to copy the stencil aspect of `fbo`
// for the region (x, y, w, h) into `pixels` as S8, using `pixelsStride` as the row stride.
// Returns whatever the draw context reports. This is virtual so a backend can substitute
// a different readback strategy.
bool FramebufferManagerCommon::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) {
	// Request the stencil aspect, matching the S8 destination format. Requesting the depth
	// aspect here would not match what GetStencilbuffer() asks for on its direct path.
	return draw_->CopyFramebufferToMemorySync(fbo, Draw::FB_STENCIL_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync");
}
2663+
26562664
void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) {
26572665
// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
26582666
if (x + w >= vfb->bufferWidth) {
@@ -2808,8 +2816,10 @@ void FramebufferManagerCommon::DeviceLost() {
28082816
DoRelease(reinterpretFromTo_[i][j]);
28092817
}
28102818
}
2811-
DoRelease(stencilUploadSampler_);
2812-
DoRelease(stencilUploadPipeline_);
2819+
DoRelease(stencilWriteSampler_);
2820+
DoRelease(stencilWritePipeline_);
2821+
DoRelease(stencilReadbackSampler_);
2822+
DoRelease(stencilReadbackPipeline_);
28132823
DoRelease(depthReadbackSampler_);
28142824
DoRelease(depthReadbackPipeline_);
28152825
DoRelease(draw2DPipelineColor_);

GPU/Common/FramebufferManagerCommon.h

+6-2
Original file line numberDiff line numberDiff line change
@@ -444,6 +444,7 @@ class FramebufferManagerCommon {
444444
virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
445445
// Used for when a shader is required, such as GLES.
446446
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride);
447+
virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride);
447448
void SetViewport2D(int x, int y, int w, int h);
448449
Draw::Texture *MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height);
449450
void DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags);
@@ -568,9 +569,12 @@ class FramebufferManagerCommon {
568569

569570
// Common implementation of stencil buffer upload. Also not 100% optimal, but not performance
570571
// critical either.
571-
Draw::Pipeline *stencilUploadPipeline_ = nullptr;
572-
Draw::SamplerState *stencilUploadSampler_ = nullptr;
572+
Draw::Pipeline *stencilWritePipeline_ = nullptr;
573+
Draw::SamplerState *stencilWriteSampler_ = nullptr;
573574

575+
// Used on GLES where we can't directly readback depth or stencil, but here for simplicity.
576+
Draw::Pipeline *stencilReadbackPipeline_ = nullptr;
577+
Draw::SamplerState *stencilReadbackSampler_ = nullptr;
574578
Draw::Pipeline *depthReadbackPipeline_ = nullptr;
575579
Draw::SamplerState *depthReadbackSampler_ = nullptr;
576580

GPU/Common/StencilCommon.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
196196
shaderManager_->DirtyLastShader();
197197
textureCache_->ForgetLastTexture();
198198

199-
if (!stencilUploadPipeline_) {
199+
if (!stencilWritePipeline_) {
200200
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
201201

202202
char *fsCode = new char[8192];
@@ -237,8 +237,8 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
237237
{ stencilUploadVs, stencilUploadFs },
238238
inputLayout, stencilWrite, blendOff, rasterNoCull, &stencilUBDesc,
239239
};
240-
stencilUploadPipeline_ = draw_->CreateGraphicsPipeline(stencilWriteDesc, "stencil_upload");
241-
_assert_(stencilUploadPipeline_);
240+
stencilWritePipeline_ = draw_->CreateGraphicsPipeline(stencilWriteDesc, "stencil_upload");
241+
_assert_(stencilWritePipeline_);
242242

243243
delete[] fsCode;
244244
delete[] vsCode;
@@ -252,7 +252,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
252252
stencilUploadVs->Release();
253253

254254
SamplerStateDesc descNearest{};
255-
stencilUploadSampler_ = draw_->CreateSamplerState(descNearest);
255+
stencilWriteSampler_ = draw_->CreateSamplerState(descNearest);
256256
}
257257

258258
// Fullscreen triangle coordinates.
@@ -297,11 +297,11 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
297297
}
298298

299299
draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);
300-
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilUploadSampler_);
300+
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilWriteSampler_);
301301

302302
// We must bind the program after starting the render pass, and set the color mask after clearing.
303303
draw_->SetScissorRect(0, 0, w, h);
304-
draw_->BindPipeline(stencilUploadPipeline_);
304+
draw_->BindPipeline(stencilWritePipeline_);
305305

306306
for (int i = 1; i < values; i += i) {
307307
if (!(usedBits & i)) {

GPU/GLES/DepthBufferGLES.cpp

+155-38
Original file line numberDiff line numberDiff line change
@@ -85,13 +85,94 @@ const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), {
8585
{ "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 },
8686
} };
8787

88+
// Fragment shader for the stencil readback pass. Samples the stencil index through an
// unsigned integer sampler and writes it, scaled by 1/255, to all four color channels.
// The sampler is declared as "uniform lowp usampler2D": storage qualifier first, then the
// precision qualifier directly before the type, which is the order every GLSL version accepts.
static const char *stencil_dl_fs = R"(
#ifdef GL_ES
#ifdef GL_FRAGMENT_PRECISION_HIGH
precision highp float;
#else
precision mediump float;
#endif
#endif
#if __VERSION__ >= 130
#define varying in
#define texture2D texture
#define gl_FragColor fragColor0
out vec4 fragColor0;
#endif
varying vec2 v_texcoord;
uniform lowp usampler2D tex;
void main() {
	uint stencil = texture2D(tex, v_texcoord).r;
	float scaled = float(stencil) / 255.0;
	gl_FragColor = vec4(scaled, scaled, scaled, scaled);
}
)";

// Vertex shader for the stencil readback pass. Doubles the incoming 2D position to form the
// texture coordinate, then maps that coordinate to clip space with (uv * 2 - 1).
static const char *stencil_vs = R"(
#ifdef GL_ES
precision highp float;
#endif
#if __VERSION__ >= 130
#define attribute in
#define varying out
#endif
attribute vec2 a_position;
varying vec2 v_texcoord;
void main() {
	v_texcoord = a_position * 2.0;
	gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);
}
)";
126+
88127
static bool SupportsDepthTexturing() {
89128
if (gl_extensions.IsGLES) {
90129
return gl_extensions.OES_packed_depth_stencil && (gl_extensions.OES_depth_texture || gl_extensions.GLES3);
91130
}
92131
return gl_extensions.VersionGEThan(3, 0);
93132
}
94133

134+
// Builds a triangle-list graphics pipeline from the given fragment/vertex shader sources.
//   draw        - context used to create every object.
//   tag         - name passed to CreateGraphicsPipeline.
//   ubDesc      - uniform buffer description stored in the pipeline desc.
//   fs / fsTag  - fragment shader source and its tag.
//   vs / vsTag  - vertex shader source and its tag.
// The input layout is a single 8-byte-stride stream holding one R32G32_FLOAT position.
// Every intermediate object is released before returning; the caller receives the pipeline.
static Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, const char *vs, const char *vsTag) {
	using namespace Draw;

	const ShaderLanguageDesc &langDesc = draw->GetShaderLanguageDesc();

	// Compile both stages in the context's shader language.
	const uint8_t *fsBytes = (const uint8_t *)fs;
	const uint8_t *vsBytes = (const uint8_t *)vs;
	ShaderModule *readbackFs = draw->CreateShaderModule(ShaderStage::Fragment, langDesc.shaderLanguage, fsBytes, strlen(fs), fsTag);
	ShaderModule *readbackVs = draw->CreateShaderModule(ShaderStage::Vertex, langDesc.shaderLanguage, vsBytes, strlen(vs), vsTag);
	_assert_(readbackFs && readbackVs);

	InputLayoutDesc layoutDesc = {
		{
			{ 8, false },
		},
		{
			{ 0, SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
		},
	};
	InputLayout *layout = draw->CreateInputLayout(layoutDesc);

	// Fixed-function state: blending disabled with a 0xF write mask, default depth/stencil
	// and raster state.
	BlendState *noBlend = draw->CreateBlendState({ false, 0xF });
	DepthStencilState *defaultDepthStencil = draw->CreateDepthStencilState({});
	RasterState *defaultRaster = draw->CreateRasterState({});

	PipelineDesc pipelineDesc{
		Primitive::TRIANGLE_LIST,
		{ readbackVs, readbackFs },
		layout, defaultDepthStencil, noBlend, defaultRaster, ubDesc,
	};
	Draw::Pipeline *pipeline = draw->CreateGraphicsPipeline(pipelineDesc, tag);
	_assert_(pipeline);

	// Drop our references to the building blocks now that the pipeline exists.
	defaultRaster->Release();
	noBlend->Release();
	defaultDepthStencil->Release();
	layout->Release();

	readbackFs->Release();
	readbackVs->Release();

	return pipeline;
}
175+
95176
bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) {
96177
using namespace Draw;
97178

@@ -117,44 +198,8 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
117198

118199
if (useColorPath) {
119200
if (!depthReadbackPipeline_) {
120-
const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();
121-
122-
ShaderModule *depthReadbackFs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)depth_dl_fs, strlen(depth_dl_fs), "depth_dl_fs");
123-
ShaderModule *depthReadbackVs = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)depth_vs, strlen(depth_vs), "depth_vs");
124-
_assert_(depthReadbackFs && depthReadbackVs);
125-
126-
InputLayoutDesc desc = {
127-
{
128-
{ 8, false },
129-
},
130-
{
131-
{ 0, SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
132-
},
133-
};
134-
InputLayout *inputLayout = draw_->CreateInputLayout(desc);
135-
136-
BlendState *blendOff = draw_->CreateBlendState({ false, 0xF });
137-
DepthStencilState *stencilIgnore = draw_->CreateDepthStencilState({});
138-
RasterState *rasterNoCull = draw_->CreateRasterState({});
139-
140-
PipelineDesc depthReadbackDesc{
141-
Primitive::TRIANGLE_LIST,
142-
{ depthReadbackVs, depthReadbackFs },
143-
inputLayout, stencilIgnore, blendOff, rasterNoCull, &depthUBDesc,
144-
};
145-
depthReadbackPipeline_ = draw_->CreateGraphicsPipeline(depthReadbackDesc, "depth_dl");
146-
_assert_(depthReadbackPipeline_);
147-
148-
rasterNoCull->Release();
149-
blendOff->Release();
150-
stencilIgnore->Release();
151-
inputLayout->Release();
152-
153-
depthReadbackFs->Release();
154-
depthReadbackVs->Release();
155-
156-
SamplerStateDesc descNearest{};
157-
depthReadbackSampler_ = draw_->CreateSamplerState(descNearest);
201+
depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_vs, "depth_vs");
202+
depthReadbackSampler_ = draw_->CreateSamplerState({});
158203
}
159204

160205
shaderManager_->DirtyLastShader();
@@ -242,3 +287,75 @@ bool FramebufferManagerGLES::ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int
242287
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
243288
return true;
244289
}
290+
291+
// Well, this is not depth, but it's depth/stencil related.
292+
bool FramebufferManagerGLES::ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) {
293+
using namespace Draw;
294+
295+
if (!fbo) {
296+
ERROR_LOG_REPORT_ONCE(vfbfbozero, SCEGE, "ReadbackStencilbufferSync: bad fbo");
297+
return false;
298+
}
299+
300+
const bool useColorPath = gl_extensions.IsGLES;
301+
if (!useColorPath) {
302+
return draw_->CopyFramebufferToMemorySync(fbo, FB_STENCIL_BIT, x, y, w, h, DataFormat::S8, pixels, pixelsStride, "ReadbackStencilbufferSync");
303+
}
304+
305+
// Unsupported below GLES 3.1 or without ARB_stencil_texturing.
306+
// OES_texture_stencil8 is related, but used to specify texture data.
307+
if ((gl_extensions.IsGLES && !gl_extensions.VersionGEThan(3, 1)) && !gl_extensions.ARB_stencil_texturing)
308+
return false;
309+
310+
// Pixel size always 4 here because we always request RGBA back.
311+
const u32 bufSize = w * h * 4;
312+
if (!convBuf_ || convBufSize_ < bufSize) {
313+
delete[] convBuf_;
314+
convBuf_ = new u8[bufSize];
315+
convBufSize_ = bufSize;
316+
}
317+
318+
if (!stencilReadbackPipeline_) {
319+
stencilReadbackPipeline_ = CreateReadbackPipeline(draw_, "stencil_dl", &depthUBDesc, stencil_dl_fs, "stencil_dl_fs", stencil_vs, "stencil_vs");
320+
stencilReadbackSampler_ = draw_->CreateSamplerState({});
321+
}
322+
323+
shaderManager_->DirtyLastShader();
324+
auto *blitFBO = GetTempFBO(TempFBO::COPY, fbo->Width(), fbo->Height());
325+
draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackStencilbufferSync");
326+
Draw::Viewport viewport = { 0.0f, 0.0f, (float)fbo->Width(), (float)fbo->Height(), 0.0f, 1.0f };
327+
draw_->SetViewports(1, &viewport);
328+
329+
draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_STENCIL_BIT, 0);
330+
draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilReadbackSampler_);
331+
332+
// We must bind the program after starting the render pass.
333+
draw_->SetScissorRect(0, 0, w, h);
334+
draw_->BindPipeline(stencilReadbackPipeline_);
335+
336+
// Fullscreen triangle coordinates.
337+
static const float positions[6] = {
338+
0.0, 0.0,
339+
1.0, 0.0,
340+
0.0, 1.0,
341+
};
342+
draw_->DrawUP(positions, 3);
343+
344+
draw_->CopyFramebufferToMemorySync(blitFBO, FB_COLOR_BIT, x, y, w, h, DataFormat::R8G8B8A8_UNORM, convBuf_, w, "ReadbackStencilbufferSync");
345+
346+
textureCache_->ForgetLastTexture();
347+
348+
// TODO: Use 1/4 width to write all values directly and skip CPU conversion?
349+
uint8_t *dest = pixels;
350+
const u32_le *packed32 = (u32_le *)convBuf_;
351+
for (int yp = 0; yp < h; ++yp) {
352+
for (int xp = 0; xp < w; ++xp) {
353+
dest[xp] = packed32[xp] & 0xFF;
354+
}
355+
dest += pixelsStride;
356+
packed32 += w;
357+
}
358+
359+
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
360+
return true;
361+
}

GPU/GLES/FramebufferManagerGLES.h

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class FramebufferManagerGLES : public FramebufferManagerCommon {
3939
protected:
4040
void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) override;
4141
bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride) override;
42+
bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride) override;
4243

4344
private:
4445
u8 *convBuf_ = nullptr;

0 commit comments

Comments
 (0)