@@ -58,6 +58,34 @@ static u8 StencilBits8888(const u8 *ptr8, u32 numPixels) {
58
58
return bits >> 24 ;
59
59
}
60
60
61
+ static bool CheckStencilBits (const u8 *src, const VirtualFramebuffer *dstBuffer, int &values, u8 &usedBits) {
62
+ switch (dstBuffer->fb_format ) {
63
+ case GE_FORMAT_565:
64
+ // Well, this doesn't make much sense.
65
+ return false ;
66
+ case GE_FORMAT_5551:
67
+ usedBits = StencilBits5551 (src, dstBuffer->fb_stride * dstBuffer->bufferHeight );
68
+ values = 2 ;
69
+ break ;
70
+ case GE_FORMAT_4444:
71
+ usedBits = StencilBits4444 (src, dstBuffer->fb_stride * dstBuffer->bufferHeight );
72
+ values = 16 ;
73
+ break ;
74
+ case GE_FORMAT_8888:
75
+ usedBits = StencilBits8888 (src, dstBuffer->fb_stride * dstBuffer->bufferHeight );
76
+ values = 256 ;
77
+ break ;
78
+ case GE_FORMAT_INVALID:
79
+ case GE_FORMAT_DEPTH16:
80
+ case GE_FORMAT_CLUT8:
81
+ // Inconceivable.
82
+ _assert_ (false );
83
+ return false ;
84
+ }
85
+
86
+ return true ;
87
+ }
88
+
61
89
// Uniform data handed to UpdateDynamicUniformBuffer() for each stencil write draw.
struct StencilUB {
	// On the per-bit (non-export) path this is the bit being written, scaled by
	// 1/255, 16/255 or 128/255 depending on the buffer format; the fragment shader
	// divides the sampled alpha by it to test that bit.
	float stencilValue;
};
@@ -83,8 +111,12 @@ static const SamplerDef samplers[1] = {
83
111
{ 0 , " tex" },
84
112
};
85
113
86
- void GenerateStencilFs (char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs) {
87
- ShaderWriter writer (buffer, lang, ShaderStage::Fragment);
114
+ void GenerateStencilFs (char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs, bool useExport) {
115
+ std::vector<const char *> extensions;
116
+ if (useExport)
117
+ extensions.push_back (" #extension GL_ARB_shader_stencil_export : require" );
118
+
119
+ ShaderWriter writer (buffer, lang, ShaderStage::Fragment, extensions);
88
120
writer.HighPrecisionFloat ();
89
121
writer.DeclareSamplers (samplers);
90
122
@@ -98,9 +130,13 @@ void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw:
98
130
99
131
writer.C (" vec4 index = " ).SampleTexture2D (" tex" , " v_texcoord.xy" ).C (" ;\n " );
100
132
writer.C (" vec4 outColor = index.aaaa;\n " ); // Only care about a.
101
- writer.C (" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n " );
102
- // Bitwise operations on floats, ugh.
103
- writer.C (" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n " );
133
+ if (useExport) {
134
+ writer.C (" gl_FragStencilRefARB = int(roundAndScaleTo255f(index.a));\n " );
135
+ } else {
136
+ writer.C (" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n " );
137
+ // Bitwise operations on floats, ugh.
138
+ writer.C (" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n " );
139
+ }
104
140
105
141
if (bugs.Has (Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {
106
142
writer.C (" gl_FragDepth = gl_FragCoord.z;\n " );
@@ -135,10 +171,11 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
135
171
return false ;
136
172
}
137
173
138
- VirtualFramebuffer *dstBuffer = 0 ;
174
+ VirtualFramebuffer *dstBuffer = nullptr ;
139
175
for (size_t i = 0 ; i < vfbs_.size (); ++i) {
140
176
VirtualFramebuffer *vfb = vfbs_[i];
141
- if (vfb->fb_address == addr) {
177
+ // TODO: Maybe we should broadcast to all? Most of the time, there's only one.
178
+ if (vfb->fb_address == addr && (!dstBuffer || dstBuffer->colorBindSeq < vfb->colorBindSeq )) {
142
179
dstBuffer = vfb;
143
180
}
144
181
}
@@ -148,34 +185,15 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
148
185
149
186
int values = 0 ;
150
187
u8 usedBits = 0 ;
188
+ bool useExportShader = draw_->GetDeviceCaps ().fragmentShaderStencilWriteSupported ;
151
189
152
190
const u8 *src = Memory::GetPointer (addr);
153
191
if (!src)
154
192
return false ;
155
193
156
- switch (dstBuffer->fb_format ) {
157
- case GE_FORMAT_565:
158
- // Well, this doesn't make much sense.
194
+ // Could skip this when doing useExportShader, but then we couldn't optimize usedBits == 0.
195
+ if (!CheckStencilBits (src, dstBuffer, values, usedBits))
159
196
return false ;
160
- case GE_FORMAT_5551:
161
- usedBits = StencilBits5551 (src, dstBuffer->fb_stride * dstBuffer->bufferHeight );
162
- values = 2 ;
163
- break ;
164
- case GE_FORMAT_4444:
165
- usedBits = StencilBits4444 (src, dstBuffer->fb_stride * dstBuffer->bufferHeight );
166
- values = 16 ;
167
- break ;
168
- case GE_FORMAT_8888:
169
- usedBits = StencilBits8888 (src, dstBuffer->fb_stride * dstBuffer->bufferHeight );
170
- values = 256 ;
171
- break ;
172
- case GE_FORMAT_INVALID:
173
- case GE_FORMAT_DEPTH16:
174
- case GE_FORMAT_CLUT8:
175
- // Inconceivable.
176
- _assert_ (false );
177
- break ;
178
- }
179
197
180
198
if (usedBits == 0 ) {
181
199
if (flags & WriteStencil::STENCIL_IS_ZERO) {
@@ -201,7 +219,7 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
201
219
202
220
char *fsCode = new char [8192 ];
203
221
char *vsCode = new char [8192 ];
204
- GenerateStencilFs (fsCode, shaderLanguageDesc, draw_->GetBugs ());
222
+ GenerateStencilFs (fsCode, shaderLanguageDesc, draw_->GetBugs (), useExportShader );
205
223
GenerateStencilVs (vsCode, shaderLanguageDesc);
206
224
207
225
_assert_msg_ (strlen (fsCode) < 8192 , " StenFS length error: %d" , (int )strlen (fsCode));
@@ -303,24 +321,32 @@ bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size,
303
321
draw_->SetScissorRect (0 , 0 , w, h);
304
322
draw_->BindPipeline (stencilWritePipeline_);
305
323
306
- for (int i = 1 ; i < values; i += i) {
307
- if (!(usedBits & i)) {
308
- // It's already zero, let's skip it.
309
- continue ;
310
- }
324
+ if (useExportShader) {
325
+ // We only need to do one pass if using an export shader.
311
326
StencilUB ub{};
312
- if (dstBuffer->fb_format == GE_FORMAT_4444) {
313
- draw_->SetStencilParams (0xFF , (i << 4 ) | i, 0xFF );
314
- ub.stencilValue = i * (16 .0f / 255 .0f );
315
- } else if (dstBuffer->fb_format == GE_FORMAT_5551) {
316
- draw_->SetStencilParams (0xFF , 0xFF , 0xFF );
317
- ub.stencilValue = i * (128 .0f / 255 .0f );
318
- } else {
319
- draw_->SetStencilParams (0xFF , i, 0xFF );
320
- ub.stencilValue = i * (1 .0f / 255 .0f );
321
- }
327
+ draw_->SetStencilParams (0xFF , 0xFF , 0xFF );
322
328
draw_->UpdateDynamicUniformBuffer (&ub, sizeof (ub));
323
329
draw_->DrawUP (positions, 3 );
330
+ } else {
331
+ for (int i = 1 ; i < values; i += i) {
332
+ if (!(usedBits & i)) {
333
+ // It's already zero, let's skip it.
334
+ continue ;
335
+ }
336
+ StencilUB ub{};
337
+ if (dstBuffer->fb_format == GE_FORMAT_4444) {
338
+ draw_->SetStencilParams (0xFF , (i << 4 ) | i, 0xFF );
339
+ ub.stencilValue = i * (16 .0f / 255 .0f );
340
+ } else if (dstBuffer->fb_format == GE_FORMAT_5551) {
341
+ draw_->SetStencilParams (0xFF , 0xFF , 0xFF );
342
+ ub.stencilValue = i * (128 .0f / 255 .0f );
343
+ } else {
344
+ draw_->SetStencilParams (0xFF , i, 0xFF );
345
+ ub.stencilValue = i * (1 .0f / 255 .0f );
346
+ }
347
+ draw_->UpdateDynamicUniformBuffer (&ub, sizeof (ub));
348
+ draw_->DrawUP (positions, 3 );
349
+ }
324
350
}
325
351
326
352
if (useBlit) {
0 commit comments