Skip to content

Commit cf48532

Browse files
authored
Merge pull request #18219 from hrydgard/get-index-bounds-autovec
Make GetIndexBounds friendlier to autovectorization. Works on x86 at least.
2 parents b8fa3a2 + 45bc4d8 commit cf48532

File tree

1 file changed

+20
-12
lines changed

1 file changed

+20
-12
lines changed

GPU/Common/VertexDecoderCommon.cpp

+20 -12
Original file line number | Diff line number | Diff line change
@@ -108,28 +108,36 @@ void DecVtxFormat::InitializeFromID(uint32_t id) {
108108
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
109109
// Find index bounds. Could cache this in display lists.
110110
// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
111-
int lowerBound = 0x7FFFFFFF;
112-
int upperBound = 0;
113111
u32 idx = vertType & GE_VTYPE_IDX_MASK;
114-
if (idx == GE_VTYPE_IDX_8BIT) {
115-
const u8 *ind8 = (const u8 *)inds;
112+
if (idx == GE_VTYPE_IDX_16BIT) {
113+
uint16_t upperBound = 0;
114+
uint16_t lowerBound = 0xFFFF;
115+
const u16_le *ind16 = (const u16_le *)inds;
116116
for (int i = 0; i < count; i++) {
117-
u8 value = ind8[i];
117+
u16 value = ind16[i];
118118
if (value > upperBound)
119119
upperBound = value;
120120
if (value < lowerBound)
121121
lowerBound = value;
122122
}
123-
} else if (idx == GE_VTYPE_IDX_16BIT) {
124-
const u16_le *ind16 = (const u16_le *)inds;
123+
*indexLowerBound = lowerBound;
124+
*indexUpperBound = upperBound;
125+
} else if (idx == GE_VTYPE_IDX_8BIT) {
126+
uint8_t upperBound = 0;
127+
uint8_t lowerBound = 0xFF;
128+
const u8 *ind8 = (const u8 *)inds;
125129
for (int i = 0; i < count; i++) {
126-
u16 value = ind16[i];
130+
u8 value = ind8[i];
127131
if (value > upperBound)
128132
upperBound = value;
129133
if (value < lowerBound)
130134
lowerBound = value;
131135
}
136+
*indexLowerBound = lowerBound;
137+
*indexUpperBound = upperBound;
132138
} else if (idx == GE_VTYPE_IDX_32BIT) {
139+
int lowerBound = 0x7FFFFFFF;
140+
int upperBound = 0;
133141
WARN_LOG_REPORT_ONCE(indexBounds32, G3D, "GetIndexBounds: Decoding 32-bit indexes");
134142
const u32_le *ind32 = (const u32_le *)inds;
135143
for (int i = 0; i < count; i++) {
@@ -143,12 +151,12 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
143151
if (value < lowerBound)
144152
lowerBound = value;
145153
}
154+
*indexLowerBound = (u16)lowerBound;
155+
*indexUpperBound = (u16)upperBound;
146156
} else {
147-
lowerBound = 0;
148-
upperBound = count - 1;
157+
*indexLowerBound = 0;
158+
*indexUpperBound = count - 1;
149159
}
150-
*indexLowerBound = (u16)lowerBound;
151-
*indexUpperBound = (u16)upperBound;
152160
}
153161

154162
void PrintDecodedVertex(const VertexReader &vtx) {

0 commit comments

Comments
 (0)