Skip to content

Commit 083656f

Browse files
authored
Merge pull request #19241 from hrydgard/optimize-vertex-dec
Optimize color conversions in non-JIT vertex decoder
2 parents 328acdc + 2174a89 commit 083656f

File tree

3 files changed

+66
-29
lines changed

3 files changed

+66
-29
lines changed

Common/Data/Convert/ColorConv.h

+14-12
Original file line numberDiff line numberDiff line change
@@ -64,26 +64,28 @@ inline u32 RGBA4444ToRGBA8888(u16 src) {
6464
const u32 g = (src & 0x00F0) << 4;
6565
const u32 b = (src & 0x0F00) << 8;
6666
const u32 a = (src & 0xF000) << 12;
67-
6867
const u32 c = r | g | b | a;
6968
return c | (c << 4);
7069
}
7170

7271
inline u32 RGBA5551ToRGBA8888(u16 src) {
73-
u8 r = Convert5To8((src >> 0) & 0x1F);
74-
u8 g = Convert5To8((src >> 5) & 0x1F);
75-
u8 b = Convert5To8((src >> 10) & 0x1F);
76-
u8 a = (src >> 15) & 0x1;
77-
a = (a) ? 0xff : 0;
78-
return (a << 24) | (b << 16) | (g << 8) | r;
72+
u32 dark = ((src & 0x1F) << 3) | ((src & 0x3E0) << 6) | ((src & 0x7C00) << 9);
73+
// Replicate the top 3 bits of each 5-bit channel into its 3 missing low bits.
74+
u32 full = (dark | ((dark >> 5) & 0x070707));
75+
if (src >> 15) {
76+
full |= 0xFF000000;
77+
}
78+
return full;
7979
}
8080

8181
inline u32 RGB565ToRGBA8888(u16 src) {
82-
u8 r = Convert5To8((src >> 0) & 0x1F);
83-
u8 g = Convert6To8((src >> 5) & 0x3F);
84-
u8 b = Convert5To8((src >> 11) & 0x1F);
85-
u8 a = 0xFF;
86-
return (a << 24) | (b << 16) | (g << 8) | r;
82+
u32 dark_rb = ((src & 0x1F) << 3) | ((src & 0xF800) << 8);
83+
// Replicate the top 3 bits of red and blue into their 3 missing low bits.
84+
u32 full_rb = (dark_rb | ((dark_rb >> 5) & 0x070007));
85+
// Add in green (6 bits instead of 5).
86+
u32 dark_g = ((src & 0x7E0) << 5);
87+
u32 full_g = dark_g | ((dark_g >> 6) & 0x300);
88+
return full_rb | full_g | 0xFF000000;
8789
}
8890

8991
inline u16 RGBA8888ToRGB565(u32 value) {

GPU/Common/VertexDecoderCommon.cpp

+16-17
Original file line numberDiff line numberDiff line change
@@ -506,40 +506,39 @@ void VertexDecoder::Step_ColorInvalid() const
506506

507507
void VertexDecoder::Step_Color565() const
508508
{
509-
u8 *c = decoded_ + decFmt.c0off;
510509
u16 cdata = *(const u16_le *)(ptr_ + coloff);
511-
c[0] = Convert5To8(cdata & 0x1f);
512-
c[1] = Convert6To8((cdata >> 5) & 0x3f);
513-
c[2] = Convert5To8((cdata >> 11) & 0x1f);
514-
c[3] = 255;
515-
// Always full alpha.
510+
u32 *c = (u32 *)(decoded_ + decFmt.c0off);
511+
*c = RGB565ToRGBA8888(cdata);
516512
}
517513

518514
void VertexDecoder::Step_Color5551() const
519515
{
520-
u8 *c = decoded_ + decFmt.c0off;
521516
u16 cdata = *(const u16_le *)(ptr_ + coloff);
522-
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (cdata >> 15) != 0;
523-
c[0] = Convert5To8(cdata & 0x1f);
524-
c[1] = Convert5To8((cdata >> 5) & 0x1f);
525-
c[2] = Convert5To8((cdata >> 10) & 0x1f);
526-
c[3] = (cdata >> 15) ? 255 : 0;
517+
u32 *c = (u32 *)(decoded_ + decFmt.c0off);
518+
int alpha = (cdata >> 15);
519+
if (!alpha) {
520+
gstate_c.vertexFullAlpha = false;
521+
}
522+
*c = RGBA5551ToRGBA8888(cdata);
527523
}
528524

529525
void VertexDecoder::Step_Color4444() const
530526
{
531-
u8 *c = decoded_ + decFmt.c0off;
532527
u16 cdata = *(const u16_le *)(ptr_ + coloff);
533-
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (cdata >> 12) == 0xF;
534-
for (int j = 0; j < 4; j++)
535-
c[j] = Convert4To8((cdata >> (j * 4)) & 0xF);
528+
u32 *c = (u32 *)(decoded_ + decFmt.c0off);
529+
if ((cdata >> 12) != 0xF) {
530+
gstate_c.vertexFullAlpha = false;
531+
}
532+
*c = RGBA4444ToRGBA8888(cdata);
536533
}
537534

538535
void VertexDecoder::Step_Color8888() const
539536
{
540537
u8 *c = decoded_ + decFmt.c0off;
541538
const u8 *cdata = (const u8*)(ptr_ + coloff);
542-
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && cdata[3] == 255;
539+
if (cdata[3] != 255) {
540+
gstate_c.vertexFullAlpha = false;
541+
}
543542
memcpy(c, cdata, sizeof(u8) * 4);
544543
}
545544

unittest/UnitTest.cpp

+36
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@
6767
#include "Common/Log.h"
6868
#include "Common/StringUtils.h"
6969
#include "Core/Config.h"
70+
#include "Common/Data/Convert/ColorConv.h"
7071
#include "Common/File/VFS/VFS.h"
7172
#include "Common/File/VFS/DirectoryReader.h"
7273
#include "Core/FileSystems/ISOFileSystem.h"
@@ -997,6 +998,40 @@ bool TestIniFile() {
997998
return true;
998999
}
9991000

1001+
inline u32 ReferenceRGBA5551ToRGBA8888(u16 src) {
1002+
u8 r = Convert5To8((src >> 0) & 0x1F);
1003+
u8 g = Convert5To8((src >> 5) & 0x1F);
1004+
u8 b = Convert5To8((src >> 10) & 0x1F);
1005+
u8 a = (src >> 15) & 0x1;
1006+
a = (a) ? 0xff : 0;
1007+
return (a << 24) | (b << 16) | (g << 8) | r;
1008+
}
1009+
1010+
inline u32 ReferenceRGB565ToRGBA8888(u16 src) {
1011+
u8 r = Convert5To8((src >> 0) & 0x1F);
1012+
u8 g = Convert6To8((src >> 5) & 0x3F);
1013+
u8 b = Convert5To8((src >> 11) & 0x1F);
1014+
u8 a = 0xFF;
1015+
return (a << 24) | (b << 16) | (g << 8) | r;
1016+
}
1017+
1018+
bool TestColorConv() {
1019+
// Can exhaustively test the 16->32 conversions.
1020+
for (int i = 0; i < 65536; i++) {
1021+
u16 col16 = i;
1022+
1023+
u32 reference = ReferenceRGBA5551ToRGBA8888(col16);
1024+
u32 value = RGBA5551ToRGBA8888(col16);
1025+
EXPECT_EQ_INT(reference, value);
1026+
1027+
reference = ReferenceRGB565ToRGBA8888(col16);
1028+
value = RGB565ToRGBA8888(col16);
1029+
EXPECT_EQ_INT(reference, value);
1030+
}
1031+
1032+
return true;
1033+
}
1034+
10001035
typedef bool (*TestFunc)();
10011036
struct TestItem {
10021037
const char *name;
@@ -1056,6 +1091,7 @@ TestItem availableTests[] = {
10561091
TEST_ITEM(VFS),
10571092
TEST_ITEM(Substitutions),
10581093
TEST_ITEM(IniFile),
1094+
TEST_ITEM(ColorConv),
10591095
};
10601096

10611097
int main(int argc, const char *argv[]) {

0 commit comments

Comments
 (0)