Skip to content

Commit 8abc43a

Browse files
authored
texture_cache: 32bpp and 64bpp macro detilers (#1852)
* added 32bpp macro detiler
* added 64bpp macro detiler
* consider 3d depth alignment in size calculations
1 parent 7fe4df8 commit 8abc43a

File tree

12 files changed

+236
-173
lines changed

12 files changed

+236
-173
lines changed

src/video_core/amdgpu/resource.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ enum class TilingMode : u32 {
126126
Display_MacroTiled = 0xAu,
127127
Texture_MicroTiled = 0xDu,
128128
Texture_MacroTiled = 0xEu,
129+
Texture_Volume = 0x13u,
129130
};
130131

131132
constexpr std::string_view NameOf(TilingMode type) {
@@ -140,6 +141,8 @@ constexpr std::string_view NameOf(TilingMode type) {
140141
return "Texture_MicroTiled";
141142
case TilingMode::Texture_MacroTiled:
142143
return "Texture_MacroTiled";
144+
case TilingMode::Texture_Volume:
145+
return "Texture_Volume";
143146
default:
144147
return "Unknown";
145148
}
@@ -294,9 +297,6 @@ struct Image {
294297
return tiling_index == 5 ? TilingMode::Texture_MicroTiled
295298
: TilingMode::Depth_MacroTiled;
296299
}
297-
if (tiling_index == 0x13) {
298-
return TilingMode::Texture_MicroTiled;
299-
}
300300
return static_cast<TilingMode>(tiling_index);
301301
}
302302

src/video_core/host_shaders/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ set(SHADER_FILES
77
detile_m32x1.comp
88
detile_m32x2.comp
99
detile_m32x4.comp
10+
detile_macro32x1.comp
11+
detile_macro32x2.comp
1012
fs_tri.vert
1113
post_process.frag
1214
)

src/video_core/host_shaders/detile_m32x1.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ layout(std430, binding = 1) buffer output_buf {
1515
layout(push_constant) uniform image_info {
1616
uint num_levels;
1717
uint pitch;
18+
uint height;
1819
uint sizes[14];
1920
} info;
2021

src/video_core/host_shaders/detile_m32x2.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ layout(std430, binding = 1) buffer output_buf {
1515
layout(push_constant) uniform image_info {
1616
uint num_levels;
1717
uint pitch;
18+
uint height;
1819
uint sizes[14];
1920
} info;
2021

src/video_core/host_shaders/detile_m32x4.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ layout(std430, binding = 1) buffer output_buf {
1515
layout(push_constant) uniform image_info {
1616
uint num_levels;
1717
uint pitch;
18+
uint height;
1819
uint sizes[14];
1920
} info;
2021

src/video_core/host_shaders/detile_m8x1.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ layout(std430, binding = 1) buffer output_buf {
1818
layout(push_constant) uniform image_info {
1919
uint num_levels;
2020
uint pitch;
21+
uint height;
2122
uint sizes[14];
2223
} info;
2324

src/video_core/host_shaders/detile_m8x2.comp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ layout(std430, binding = 1) buffer output_buf {
1717
layout(push_constant) uniform image_info {
1818
uint num_levels;
1919
uint pitch;
20+
uint height;
2021
uint sizes[14];
2122
} info;
2223

src/video_core/host_shaders/detile_macro32x1.comp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#version 450
5+
6+
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
7+
8+
layout(std430, binding = 0) buffer input_buf { // Source buffer: main() reads the tiled image data from here.
9+
uint in_data[]; // Indexed in 32-bit words; main() converts its byte offset with ">> 2".
10+
};
11+
layout(std430, binding = 1) buffer output_buf { // Destination buffer: main() writes the reordered texels here.
12+
uint out_data[]; // One 32-bit word per invocation, indexed by gl_GlobalInvocationID.x.
13+
};
14+
15+
layout(push_constant) uniform image_info { // Push-constant parameters read by main() through "info".
16+
uint num_levels; // Not read by main() in this shader.
17+
uint pitch; // Row length used to split the flat invocation index into x and y.
18+
uint height; // Rows per slice; with pitch it splits the flat invocation index into y and z.
19+
uint c0; // Multiplier applied to tile_row in the tile offset; presumably micro tiles per row - TODO confirm against the host code.
20+
uint c1; // Multiplier applied to (z >> 2) in the slice offset; presumably micro tiles per group of four slices - TODO confirm.
21+
} info;
22+
23+
// Each LUT is 64 bytes, so should fit into K$ given tiled slices locality
24+
const uint lut_32bpp[][64] = { // Four 64-entry tables: main() picks the table with (z & 3) and the entry with col + row * 8.
25+
{ // Table for (z & 3) == 0. Entries are element indices; main() scales them by BPP / 8 to get bytes.
26+
0x00, 0x01, 0x04, 0x05, 0x40, 0x41, 0x44, 0x45,
27+
0x02, 0x03, 0x06, 0x07, 0x42, 0x43, 0x46, 0x47,
28+
0x10, 0x11, 0x14, 0x15, 0x50, 0x51, 0x54, 0x55,
29+
0x12, 0x13, 0x16, 0x17, 0x52, 0x53, 0x56, 0x57,
30+
0x80, 0x81, 0x84, 0x85, 0xc0, 0xc1, 0xc4, 0xc5,
31+
0x82, 0x83, 0x86, 0x87, 0xc2, 0xc3, 0xc6, 0xc7,
32+
0x90, 0x91, 0x94, 0x95, 0xd0, 0xd1, 0xd4, 0xd5,
33+
0x92, 0x93, 0x96, 0x97, 0xd2, 0xd3, 0xd6, 0xd7,
34+
},
35+
{ // Table for (z & 3) == 1: same pattern as the first table with 0x08 set in every entry.
36+
0x08, 0x09, 0x0c, 0x0d, 0x48, 0x49, 0x4c, 0x4d,
37+
0x0a, 0x0b, 0x0e, 0x0f, 0x4a, 0x4b, 0x4e, 0x4f,
38+
0x18, 0x19, 0x1c, 0x1d, 0x58, 0x59, 0x5c, 0x5d,
39+
0x1a, 0x1b, 0x1e, 0x1f, 0x5a, 0x5b, 0x5e, 0x5f,
40+
0x88, 0x89, 0x8c, 0x8d, 0xc8, 0xc9, 0xcc, 0xcd,
41+
0x8a, 0x8b, 0x8e, 0x8f, 0xca, 0xcb, 0xce, 0xcf,
42+
0x98, 0x99, 0x9c, 0x9d, 0xd8, 0xd9, 0xdc, 0xdd,
43+
0x9a, 0x9b, 0x9e, 0x9f, 0xda, 0xdb, 0xde, 0xdf,
44+
},
45+
{ // Table for (z & 3) == 2: same pattern as the first table with 0x20 set in every entry.
46+
0x20, 0x21, 0x24, 0x25, 0x60, 0x61, 0x64, 0x65,
47+
0x22, 0x23, 0x26, 0x27, 0x62, 0x63, 0x66, 0x67,
48+
0x30, 0x31, 0x34, 0x35, 0x70, 0x71, 0x74, 0x75,
49+
0x32, 0x33, 0x36, 0x37, 0x72, 0x73, 0x76, 0x77,
50+
0xa0, 0xa1, 0xa4, 0xa5, 0xe0, 0xe1, 0xe4, 0xe5,
51+
0xa2, 0xa3, 0xa6, 0xa7, 0xe2, 0xe3, 0xe6, 0xe7,
52+
0xb0, 0xb1, 0xb4, 0xb5, 0xf0, 0xf1, 0xf4, 0xf5,
53+
0xb2, 0xb3, 0xb6, 0xb7, 0xf2, 0xf3, 0xf6, 0xf7,
54+
},
55+
{ // Table for (z & 3) == 3: same pattern as the first table with 0x28 set in every entry.
56+
0x28, 0x29, 0x2c, 0x2d, 0x68, 0x69, 0x6c, 0x6d,
57+
0x2a, 0x2b, 0x2e, 0x2f, 0x6a, 0x6b, 0x6e, 0x6f,
58+
0x38, 0x39, 0x3c, 0x3d, 0x78, 0x79, 0x7c, 0x7d,
59+
0x3a, 0x3b, 0x3e, 0x3f, 0x7a, 0x7b, 0x7e, 0x7f,
60+
0xa8, 0xa9, 0xac, 0xad, 0xe8, 0xe9, 0xec, 0xed,
61+
0xaa, 0xab, 0xae, 0xaf, 0xea, 0xeb, 0xee, 0xef,
62+
0xb8, 0xb9, 0xbc, 0xbd, 0xf8, 0xf9, 0xfc, 0xfd,
63+
0xba, 0xbb, 0xbe, 0xbf, 0xfa, 0xfb, 0xfe, 0xff,
64+
}
65+
};
66+
67+
#define MICRO_TILE_DIM (8) // Tile edge in texels: x and y are split into a 3-bit in-tile part and a tile index.
68+
#define MICRO_TILE_SZ (1024) // Byte stride between consecutive tiles in in_data (used for tile and slice offsets).
69+
#define TEXELS_PER_ELEMENT (1) // Not referenced by main() in this shader.
70+
#define BPP (32) // Bits per element; main() uses BPP / 8 to turn a LUT index into a byte offset.
71+
72+
void main() { // Copies one 32-bit texel from its tiled location in in_data to linear slot gl_GlobalInvocationID.x of out_data.
73+
uint x = gl_GlobalInvocationID.x % info.pitch; // Column within the row.
74+
uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; // Row within the slice.
75+
uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); // Slice index.
76+
77+
uint col = bitfieldExtract(x, 0, 3); // Low 3 bits of x: column inside the 8x8 tile.
78+
uint row = bitfieldExtract(y, 0, 3); // Low 3 bits of y: row inside the 8x8 tile.
79+
uint lut = bitfieldExtract(z, 0, 2); // Low 2 bits of z: selects one of the four tables.
80+
uint idx = lut_32bpp[lut][col + row * MICRO_TILE_DIM]; // Element index of this texel inside its tile block.
81+
82+
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; // Byte offset of the group of four slices that contains z.
83+
uint tile_row = y / MICRO_TILE_DIM;
84+
uint tile_column = x / MICRO_TILE_DIM;
85+
uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; // Byte offset of the tile inside that slice group.
86+
uint offs = slice_offs + tile_offs + (idx * BPP / 8); // Final byte offset of the source texel.
87+
88+
uint p0 = in_data[offs >> 2u]; // in_data is a uint array, so convert bytes to a word index.
89+
out_data[gl_GlobalInvocationID.x] = p0; // Output is written in plain invocation order.
90+
}
src/video_core/host_shaders/detile_macro32x2.comp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
2+
// SPDX-License-Identifier: GPL-2.0-or-later
3+
4+
#version 450
5+
6+
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
7+
8+
layout(std430, binding = 0) buffer input_buf { // Source buffer: main() reads the tiled image data from here.
9+
uint in_data[]; // Indexed in 32-bit words; main() converts its byte offset with ">> 2".
10+
};
11+
layout(std430, binding = 1) buffer output_buf { // Destination buffer: main() writes the reordered texels here.
12+
uint out_data[]; // Two consecutive 32-bit words per invocation, starting at 2 * gl_GlobalInvocationID.x.
13+
};
14+
15+
layout(push_constant) uniform image_info { // Push-constant parameters read by main() through "info".
16+
uint num_levels; // Not read by main() in this shader.
17+
uint pitch; // Row length used to split the flat invocation index into x and y.
18+
uint height; // Rows per slice; with pitch it splits the flat invocation index into y and z.
19+
uint c0; // Multiplier applied to tile_row in the tile offset; presumably micro tiles per row - TODO confirm against the host code.
20+
uint c1; // Multiplier applied to (z >> 2) in the slice offset; presumably micro tiles per group of four slices - TODO confirm.
21+
} info;
22+
23+
const uint lut_64bpp[][64] = { // Four 64-entry tables: main() picks the table with (z & 3) and the entry with col + row * 8.
24+
{ // Table for (z & 3) == 0. Entries are element indices; main() scales them by BPP / 8 to get bytes.
25+
0x00, 0x01, 0x08, 0x09, 0x40, 0x41, 0x48, 0x49,
26+
0x02, 0x03, 0x0a, 0x0b, 0x42, 0x43, 0x4a, 0x4b,
27+
0x10, 0x11, 0x18, 0x19, 0x50, 0x51, 0x58, 0x59,
28+
0x12, 0x13, 0x1a, 0x1b, 0x52, 0x53, 0x5a, 0x5b,
29+
0x80, 0x81, 0x88, 0x89, 0xc0, 0xc1, 0xc8, 0xc9,
30+
0x82, 0x83, 0x8a, 0x8b, 0xc2, 0xc3, 0xca, 0xcb,
31+
0x90, 0x91, 0x98, 0x99, 0xd0, 0xd1, 0xd8, 0xd9,
32+
0x92, 0x93, 0x9a, 0x9b, 0xd2, 0xd3, 0xda, 0xdb,
33+
},
34+
{ // Table for (z & 3) == 1: same pattern as the first table with 0x04 set in every entry.
35+
0x04, 0x05, 0x0c, 0x0d, 0x44, 0x45, 0x4c, 0x4d,
36+
0x06, 0x07, 0x0e, 0x0f, 0x46, 0x47, 0x4e, 0x4f,
37+
0x14, 0x15, 0x1c, 0x1d, 0x54, 0x55, 0x5c, 0x5d,
38+
0x16, 0x17, 0x1e, 0x1f, 0x56, 0x57, 0x5e, 0x5f,
39+
0x84, 0x85, 0x8c, 0x8d, 0xc4, 0xc5, 0xcc, 0xcd,
40+
0x86, 0x87, 0x8e, 0x8f, 0xc6, 0xc7, 0xce, 0xcf,
41+
0x94, 0x95, 0x9c, 0x9d, 0xd4, 0xd5, 0xdc, 0xdd,
42+
0x96, 0x97, 0x9e, 0x9f, 0xd6, 0xd7, 0xde, 0xdf,
43+
},
44+
{ // Table for (z & 3) == 2: same pattern as the first table with 0x20 set in every entry.
45+
0x20, 0x21, 0x28, 0x29, 0x60, 0x61, 0x68, 0x69,
46+
0x22, 0x23, 0x2a, 0x2b, 0x62, 0x63, 0x6a, 0x6b,
47+
0x30, 0x31, 0x38, 0x39, 0x70, 0x71, 0x78, 0x79,
48+
0x32, 0x33, 0x3a, 0x3b, 0x72, 0x73, 0x7a, 0x7b,
49+
0xa0, 0xa1, 0xa8, 0xa9, 0xe0, 0xe1, 0xe8, 0xe9,
50+
0xa2, 0xa3, 0xaa, 0xab, 0xe2, 0xe3, 0xea, 0xeb,
51+
0xb0, 0xb1, 0xb8, 0xb9, 0xf0, 0xf1, 0xf8, 0xf9,
52+
0xb2, 0xb3, 0xba, 0xbb, 0xf2, 0xf3, 0xfa, 0xfb,
53+
},
54+
{ // Table for (z & 3) == 3: same pattern as the first table with 0x24 set in every entry.
55+
0x24, 0x25, 0x2c, 0x2d, 0x64, 0x65, 0x6c, 0x6d,
56+
0x26, 0x27, 0x2e, 0x2f, 0x66, 0x67, 0x6e, 0x6f,
57+
0x34, 0x35, 0x3c, 0x3d, 0x74, 0x75, 0x7c, 0x7d,
58+
0x36, 0x37, 0x3e, 0x3f, 0x76, 0x77, 0x7e, 0x7f,
59+
0xa4, 0xa5, 0xac, 0xad, 0xe4, 0xe5, 0xec, 0xed,
60+
0xa6, 0xa7, 0xae, 0xaf, 0xe6, 0xe7, 0xee, 0xef,
61+
0xb4, 0xb5, 0xbc, 0xbd, 0xf4, 0xf5, 0xfc, 0xfd,
62+
0xb6, 0xb7, 0xbe, 0xbf, 0xf6, 0xf7, 0xfe, 0xff,
63+
},
64+
};
65+
66+
#define MICRO_TILE_DIM (8) // Tile edge in texels: x and y are split into a 3-bit in-tile part and a tile index.
67+
#define MICRO_TILE_SZ (2048) // Byte stride between consecutive tiles in in_data (used for tile and slice offsets).
68+
#define TEXELS_PER_ELEMENT (1) // Not referenced by main() in this shader.
69+
#define BPP (64) // Bits per element; main() uses BPP / 8 to turn a LUT index into a byte offset.
70+
71+
void main() { // Copies one 64-bit texel (two words) from its tiled location in in_data to linear slot gl_GlobalInvocationID.x of out_data.
72+
uint x = gl_GlobalInvocationID.x % info.pitch; // Column within the row.
73+
uint y = (gl_GlobalInvocationID.x / info.pitch) % info.height; // Row within the slice.
74+
uint z = gl_GlobalInvocationID.x / (info.pitch * info.height); // Slice index.
75+
76+
uint col = bitfieldExtract(x, 0, 3); // Low 3 bits of x: column inside the 8x8 tile.
77+
uint row = bitfieldExtract(y, 0, 3); // Low 3 bits of y: row inside the 8x8 tile.
78+
uint lut = bitfieldExtract(z, 0, 2); // Low 2 bits of z: selects one of the four tables.
79+
uint idx = lut_64bpp[lut][col + row * MICRO_TILE_DIM]; // Element index of this texel inside its tile block.
80+
81+
uint slice_offs = (z >> 2u) * info.c1 * MICRO_TILE_SZ; // Byte offset of the group of four slices that contains z.
82+
uint tile_row = y / MICRO_TILE_DIM;
83+
uint tile_column = x / MICRO_TILE_DIM;
84+
uint tile_offs = ((tile_row * info.c0) + tile_column) * MICRO_TILE_SZ; // Byte offset of the tile inside that slice group.
85+
uint offs = slice_offs + tile_offs + (idx * BPP / 8); // Final byte offset of the source texel.
86+
87+
uint p0 = in_data[(offs >> 2) + 0]; // First 32-bit word of the texel (byte offset converted to a word index).
88+
uint p1 = in_data[(offs >> 2) + 1]; // Second 32-bit word of the texel.
89+
out_data[2 * gl_GlobalInvocationID.x + 0] = p0; // Output holds two words per invocation, in plain invocation order.
90+
out_data[2 * gl_GlobalInvocationID.x + 1] = p1;
91+
}

src/video_core/texture_cache/image_info.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,9 @@ void ImageInfo::UpdateSize() {
366366
mip_info.height = mip_h;
367367
break;
368368
}
369+
case AmdGpu::TilingMode::Texture_Volume:
370+
mip_d += (-mip_d) & 3u;
371+
[[fallthrough]];
369372
case AmdGpu::TilingMode::Texture_MicroTiled: {
370373
std::tie(mip_info.pitch, mip_info.size) =
371374
ImageSizeMicroTiled(mip_w, mip_h, bpp, num_samples);

0 commit comments

Comments
 (0)