Skip to content

Commit f4d36d8

Browse files
committed
#368 GBA: delta encoding for room quads and mesh indices, OT depth calculation optimization, merge near/far clipping flags into CLIP_PLANE to free up one of the high bits, revert smull optimization for scaleUV due to overflow issues (low half-word affects high)
1 parent 40eb268 commit f4d36d8

18 files changed

+369
-232
lines changed

src/fixed/common.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -641,8 +641,9 @@ struct RoomQuad
641641
uint32 flags;
642642
uint16 indices[4];
643643
#else
644+
int8 indices[4];
644645
uint16 flags;
645-
uint16 indices[4];
646+
uint16 padding;
646647
#endif
647648
};
648649

@@ -652,8 +653,8 @@ struct RoomTriangle
652653
uint32 flags;
653654
uint16 indices[4];
654655
#else
655-
uint16 flags;
656656
uint16 indices[3];
657+
uint16 flags;
657658
#endif
658659
};
659660

@@ -663,8 +664,9 @@ struct MeshQuad
663664
uint32 flags;
664665
uint32 indices;
665666
#else
667+
int8 indices[4];
666668
uint16 flags;
667-
uint8 indices[4];
669+
uint16 padding;
668670
#endif
669671
};
670672

@@ -674,8 +676,9 @@ struct MeshTriangle
674676
uint32 flags;
675677
uint32 indices;
676678
#else
679+
int8 indices[4];
677680
uint16 flags;
678-
uint8 indices[4];
681+
uint16 padding;
679682
#endif
680683
};
681684

src/fixed/level.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -187,14 +187,18 @@ void readLevel(const uint8* data)
187187
m->start = spriteSeq->start;
188188
}
189189

190-
#ifdef USE_VRAM_MESH // experimental, should be per level or dynamic
191-
vramMeshesCount = 0;
190+
// experimental
191+
#if defined(USE_VRAM_MESH) || defined(USE_VRAM_ROOM)
192192
vramPtr = (uint8*)0x06014000;
193+
#endif
194+
195+
#ifdef USE_VRAM_MESH // should be per level or dynamic
196+
vramMeshesCount = 0;
193197
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA);
194198
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_PISTOLS);
195199
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_SHOTGUN);
196-
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_MAGNUMS);
197-
vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_UZIS);
200+
//vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_MAGNUMS);
201+
//vramPtr = pushToVRAM(vramPtr, models + ITEM_LARA_UZIS);
198202
vramPtr = pushToVRAM(vramPtr, models + ITEM_WOLF);
199203
vramPtr = pushToVRAM(vramPtr, models + ITEM_BAT);
200204
vramPtr = pushToVRAM(vramPtr, models + ITEM_BRIDGE_FLAT);

src/fixed/room.h

Lines changed: 10 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -383,10 +383,10 @@ bool Room::checkPortal(const Portal* portal)
383383
return false;
384384
//#endif
385385

386-
int32 x0 = clip.x1;
387-
int32 y0 = clip.y1;
388-
int32 x1 = clip.x0;
389-
int32 y1 = clip.y0;
386+
int32 x0 = clip.x1 - (FRAME_WIDTH >> 1);
387+
int32 y0 = clip.y1 - (FRAME_HEIGHT >> 1);
388+
int32 x1 = clip.x0 - (FRAME_WIDTH >> 1);
389+
int32 y1 = clip.y0 - (FRAME_HEIGHT >> 1);
390390

391391
int32 znear = 0, zfar = 0;
392392

@@ -419,17 +419,7 @@ bool Room::checkPortal(const Portal* portal)
419419
y >>= FIXED_SHIFT;
420420
z >>= FIXED_SHIFT;
421421

422-
int32 dz = PERSPECTIVE_DZ(z);
423-
424-
if (dz > 0) {
425-
PERSPECTIVE(x, y, z);
426-
427-
x += FRAME_WIDTH >> 1;
428-
y += FRAME_HEIGHT >> 1;
429-
} else {
430-
x = (x < 0) ? viewport.x0 : viewport.x1;
431-
y = (y < 0) ? viewport.y0 : viewport.y1;
432-
}
422+
PERSPECTIVE(x, y, z);
433423

434424
if (x < x0) x0 = x;
435425
if (x > x1) x1 = x;
@@ -440,6 +430,11 @@ bool Room::checkPortal(const Portal* portal)
440430
if (znear == 4 || zfar == 4)
441431
return false;
442432

433+
x0 += (FRAME_WIDTH >> 1);
434+
y0 += (FRAME_HEIGHT >> 1);
435+
x1 += (FRAME_WIDTH >> 1);
436+
y1 += (FRAME_HEIGHT >> 1);
437+
443438
if (znear)
444439
{
445440
vec3i *a = pv;

src/platform/gba/asm/common_asm.inc

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,9 @@
3838
.equ CLIP_RIGHT, ((1 << 1) << 8)
3939
.equ CLIP_TOP, ((1 << 2) << 8)
4040
.equ CLIP_BOTTOM, ((1 << 3) << 8)
41-
.equ CLIP_FAR, ((1 << 4) << 8)
42-
.equ CLIP_NEAR, ((1 << 5) << 8)
43-
.equ CLIP_FRAME, ((1 << 6) << 8)
44-
.equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_FAR + CLIP_NEAR)
41+
.equ CLIP_PLANE, ((1 << 4) << 8)
42+
.equ CLIP_FRAME, ((1 << 5) << 8)
43+
.equ CLIP_DISCARD, (CLIP_LEFT + CLIP_RIGHT + CLIP_TOP + CLIP_BOTTOM + CLIP_PLANE)
4544
.equ CLIP_MASK, 0xFF00
4645

4746
.equ FACE_TYPE_SHIFT, 14
@@ -121,15 +120,17 @@
121120
ble \skip
122121
.endm
123122

124-
.macro scaleUV uv, tmp, tmp2, f
125-
smullne \tmp2, \tmp, \uv, \f // u = (f * uv) >> 32
123+
.macro scaleUV uv, tmp, unused, f
124+
asrs \tmp, \uv, #16
125+
mulne \tmp, \f, \tmp // u = f * int16(uv >> 16)
126+
lsrne \tmp, #16
127+
lslne \tmp, #16
126128

127-
lsl \uv, #16
128-
asrs \uv, #16
129-
mulne \uv, \f // v = f * int16(uv)
130-
lsr \uv, #16
129+
lsls \uv, \uv, #16
130+
asrne \uv, #16
131+
mulne \uv, \f, \uv // v = f * int16(uv)
131132

132-
orr \uv, \uv, \tmp, lsl #16 // uv = (u & 0xFFFF0000) | (v >> 16)
133+
orr \uv, \tmp, \uv, lsr #16 // uv = (u & 0xFFFF0000) | (v >> 16)
133134
.endm
134135

135136
.macro tex index, uv

src/platform/gba/asm/faceAddMeshQuads.s

Lines changed: 39 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
polys .req r0 // arg
44
count .req r1 // arg
5-
vp .req r2
5+
mask .req r2
66
vg0 .req r3
77
vg1 .req r4
88
vg2 .req r5
@@ -26,6 +26,14 @@ vy2 .req vg2
2626

2727
depth .req vg0
2828

29+
i0 .req vp0
30+
i1 .req vp1
31+
i2 .req vp2
32+
i3 .req vg3 // vg to save vp3 value between iterations
33+
34+
vp01 .req vp1
35+
vp23 .req vp2
36+
2937
tmp .req flags
3038
next .req vp0
3139

@@ -34,8 +42,8 @@ faceAddMeshQuads_asm:
3442
stmfd sp!, {r4-r7}
3543
fiq_on
3644

37-
ldr vp, =gVerticesBase
38-
ldr vp, [vp]
45+
ldr vp3, =gVerticesBase
46+
ldr vp3, [vp3]
3947

4048
ldr vertices, =gVertices
4149
lsr vertices, #3
@@ -44,26 +52,27 @@ faceAddMeshQuads_asm:
4452
ldr face, [face]
4553

4654
ldr ot, =gOT
47-
48-
add polys, #2 // skip flags
55+
mov mask, #(0xFF << 24)
56+
orr mask, #(3 << 8) // div 4 mul 4 for depth
4957

5058
.loop:
51-
ldrh vp0, [polys], #2
52-
ldrh vp2, [polys], #4 // + flags
59+
// sizeof(MeshQuad) == 8
60+
ldr tmp, [polys], #8 // skip flags
5361

54-
lsr vp1, vp0, #8
55-
and vp0, #0xFF
56-
lsr vp3, vp2, #8
57-
and vp2, #0xFF
62+
// unpack index deltas
63+
and vg0, mask, tmp, lsl #24
64+
and vg1, mask, tmp, lsl #16
65+
and vg2, mask, tmp, lsl #8
66+
and vg3, mask, tmp
5867

59-
add vp0, vp, vp0, lsl #3
60-
add vp1, vp, vp1, lsl #3
61-
add vp2, vp, vp2, lsl #3
68+
// sizeof(Vertex) = (1 << 3)
69+
add vp0, vp3, vg0, asr #(24 - 3)
70+
add vp1, vp0, vg1, asr #(24 - 3)
71+
add vp2, vp1, vg2, asr #(24 - 3)
72+
add vp3, vp2, vg3, asr #(24 - 3)
6273

6374
CCW .skip
6475

65-
add vp3, vp, vp3, lsl #3
66-
6776
// fetch [c, g, zz]
6877
ldr vg0, [vp0, #VERTEX_Z]
6978
ldr vg1, [vp1, #VERTEX_Z]
@@ -82,28 +91,27 @@ faceAddMeshQuads_asm:
8291
orr tmp, vg2
8392
orr tmp, vg3
8493
tst tmp, #(CLIP_FRAME << 16)
85-
ldrh flags, [polys, #-8]
94+
ldrh flags, [polys, #-4]
8695
orrne flags, #FACE_CLIPPED
8796

8897
// depth = AVG_Z4
89-
lsl vg0, #16 // clip g part (high half)
90-
add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1
91-
add depth, vg2, lsl #16 // depth += vz2
92-
add depth, vg3, lsl #16 // depth += vz3
93-
lsr depth, #(16 + 2) // dpeth /= 4
98+
add depth, vg0, vg1 // depth = vz0 + vz1
99+
add depth, vg2 // depth += vz2
100+
add depth, vg3 // depth += vz3
101+
bic depth, depth, mask, asr #8 // clear high half (g & clip flags) and low 2 bits
94102

95103
// faceAdd
96-
rsb vp0, vertices, vp0, lsr #3
97-
rsb vp1, vertices, vp1, lsr #3
98-
rsb vp2, vertices, vp2, lsr #3
99-
rsb vp3, vertices, vp3, lsr #3
104+
rsb i0, vertices, vp0, lsr #3
105+
rsb i1, vertices, vp1, lsr #3
106+
rsb i2, vertices, vp2, lsr #3
107+
rsb i3, vertices, vp3, lsr #3
100108

101-
orr vp1, vp0, vp1, lsl #16
102-
orr vp3, vp2, vp3, lsl #16
109+
orr vp01, i0, i1, lsl #16
110+
orr vp23, i2, i3, lsl #16
103111

104-
ldr next, [ot, depth, lsl #2]
105-
str face, [ot, depth, lsl #2]
106-
stmia face!, {flags, next, vp1, vp3}
112+
ldr next, [ot, depth]
113+
str face, [ot, depth]
114+
stmia face!, {flags, next, vp01, vp23}
107115
.skip:
108116
subs count, #1
109117
bne .loop

src/platform/gba/asm/faceAddMeshTriangles.s

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,16 @@
22

33
polys .req r0 // arg
44
count .req r1 // arg
5-
vp .req r2
6-
vg0 .req r3
7-
vg1 .req r4
8-
vg2 .req r5
9-
vg3 .req r6
5+
mask .req r2
6+
flags .req r3
7+
vp0 .req r4
8+
vp1 .req r5
9+
vp2 .req r6
1010
// FIQ regs
11-
flags .req r8
12-
vp0 .req r9
13-
vp1 .req r10
14-
vp2 .req r11
11+
vg0 .req r8
12+
vg1 .req r9
13+
vg2 .req r10
14+
vg3 .req r11
1515
vertices .req r12
1616
ot .req r13
1717
face .req r14
@@ -25,6 +25,12 @@ vy2 .req vg2
2525

2626
depth .req vg0
2727

28+
i0 .req vg1
29+
i1 .req vg2
30+
i2 .req vg3 // vg to save vp2 value between iterations
31+
32+
vp01 .req vp1
33+
2834
tmp .req flags
2935
next .req vp0
3036

@@ -33,29 +39,31 @@ faceAddMeshTriangles_asm:
3339
stmfd sp!, {r4-r6}
3440
fiq_on
3541

36-
ldr vp, =gVerticesBase
37-
ldr vp, [vp]
42+
ldr vp2, =gVerticesBase
43+
ldr vp2, [vp2]
44+
45+
ldr vertices, =gVertices
46+
lsr vertices, #3
3847

3948
ldr face, =gFacesBase
4049
ldr face, [face]
4150

4251
ldr ot, =gOT
43-
ldr vertices, =gVertices
44-
lsr vertices, #3
45-
46-
add polys, #2 // skip flags
52+
mov mask, #(0xFF << 24)
53+
orr mask, #(3 << 8) // div 4 mul 4 for depth
4754

4855
.loop:
49-
ldrh vp0, [polys], #2
50-
ldrh vp2, [polys], #4 // + flags
56+
// sizeof(MeshTriangle) == 8
57+
ldr tmp, [polys], #8 // skip flags
5158

52-
lsr vp1, vp0, #8
53-
and vp0, #0xFF
54-
and vp2, #0xFF
59+
// unpack index deltas
60+
and vg0, mask, tmp, lsl #24
61+
and vg1, mask, tmp, lsl #16
5562

56-
add vp0, vp, vp0, lsl #3
57-
add vp1, vp, vp1, lsl #3
58-
add vp2, vp, vp2, lsl #3
63+
// sizeof(Vertex) = (1 << 3)
64+
add vp0, vp2, vg0, asr #(24 - 3)
65+
add vp1, vp0, vg1, asr #(24 - 3)
66+
add vp2, vp1, tmp, asr #(24 - 3) // 3rd vertex in 4th byte after zero byte to save one masking op
5967

6068
CCW .skip
6169

@@ -74,27 +82,26 @@ faceAddMeshTriangles_asm:
7482
orr tmp, vg0, vg1
7583
orr tmp, vg2
7684
tst tmp, #(CLIP_FRAME << 16)
77-
ldrh flags, [polys, #-8]
85+
ldrh flags, [polys, #-4]
7886
orrne flags, #FACE_CLIPPED
7987

8088
// depth = AVG_Z3
81-
lsl vg0, #16 // clip g part (high half)
82-
add depth, vg0, vg1, lsl #16 // depth = vz0 + vz1
83-
add depth, vg2, lsl #17 // depth += vz2 * 2
84-
lsr depth, #(16 + 2) // depth /= 4
89+
add depth, vg0, vg1 // depth = vz0 + vz1
90+
add depth, vg2, lsl #1 // depth += vz2 * 2
91+
bic depth, depth, mask, asr #8 // clear high half (g & clip flags) and low 2 bits
8592

8693
// faceAdd
87-
rsb vp0, vertices, vp0, lsr #3
88-
rsb vp1, vertices, vp1, lsr #3
89-
rsb vp2, vertices, vp2, lsr #3
94+
rsb i0, vertices, vp0, lsr #3
95+
rsb i1, vertices, vp1, lsr #3
96+
rsb i2, vertices, vp2, lsr #3
9097

91-
orr vp1, vp0, vp1, lsl #16
98+
orr vp01, i0, i1, lsl #16
9299

93100
orr flags, #FACE_TRIANGLE
94101

95-
ldr next, [ot, depth, lsl #2]
96-
str face, [ot, depth, lsl #2]
97-
stmia face!, {flags, next, vp1, vp2}
102+
ldr next, [ot, depth]
103+
str face, [ot, depth]
104+
stmia face!, {flags, next, vp01, i2}
98105
.skip:
99106
subs count, #1
100107
bne .loop

0 commit comments

Comments
 (0)