Skip to content

Commit 0fac65b

Browse files
committed
AArch64: Implement arraytranslateTRTO255
This commit implements arraytranslateTRTO255 for AArch64. Signed-off-by: KONNO Kazuhiro <[email protected]>
1 parent bd36b1d commit 0fac65b

File tree

4 files changed

+135
-0
lines changed

4 files changed

+135
-0
lines changed
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
/*******************************************************************************
2+
* Copyright IBM Corp. and others 2024
3+
*
4+
* This program and the accompanying materials are made available under
5+
* the terms of the Eclipse Public License 2.0 which accompanies this
6+
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
7+
* or the Apache License, Version 2.0 which accompanies this distribution and
8+
* is available at https://www.apache.org/licenses/LICENSE-2.0.
9+
*
10+
* This Source Code may also be made available under the following
11+
* Secondary Licenses when the conditions for such availability set
12+
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
13+
* General Public License, version 2 with the GNU Classpath
14+
* Exception [1] and GNU General Public License, version 2 with the
15+
* OpenJDK Assembly Exception [2].
16+
*
17+
* [1] https://www.gnu.org/software/classpath/license.html
18+
* [2] https://openjdk.org/legal/assembly-exception.html
19+
*
20+
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
21+
*******************************************************************************/
22+
23+
#include "aarch64/runtime/arm64asmdefs.inc"
24+
25+
.file "ArrayTranslate.s"
26+
27+
.globl FUNC_LABEL(__arrayTranslateTRTO255)
28+
29+
.text
30+
.align 2
31+
32+
// ----
33+
// arrayTranslateTRTO255
34+
// ----
35+
// TO stands for Two bytes to One byte
36+
//
37+
// uint16 input[];
38+
// uint8 output[];
39+
// int32 len;
40+
//
41+
// int32 i = 0;
42+
// while (i < len) {
43+
// uint16 ch = input[i];
44+
// if (ch > 0xFF) break;
45+
// output[i] = ch & 0xFF;
46+
// i++;
47+
// }
48+
// return i;
49+
//
50+
// in: x0: input
51+
// x1: output
52+
// x2: len
53+
// out: x0: num of translated elements
54+
// trash: x1 (advanced past the last stored byte), x4-x6, v0-v2, condition flags
55+
56+
FUNC_LABEL(__arrayTranslateTRTO255):
57+
// Narrow 16-bit elements at x0 to bytes at x1 until w2 elements have been
// processed or an element above 0xFF is met; x0 returns the count stored.
// Work is done in chunks of 16, then 8, then 4 elements with SIMD, and the
// last 0-3 elements one at a time. When a SIMD chunk contains an element
// above 0xFF, atTRTO255_Fail re-processes that chunk element by element.
// NOTE(review): len is only compared/tested as an unsigned 32-bit value here;
// presumably callers never pass a negative length - confirm.
//
// preserve the initial output address; the result is computed from it at
// atTRTO255_Done
58+
mov x6, x1
59+
cmp w2, #16
60+
b.cc atTRTO255_15 // len < 16 (unsigned): skip the 16-element loop
61+
lsr w4, w2, #4 // w4 = len / 16 = iterations of the 16-element loop
62+
atTRTO255_16Loop:
63+
// load 16 elements (32 bytes) into q0 and q1
64+
ldp q0, q1, [x0]
65+
// collect upper 8 bits of each element (the odd-numbered bytes of v0:v1)
66+
uzp2 v2.16b, v0.16b, v1.16b
67+
// fail when any one of them is non-zero: the pairwise max with itself
// folds all four words of v2 into its low 64 bits, which are then tested
68+
umaxp v2.4s, v2.4s, v2.4s
69+
mov x5, v2.D[0]
70+
cbnz x5, atTRTO255_Fail // x0 and x1 still point at the start of this chunk
71+
// collect lower 8 bits of each element (the even-numbered bytes of v0:v1)
72+
uzp1 v2.16b, v0.16b, v1.16b
73+
add x0, x0, #32 // advance input by 16 elements
74+
subs w4, w4, #1 // flags set here are consumed by the b.ne below
75+
// store 16 elements and advance output by 16 bytes
76+
str q2, [x1], #16
77+
b.ne atTRTO255_16Loop
78+
atTRTO255_15:
79+
// 15 elements or fewer remaining; bits 3, 2 and 1..0 of len select the
// 8-element, 4-element and single-element tails below
80+
tst w2, #8
81+
b.eq atTRTO255_7
82+
// load 8 elements (16 bytes)
83+
ldr q0, [x0]
84+
// collect upper 8 bits: with both sources equal, trn2 duplicates every
// odd-numbered byte of v0 into a byte pair of v2
85+
trn2 v2.16b, v0.16b, v0.16b
86+
// fail when any one of them is non-zero (same reduction as in the 16 loop)
87+
umaxp v2.4s, v2.4s, v2.4s
88+
mov x5, v2.D[0]
89+
cbnz x5, atTRTO255_Fail // x0 and x1 not yet advanced for this chunk
90+
// collect lower 8 bits by narrowing each 16-bit lane to a byte
91+
xtn v2.8b, v0.8h
92+
add x0, x0, #16 // advance input by 8 elements
93+
// store 8 elements and advance output by 8 bytes
94+
str d2, [x1], #8
95+
atTRTO255_7:
96+
// 7 elements or fewer remaining
97+
tst w2, #4
98+
b.eq atTRTO255_3
99+
// load 4 elements (8 bytes)
100+
ldr d0, [x0]
101+
// collect upper 8 bits (odd-numbered bytes of the low 8 bytes of v0)
102+
trn2 v2.8b, v0.8b, v0.8b
103+
// fail when any one of them is non-zero; the 8-byte result already fits in
// D[0], so no pairwise reduction is needed here
104+
mov x5, v2.D[0]
105+
cbnz x5, atTRTO255_Fail // x0 and x1 not yet advanced for this chunk
106+
// collect lower 8 bits; only the low 4 bytes of the result are stored below
107+
xtn v2.8b, v0.8h
108+
add x0, x0, #8 // advance input by 4 elements
109+
// store 4 elements and advance output by 4 bytes
110+
str s2, [x1], #4
111+
atTRTO255_3:
112+
// 3 elements or fewer remaining: w4 = len & 3, Z is set when none are left
113+
ands w4, w2, #3
114+
atTRTO255_1Loop:
115+
b.eq atTRTO255_Done // Z comes from the ands on entry, from the subs afterwards
116+
ldrh w5, [x0], #2
117+
cmp w5, #256
118+
b.cs atTRTO255_Done // element > 0xFF: stop without storing it
119+
subs w4, w4, #1 // flags set here are tested by the b.eq at the loop top
120+
strb w5, [x1], #1
121+
b atTRTO255_1Loop
122+
atTRTO255_Fail: // reached when a SIMD chunk holds an element > 0xFF: copy that chunk element by element up to the offending one (no counter is used)
123+
ldrh w5, [x0], #2
124+
cmp w5, #256
125+
b.cs atTRTO255_Done // offending element reached: stop without storing it
126+
strb w5, [x1], #1
127+
b atTRTO255_Fail
128+
atTRTO255_Done:
129+
// number of translated elements = bytes written = current output address
// minus the initial output address saved in x6
130+
sub x0, x1, x6
131+
ret

runtime/compiler/aarch64/runtime/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ j9jit_files(
2525
${omr_SOURCE_DIR}/compiler/aarch64/runtime/CodeSync.cpp
2626
${omr_SOURCE_DIR}/compiler/aarch64/runtime/VirtualGuardRuntime.cpp
2727
aarch64/runtime/ARM64RelocationTarget.cpp
28+
aarch64/runtime/ArrayTranslate.spp
2829
aarch64/runtime/FlushICache.spp
2930
aarch64/runtime/PicBuilder.spp
3031
aarch64/runtime/Recomp.cpp

runtime/compiler/build/files/host/aarch64.mk

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ JIT_PRODUCT_BACKEND_SOURCES+= \
2525

2626
JIT_PRODUCT_SOURCE_FILES+= \
2727
compiler/aarch64/runtime/ARM64RelocationTarget.cpp \
28+
compiler/aarch64/runtime/ArrayTranslate.spp \
2829
compiler/aarch64/runtime/FlushICache.spp \
2930
compiler/aarch64/runtime/PicBuilder.spp \
3031
compiler/aarch64/runtime/Recomp.cpp \

runtime/compiler/runtime/Runtime.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,7 @@ JIT_HELPER(__forwardArrayCopy);
596596
JIT_HELPER(__backwardArrayCopy);
597597
JIT_HELPER(_patchGCRHelper);
598598
JIT_HELPER(_fieldWatchHelper);
599+
JIT_HELPER(__arrayTranslateTRTO255);
599600

600601
#elif defined(TR_HOST_S390)
601602
JIT_HELPER(__double2Long);
@@ -1580,6 +1581,7 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP)
15801581
#else
15811582
SET(TR_ARM64fieldWatchHelper, (void *) 0, TR_Helper);
15821583
#endif
1584+
SET(TR_ARM64arrayTranslateTRTO255, (void *) __arrayTranslateTRTO255, TR_Helper);
15831585

15841586
#elif defined(TR_HOST_S390)
15851587
SET(TR_S390double2Long, (void *) 0, TR_Helper);

0 commit comments

Comments
 (0)