|
| 1 | +/******************************************************************************* |
| 2 | + * Copyright IBM Corp. and others 2024 |
| 3 | + * |
| 4 | + * This program and the accompanying materials are made available under |
| 5 | + * the terms of the Eclipse Public License 2.0 which accompanies this |
| 6 | + * distribution and is available at https://www.eclipse.org/legal/epl-2.0/ |
| 7 | + * or the Apache License, Version 2.0 which accompanies this distribution and |
| 8 | + * is available at https://www.apache.org/licenses/LICENSE-2.0. |
| 9 | + * |
| 10 | + * This Source Code may also be made available under the following |
| 11 | + * Secondary Licenses when the conditions for such availability set |
| 12 | + * forth in the Eclipse Public License, v. 2.0 are satisfied: GNU |
| 13 | + * General Public License, version 2 with the GNU Classpath |
| 14 | + * Exception [1] and GNU General Public License, version 2 with the |
| 15 | + * OpenJDK Assembly Exception [2]. |
| 16 | + * |
| 17 | + * [1] https://www.gnu.org/software/classpath/license.html |
| 18 | + * [2] https://openjdk.org/legal/assembly-exception.html |
| 19 | + * |
| 20 | + * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0 |
| 21 | + *******************************************************************************/ |
| 22 | + |
| 23 | +#include "aarch64/runtime/arm64asmdefs.inc" |
| 24 | + |
// Source file name handed to the assembler.
    .file   "ArrayTranslate.s"

// Everything below is emitted into the code section, aligned to 2^2 = 4 bytes
// (.align takes a power-of-two exponent on AArch64 assemblers).
    .text
    .align  2

// Export the helper. FUNC_LABEL is supplied by arm64asmdefs.inc; presumably it
// applies the platform's symbol decoration -- confirm in that include.
    .globl  FUNC_LABEL(__arrayTranslateTRTO255)
| 31 | + |
// ----------------------------------------------------------------------------
// __arrayTranslateTRTO255
// ----------------------------------------------------------------------------
// TRTO = TRanslate, Two bytes to One byte.
// Copies 16-bit elements into a byte array for as long as every element fits
// in 8 bits, and reports how many elements were copied:
//
//   uint16 input[];
//   uint8  output[];
//   int32  len;
//
//   int32 n = 0;
//   while (n < len && input[n] <= 0xFF) {
//       output[n] = (uint8)input[n];
//       n++;
//   }
//   return n;
//
// Inputs:
//   x0  address of the 16-bit input elements
//   x1  address of the 8-bit output elements
//   w2  len, the element count (only the low 32 bits of x2 are read;
//       x2 itself is never written)
// Result:
//   x0  number of elements translated
// Modified in addition to x0:
//   x1 (advanced by the number of bytes stored), x4-x6, v0-v2, NZCV
//
// The vector paths treat the odd-numbered bytes of the loaded data as the
// high halves of the elements, i.e. they rely on a little-endian layout.
//
// NOTE(review): the first length test is an unsigned compare, so a negative
// len is not handled as "nothing to do". Presumably callers always pass
// len >= 0 -- confirm against the call sites.

FUNC_LABEL(__arrayTranslateTRTO255):
    mov     x6, x1                      // x6 = output start, needed for the final count
    cmp     w2, #16
    b.lo    trto255_Tail8               // fewer than 16 elements: skip the block loop
    lsr     w4, w2, #4                  // w4 = number of full 16-element blocks

trto255_Block16:
    // x0 is advanced only after a block passes the check, so the scan at
    // trto255_FindBadChar can restart from the first element of a failing block.
    ldp     q0, q1, [x0]                // 16 input elements
    uzp2    v2.16b, v0.16b, v1.16b      // odd-numbered bytes: high byte of each element
    umaxp   v2.4s, v2.4s, v2.4s         // pairwise max folds all 16 bytes into the low 64 bits
    umov    x5, v2.d[0]
    cbnz    x5, trto255_FindBadChar     // at least one element is above 0xFF
    uzp1    v2.16b, v0.16b, v1.16b      // even-numbered bytes: low byte of each element
    str     q2, [x1], #16               // 16 output bytes
    add     x0, x0, #32
    subs    w4, w4, #1
    b.ne    trto255_Block16

trto255_Tail8:
    // At most 15 elements are left; bits 3..0 of len give the exact count.
    tst     w2, #8
    b.eq    trto255_Tail4
    ldr     q0, [x0]                    // 8 input elements
    trn2    v2.16b, v0.16b, v0.16b      // every high byte, each one duplicated
    umaxp   v2.4s, v2.4s, v2.4s
    umov    x5, v2.d[0]
    cbnz    x5, trto255_FindBadChar
    xtn     v2.8b, v0.8h                // narrow: keep the low byte of each element
    str     d2, [x1], #8                // 8 output bytes
    add     x0, x0, #16

trto255_Tail4:
    // At most 7 elements are left.
    tst     w2, #4
    b.eq    trto255_Tail3
    ldr     d0, [x0]                    // 4 input elements
    trn2    v2.8b, v0.8b, v0.8b         // the 4 high bytes, each one duplicated
    umov    x5, v2.d[0]
    cbnz    x5, trto255_FindBadChar
    xtn     v2.8b, v0.8h
    str     s2, [x1], #4                // only the 4 meaningful bytes are stored
    add     x0, x0, #8

trto255_Tail3:
    // At most 3 elements are left; handle them one at a time.
    ands    w4, w2, #3                  // w4 = leftover element count (0..3)
    b.eq    trto255_Return
trto255_ScalarLoop:
    ldrh    w5, [x0], #2
    cmp     w5, #256
    b.hs    trto255_Return              // element above 0xFF: stop without storing it
    strb    w5, [x1], #1
    subs    w4, w4, #1
    b.ne    trto255_ScalarLoop

trto255_Return:
    sub     x0, x1, x6                  // bytes stored == elements translated
    ret

trto255_FindBadChar:
    // Entered only after a vector check found a non-zero high byte in the
    // elements starting at x0, so this loop always terminates at that element.
    ldrh    w5, [x0], #2
    cmp     w5, #256
    b.hs    trto255_Return
    strb    w5, [x1], #1
    b       trto255_FindBadChar
0 commit comments