Skip to content

Commit

Permalink
AArch64: Implement arraytranslateTRTO255
Browse files Browse the repository at this point in the history
This commit implements arraytranslateTRTO255 for AArch64.

Signed-off-by: KONNO Kazuhiro <[email protected]>
  • Loading branch information
knn-k committed Oct 24, 2024
1 parent bd36b1d commit 0fac65b
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 0 deletions.
131 changes: 131 additions & 0 deletions runtime/compiler/aarch64/runtime/ArrayTranslate.spp
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*******************************************************************************
* Copyright IBM Corp. and others 2024
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
* distribution and is available at https://www.eclipse.org/legal/epl-2.0/
* or the Apache License, Version 2.0 which accompanies this distribution and
* is available at https://www.apache.org/licenses/LICENSE-2.0.
*
* This Source Code may also be made available under the following
* Secondary Licenses when the conditions for such availability set
* forth in the Eclipse Public License, v. 2.0 are satisfied: GNU
* General Public License, version 2 with the GNU Classpath
* Exception [1] and GNU General Public License, version 2 with the
* OpenJDK Assembly Exception [2].
*
* [1] https://www.gnu.org/software/classpath/license.html
* [2] https://openjdk.org/legal/assembly-exception.html
*
* SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
*******************************************************************************/

#include "aarch64/runtime/arm64asmdefs.inc"

.file "ArrayTranslate.s"

.globl FUNC_LABEL(__arrayTranslateTRTO255)

.text
.align 2

// ----
// arrayTranslateTRTO255
// ----
// TO stands for Two bytes to One byte
//
// uint16 input[];
// uint8 output[];
// int32 len;
//
// int32 i = 0;
// while (i < len) {
// uint16 ch = input[i];
// if (ch > 0xFF) break;
// output[i] = ch & 0xFF;
// i++;
// }
// return i;
//
// in: x0: input
// x1: output
// x2: len
// out: x0: num of translated elements
// trash: x1 (advanced past the last byte stored), x4-x6, v0-v2, condition flags (NZCV)

FUNC_LABEL(__arrayTranslateTRTO255):
// Copies 16-bit elements from [x0] to bytes at [x1], stopping after w2
// elements or at the first element greater than 0xFF, and returns the
// number of bytes stored in x0.
//
// Structure:
//   1. vector loop, 16 elements per iteration (len / 16 iterations)
//   2. one optional 8-element vector step   (bit 3 of len)
//   3. one optional 4-element vector step   (bit 2 of len)
//   4. scalar loop for the last 0-3 elements (len & 3)
// Each vector step first checks the high byte of every element; if any is
// non-zero it branches to atTRTO255_Fail *before* x0/x1 are advanced, so the
// scalar rescan there starts at the beginning of the offending chunk.
//
// NOTE(review): len is compared as an unsigned value (b.cc), so a negative
// len would be treated as a very large count. Presumably callers guarantee
// len >= 0 -- confirm.

// preserve output address (x1 is advanced by every store; x6 keeps the start)
mov x6, x1
// fewer than 16 elements: skip the 16-element loop entirely
cmp w2, #16
b.cc atTRTO255_15
// w4 = number of 16-element iterations (len / 16), at least 1 here
lsr w4, w2, #4
atTRTO255_16Loop:
// load 16 elements
ldp q0, q1, [x0]
// collect upper 8 bits
// (uzp2 gathers the odd-numbered bytes of v0:v1, i.e. one high byte per element)
uzp2 v2.16b, v0.16b, v1.16b
// fail when any one of them is non-zero
// (umaxp with identical operands reduces the four 32-bit lanes into the low
// 64 bits, which are moved to x5 and tested)
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
cbnz x5, atTRTO255_Fail
// collect lower 8 bits
// (uzp1 gathers the even-numbered bytes of v0:v1, i.e. one low byte per element)
uzp1 v2.16b, v0.16b, v1.16b
// advance input by 16 elements * 2 bytes
add x0, x0, #32
// decrement the iteration count; the flags set here are consumed by the
// b.ne below (the str in between does not alter the flags)
subs w4, w4, #1
// store 16 elements
str q2, [x1], #16
b.ne atTRTO255_16Loop
atTRTO255_15:
// 15 elements or less remaining
// bit 3 of len clear: no 8-element chunk to process
tst w2, #8
b.eq atTRTO255_7
// load 8 elements
ldr q0, [x0]
// collect upper 8 bits
// (trn2 with the same source twice yields the odd-numbered bytes of v0,
// each one appearing twice)
trn2 v2.16b, v0.16b, v0.16b
// fail when any one of them is non-zero
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
cbnz x5, atTRTO255_Fail
// collect lower 8 bits
// (xtn keeps the low byte of each of the 8 halfwords)
xtn v2.8b, v0.8h
// advance input by 8 elements * 2 bytes
add x0, x0, #16
// store 8 elements
str d2, [x1], #8
atTRTO255_7:
// 7 elements or less remaining
// bit 2 of len clear: no 4-element chunk to process
tst w2, #4
b.eq atTRTO255_3
// load 4 elements
// (a 64-bit load into d0; the upper 64 bits of v0 are zeroed by the load)
ldr d0, [x0]
// collect upper 8 bits
trn2 v2.8b, v0.8b, v0.8b
// fail when any one of them is non-zero
// (the 8b form already leaves the result in the low 64 bits, so no umaxp
// reduction is needed here)
mov x5, v2.D[0]
cbnz x5, atTRTO255_Fail
// collect lower 8 bits
// (only the low 4 result bytes correspond to loaded elements; those are the
// ones stored below)
xtn v2.8b, v0.8h
// advance input by 4 elements * 2 bytes
add x0, x0, #8
// store 4 elements
str s2, [x1], #4
atTRTO255_3:
// 3 elements or less remaining
// w4 = len & 3; Z is set when there is nothing left to do
ands w4, w2, #3
atTRTO255_1Loop:
// Z comes from the ands above on entry, and from the subs below on every
// later iteration (strb and b do not alter the flags)
b.eq atTRTO255_Done
// load one element and stop if it does not fit in a byte (unsigned >= 256)
ldrh w5, [x0], #2
cmp w5, #256
b.cs atTRTO255_Done
subs w4, w4, #1
strb w5, [x1], #1
b atTRTO255_1Loop
atTRTO255_Fail:
// A vector check found an element > 0xFF in the chunk starting at x0.
// Copy elements one by one up to (not including) that element. There is no
// counter here: the loop ends at the first element >= 256, which the
// preceding vector check found to be present in this chunk.
ldrh w5, [x0], #2
cmp w5, #256
b.cs atTRTO255_Done
strb w5, [x1], #1
b atTRTO255_Fail
atTRTO255_Done:
// number of translated elements
// (current output pointer minus the output start saved in x6)
sub x0, x1, x6
ret
1 change: 1 addition & 0 deletions runtime/compiler/aarch64/runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ j9jit_files(
${omr_SOURCE_DIR}/compiler/aarch64/runtime/CodeSync.cpp
${omr_SOURCE_DIR}/compiler/aarch64/runtime/VirtualGuardRuntime.cpp
aarch64/runtime/ARM64RelocationTarget.cpp
aarch64/runtime/ArrayTranslate.spp
aarch64/runtime/FlushICache.spp
aarch64/runtime/PicBuilder.spp
aarch64/runtime/Recomp.cpp
Expand Down
1 change: 1 addition & 0 deletions runtime/compiler/build/files/host/aarch64.mk
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ JIT_PRODUCT_BACKEND_SOURCES+= \

JIT_PRODUCT_SOURCE_FILES+= \
compiler/aarch64/runtime/ARM64RelocationTarget.cpp \
compiler/aarch64/runtime/ArrayTranslate.spp \
compiler/aarch64/runtime/FlushICache.spp \
compiler/aarch64/runtime/PicBuilder.spp \
compiler/aarch64/runtime/Recomp.cpp \
Expand Down
2 changes: 2 additions & 0 deletions runtime/compiler/runtime/Runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,7 @@ JIT_HELPER(__forwardArrayCopy);
JIT_HELPER(__backwardArrayCopy);
JIT_HELPER(_patchGCRHelper);
JIT_HELPER(_fieldWatchHelper);
JIT_HELPER(__arrayTranslateTRTO255);

#elif defined(TR_HOST_S390)
JIT_HELPER(__double2Long);
Expand Down Expand Up @@ -1580,6 +1581,7 @@ void initializeCodeRuntimeHelperTable(J9JITConfig *jitConfig, char isSMP)
#else
SET(TR_ARM64fieldWatchHelper, (void *) 0, TR_Helper);
#endif
SET(TR_ARM64arrayTranslateTRTO255, (void *) __arrayTranslateTRTO255, TR_Helper);

#elif defined(TR_HOST_S390)
SET(TR_S390double2Long, (void *) 0, TR_Helper);
Expand Down

0 comments on commit 0fac65b

Please sign in to comment.