From ea32e0b7782bedfbf8a52b4469e0ce5e0dbf6453 Mon Sep 17 00:00:00 2001
From: KONNO Kazuhiro
Date: Thu, 10 Oct 2024 11:53:54 +0900
Subject: [PATCH] AArch64: Implement arraytranslateTRTO255

This commit implements arraytranslateTRTO255 for AArch64.

Signed-off-by: KONNO Kazuhiro
---
 compiler/aarch64/CMakeLists.txt               |   3 +-
 compiler/aarch64/codegen/OMRCodeGenerator.cpp |  15 +-
 compiler/aarch64/codegen/OMRTreeEvaluator.cpp |  84 ++++++++++-
 ...{ARM64arrayCopy.spp => ARM64ArrayCopy.spp} |   0
 .../aarch64/runtime/ARM64ArrayTranslate.spp   | 130 ++++++++++++++++++
 compiler/ras/Debug.cpp                        |   1 +
 compiler/runtime/Helpers.inc                  |   3 +-
 7 files changed, 226 insertions(+), 10 deletions(-)
 rename compiler/aarch64/runtime/{ARM64arrayCopy.spp => ARM64ArrayCopy.spp} (100%)
 create mode 100644 compiler/aarch64/runtime/ARM64ArrayTranslate.spp

diff --git a/compiler/aarch64/CMakeLists.txt b/compiler/aarch64/CMakeLists.txt
index d7f49bdb050..f38b017d6de 100644
--- a/compiler/aarch64/CMakeLists.txt
+++ b/compiler/aarch64/CMakeLists.txt
@@ -47,6 +47,7 @@ compiler_library(aarch64
     ${CMAKE_CURRENT_LIST_DIR}/codegen/UnaryEvaluator.cpp
     ${CMAKE_CURRENT_LIST_DIR}/env/OMRCPU.cpp
     ${CMAKE_CURRENT_LIST_DIR}/env/OMRDebugEnv.cpp
-    ${CMAKE_CURRENT_LIST_DIR}/runtime/ARM64arrayCopy.spp
+    ${CMAKE_CURRENT_LIST_DIR}/runtime/ARM64ArrayCopy.spp
+    ${CMAKE_CURRENT_LIST_DIR}/runtime/ARM64ArrayTranslate.spp
     ${CMAKE_CURRENT_LIST_DIR}/runtime/CodeSync.cpp
 )
diff --git a/compiler/aarch64/codegen/OMRCodeGenerator.cpp b/compiler/aarch64/codegen/OMRCodeGenerator.cpp
index 8608f20b21c..73ebdeb7227 100644
--- a/compiler/aarch64/codegen/OMRCodeGenerator.cpp
+++ b/compiler/aarch64/codegen/OMRCodeGenerator.cpp
@@ -190,10 +190,17 @@ OMR::ARM64::CodeGenerator::initialize()
          cg->setSupportsArrayCmpLen();
          }
       }
-      if (!comp->getOption(TR_DisableArraySetOpts))
-         {
-         cg->setSupportsArraySet();
-         }
+
+   if (!comp->getOption(TR_DisableArraySetOpts))
+      {
+      cg->setSupportsArraySet();
+      }
+
+   static bool disableTRTO255 = (feGetEnv("TR_disableTRTO255") != NULL);
+   if (!disableTRTO255)
+      {
+      cg->setSupportsArrayTranslateTRTO255();
+      }
    }
 
 void
diff --git a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp
index f43256af930..209ae1d3a23 100644
--- a/compiler/aarch64/codegen/OMRTreeEvaluator.cpp
+++ b/compiler/aarch64/codegen/OMRTreeEvaluator.cpp
@@ -6400,10 +6400,86 @@ OMR::ARM64::TreeEvaluator::arraytranslateAndTestEvaluator(TR::Node *node, TR::Co
 
 TR::Register *
 OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGenerator *cg)
-   {
-   // TODO:ARM64: Enable TR::TreeEvaluator::arraytranslateEvaluator in compiler/aarch64/codegen/TreeEvaluatorTable.hpp when Implemented.
-   return OMR::ARM64::TreeEvaluator::unImpOpEvaluator(node, cg);
-   }
+   {
+   // tree looks as follows:
+   // arraytranslate
+   //    (0) input ptr
+   //    (1) output ptr
+   //    (2) translation table (dummy)
+   //    (3) stop character (terminal character, either 0xff00ff00 (ISO8859) or 0xff80ff80 (ASCII))
+   //    (4) input length (in elements)
+   //    (5) stopping char (dummy)
+   //
+   // Number of translated elements is returned
+
+   TR::Compilation *comp = cg->comp();
+
+   TR_ASSERT_FATAL(!node->isSourceByteArrayTranslate(), "Source is byte[] for arraytranslate");
+   TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "Target is char[] for arraytranslate");
+   TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst && node->getChild(3)->getInt() == 0x0ff00ff00, "Non-ISO8859 stop character for arraytranslate");
+
+   static bool verboseArrayTranslate = (feGetEnv("TR_verboseArrayTranslate") != NULL);
+   if (verboseArrayTranslate)
+      {
+      fprintf(stderr, "arrayTranslateTRTO255: %s @ %s\n",
+         comp->signature(),
+         comp->getHotnessName(comp->getMethodHotness())
+         );
+      }
+
+   TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0));
+   TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1));
+   TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(4));
+   TR::Register *outputLenReg = cg->allocateRegister();
+
+   int numDeps = 10;
+
+   TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, numDeps, cg->trMemory());
+
+   deps->addPreCondition(inputReg, TR::RealRegister::x0); // x0 in: input pointer
+
+   deps->addPostCondition(outputLenReg, TR::RealRegister::x0); // x0 out: number of translated elements
+   deps->addPostCondition(outputReg, TR::RealRegister::x1);
+   deps->addPostCondition(inputLenReg, TR::RealRegister::x2);
+
+   // Clobbered by the helper (it documents x4-x6 and v0-v2 as trashed)
+   TR::Register *clobberedReg;
+   deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x4);
+   cg->stopUsingRegister(clobberedReg);
+   deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x5);
+   cg->stopUsingRegister(clobberedReg);
+   deps->addPostCondition(clobberedReg = cg->allocateRegister(), TR::RealRegister::x6);
+   cg->stopUsingRegister(clobberedReg);
+
+   deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v0);
+   cg->stopUsingRegister(clobberedReg);
+   deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v1);
+   cg->stopUsingRegister(clobberedReg);
+   deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
+   cg->stopUsingRegister(clobberedReg);
+
+   // Array Translate helper call
+   TR_RuntimeHelper helper = TR_ARM64arrayTranslateTRTO255;
+   TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(helper);
+   uintptr_t addr = reinterpret_cast<uintptr_t>(helperSym->getMethodAddress());
+   generateImmSymInstruction(cg, TR::InstOpCode::bl, node, addr, deps, helperSym, NULL);
+
+   for (uint32_t i = 0; i < node->getNumChildren(); i++)
+      cg->decReferenceCount(node->getChild(i));
+
+   if (inputReg != node->getChild(0)->getRegister()) // release only the private copy made by gprClobberEvaluate
+      cg->stopUsingRegister(inputReg);
+
+   if (outputReg != node->getChild(1)->getRegister())
+      cg->stopUsingRegister(outputReg);
+
+   if (inputLenReg != node->getChild(4)->getRegister())
+      cg->stopUsingRegister(inputLenReg);
+
+   cg->machine()->setLinkRegisterKilled(true); // the bl above overwrites the link register
+   node->setRegister(outputLenReg);
+   return outputLenReg;
+   }
 
 TR::Register *
 OMR::ARM64::TreeEvaluator::arraysetEvaluator(TR::Node *node, TR::CodeGenerator *cg)
diff --git a/compiler/aarch64/runtime/ARM64arrayCopy.spp b/compiler/aarch64/runtime/ARM64ArrayCopy.spp
similarity index 100%
rename from compiler/aarch64/runtime/ARM64arrayCopy.spp
rename to compiler/aarch64/runtime/ARM64ArrayCopy.spp
diff --git a/compiler/aarch64/runtime/ARM64ArrayTranslate.spp b/compiler/aarch64/runtime/ARM64ArrayTranslate.spp
new file mode 100644
index 00000000000..65840c4326d
--- /dev/null
+++ b/compiler/aarch64/runtime/ARM64ArrayTranslate.spp
@@ -0,0 +1,130 @@
+/*******************************************************************************
+ * Copyright IBM Corp. and others 2024
+ *
+ * This program and the accompanying materials are made available under
+ * the terms of the Eclipse Public License 2.0 which accompanies this
+ * distribution and is available at https://www.eclipse.org/legal/epl-2.0/
+ * or the Apache License, Version 2.0 which accompanies this distribution
+ * and is available at https://www.apache.org/licenses/LICENSE-2.0.
+ *
+ * This Source Code may also be made available under the following Secondary
+ * Licenses when the conditions for such availability set forth in the
+ * Eclipse Public License, v. 2.0 are satisfied: GNU General Public License,
+ * version 2 with the GNU Classpath Exception [1] and GNU General Public
+ * License, version 2 with the OpenJDK Assembly Exception [2].
+ *
+ * [1] https://www.gnu.org/software/classpath/license.html
+ * [2] https://openjdk.org/legal/assembly-exception.html
+ *
+ * SPDX-License-Identifier: EPL-2.0 OR Apache-2.0 OR GPL-2.0-only WITH Classpath-exception-2.0 OR GPL-2.0-only WITH OpenJDK-assembly-exception-1.0
+ *******************************************************************************/
+
+	.file "ARM64ArrayTranslate.s"
+
+#include "aarch64/runtime/arm64asmdefs.inc"
+
+	.globl FUNC_LABEL(__arrayTranslateTRTO255)
+
+	.text
+	.align 2
+
+// ----
+// arrayTranslateTRTO255
+// ----
+// TO stands for Two bytes to One byte
+//
+// uint16 input[];
+// uint8 output[];
+// int32 len;
+//
+// int32 i = 0;
+// while (i < len) {
+//     uint16 ch = input[i];
+//     if (ch > 0xFF) break;
+//     output[i] = ch & 0xFF;
+//     i++;
+// }
+// return i;
+//
+// in:    x0: input
+//        x1: output
+//        x2: len
+// out:   x0: num of translated elements
+// trash: x4-x6, v0-v2
+
+FUNC_LABEL(__arrayTranslateTRTO255):
+	// preserve output address
+	mov	x6, x1
+	cmp	w2, #16
+	b.cc	atTRTO255_15
+	lsr	w4, w2, #4
+atTRTO255_16Loop:
+	// load 16 elements
+	ldp	q0, q1, [x0]
+	// collect upper 8 bits
+	uzp2	v2.16b, v0.16b, v1.16b
+	// fail when any one of them is non-zero
+	umaxp	v2.4s, v2.4s, v2.4s
+	mov	x5, v2.D[0]
+	cbnz	x5, atTRTO255_Fail
+	// collect lower 8 bits
+	uzp1	v2.16b, v0.16b, v1.16b
+	add	x0, x0, #32
+	subs	w4, w4, #1
+	// store 16 elements
+	str	q2, [x1], #16
+	b.ne	atTRTO255_16Loop
+atTRTO255_15:
+	// 15 elements or less remaining
+	tst	w2, #8
+	b.eq	atTRTO255_7
+	// load 8 elements
+	ldr	q0, [x0]
+	// collect upper 8 bits
+	trn2	v2.16b, v0.16b, v0.16b
+	// fail when any one of them is non-zero
+	umaxp	v2.4s, v2.4s, v2.4s
+	mov	x5, v2.D[0]
+	cbnz	x5, atTRTO255_Fail
+	// collect lower 8 bits
+	xtn	v2.8b, v0.8h
+	add	x0, x0, #16
+	// store 8 elements
+	str	d2, [x1], #8
+atTRTO255_7:
+	// 7 elements or less remaining
+	tst	w2, #4
+	b.eq	atTRTO255_3
+	// load 4 elements
+	ldr	d0, [x0]
+	// collect upper 8 bits
+	trn2	v2.8b, v0.8b, v0.8b
+	// fail when any one of them is non-zero
+	mov	x5, v2.D[0]
+	cbnz	x5, atTRTO255_Fail
+	// collect lower 8 bits
+	xtn	v2.8b, v0.8h
+	add	x0, x0, #8
+	// store 4 elements
+	str	s2, [x1], #4
+atTRTO255_3:
+	// 3 elements or less remaining
+	ands	w4, w2, #3
+atTRTO255_1Loop:
+	b.eq	atTRTO255_Done
+	ldrh	w5, [x0], #2
+	cmp	w5, #256
+	b.cs	atTRTO255_Done
+	subs	w4, w4, #1
+	strb	w5, [x1], #1
+	b	atTRTO255_1Loop
+atTRTO255_Fail:
+	ldrh	w5, [x0], #2
+	cmp	w5, #256
+	b.cs	atTRTO255_Done
+	strb	w5, [x1], #1
+	b	atTRTO255_Fail
+atTRTO255_Done:
+	// number of translated elements
+	sub	x0, x1, x6
+	ret
diff --git a/compiler/ras/Debug.cpp b/compiler/ras/Debug.cpp
index 98b4431f136..d1c1aacf8fc 100644
--- a/compiler/ras/Debug.cpp
+++ b/compiler/ras/Debug.cpp
@@ -4237,6 +4237,7 @@ TR_Debug::getRuntimeHelperName(int32_t index)
          case TR_ARM64interfaceCompleteSlot2: return "_interfaceCompleteSlot2";
          case TR_ARM64interfaceSlotsUnavailable: return "_interfaceSlotsUnavailable";
          case TR_ARM64PatchGCRHelper: return "_patchGCRHelper" ;
+         case TR_ARM64arrayTranslateTRTO255: return "__arrayTranslateTRTO255";
          }
       }
 #endif
diff --git a/compiler/runtime/Helpers.inc b/compiler/runtime/Helpers.inc
index 6740d70c45d..e101e2ae8d2 100644
--- a/compiler/runtime/Helpers.inc
+++ b/compiler/runtime/Helpers.inc
@@ -497,7 +497,8 @@ SETVAL(TR_ARM64interfaceCompleteSlot2,TR_FSRH+42)
 SETVAL(TR_ARM64interfaceSlotsUnavailable,TR_FSRH+43)
 SETVAL(TR_ARM64PatchGCRHelper,TR_FSRH+44)
 SETVAL(TR_ARM64fieldWatchHelper,TR_FSRH+45)
-SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+46)
+SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+46)
+SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+47)
 
 SETVAL(TR_S390longDivide,TR_FSRH)
 SETVAL(TR_S390interfaceCallHelper,TR_FSRH+1)