Skip to content

Commit

Permalink
Merge pull request #7544 from knn-k/aarch64atTRTO
Browse files Browse the repository at this point in the history
AArch64: Implement arrayTranslateTRTO
  • Loading branch information
0xdaryl authored Nov 15, 2024
2 parents 0c0927f + 7717f1e commit 52b4dd4
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 8 deletions.
6 changes: 6 additions & 0 deletions compiler/aarch64/codegen/OMRCodeGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,12 @@ OMR::ARM64::CodeGenerator::initialize()
cg->setSupportsArraySet();
}

static bool disableTRTO = (feGetEnv("TR_disableTRTO") != NULL);
if (!disableTRTO)
{
cg->setSupportsArrayTranslateTRTO();
}

static bool disableTRTO255 = (feGetEnv("TR_disableTRTO255") != NULL);
if (!disableTRTO255)
{
Expand Down
34 changes: 29 additions & 5 deletions compiler/aarch64/codegen/OMRTreeEvaluator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6433,26 +6433,38 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
// Number of translated elements is returned

TR::Compilation *comp = cg->comp();
bool arrayTranslateTRTO255 = false;

TR_ASSERT_FATAL(!node->isSourceByteArrayTranslate(), "Source is byte[] for arraytranslate");
TR_ASSERT_FATAL(node->isTargetByteArrayTranslate(), "Target is char[] for arraytranslate");
TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst && node->getChild(3)->getInt() == 0x0ff00ff00, "Non-ISO8859 stop character for arraytranslate");
TR_ASSERT_FATAL(node->getChild(3)->getOpCodeValue() == TR::iconst, "Non-constant stop char for arraytranslate");

if (node->getChild(3)->getInt() == 0x0ff00ff00)
{
arrayTranslateTRTO255 = true;
}
else
{
TR_ASSERT_FATAL(node->getChild(3)->getInt() == 0x0ff80ff80, "Unknown stop char for arraytranslate");
}

static bool verboseArrayTranslate = (feGetEnv("TR_verboseArrayTranslate") != NULL);
if (verboseArrayTranslate)
{
fprintf(stderr, "arrayTranslateTRTO255: %s @ %s\n",
fprintf(stderr, "arrayTranslateTRTO: %s @ %s [isTO255: %d]\n",
comp->signature(),
comp->getHotnessName(comp->getMethodHotness())
comp->getHotnessName(comp->getMethodHotness()),
arrayTranslateTRTO255
);
}

TR::Register *inputReg = cg->gprClobberEvaluate(node->getChild(0));
TR::Register *outputReg = cg->gprClobberEvaluate(node->getChild(1));
TR::Register *stopCharReg = arrayTranslateTRTO255 ? NULL : cg->gprClobberEvaluate(node->getChild(3));
TR::Register *inputLenReg = cg->gprClobberEvaluate(node->getChild(4));
TR::Register *outputLenReg = cg->allocateRegister();

int numDeps = 10;
int numDeps = arrayTranslateTRTO255 ? 10 : 12;

TR::RegisterDependencyConditions *deps = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(1, numDeps, cg->trMemory());

Expand All @@ -6461,6 +6473,10 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
deps->addPostCondition(outputLenReg, TR::RealRegister::x0);
deps->addPostCondition(outputReg, TR::RealRegister::x1);
deps->addPostCondition(inputLenReg, TR::RealRegister::x2);
if (!arrayTranslateTRTO255)
{
deps->addPostCondition(stopCharReg, TR::RealRegister::x3);
}

// Clobbered by the helper
TR::Register *clobberedReg;
Expand All @@ -6477,9 +6493,14 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
cg->stopUsingRegister(clobberedReg);
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v2);
cg->stopUsingRegister(clobberedReg);
if (!arrayTranslateTRTO255)
{
deps->addPostCondition(clobberedReg = cg->allocateRegister(TR_VRF), TR::RealRegister::v3);
cg->stopUsingRegister(clobberedReg);
}

// Array Translate helper call
TR_RuntimeHelper helper = TR_ARM64arrayTranslateTRTO255;
TR_RuntimeHelper helper = arrayTranslateTRTO255 ? TR_ARM64arrayTranslateTRTO255 : TR_ARM64arrayTranslateTRTO;
TR::SymbolReference *helperSym = cg->symRefTab()->findOrCreateRuntimeHelper(helper);
uintptr_t addr = reinterpret_cast<uintptr_t>(helperSym->getMethodAddress());
generateImmSymInstruction(cg, TR::InstOpCode::bl, node, addr, deps, helperSym, NULL);
Expand All @@ -6493,6 +6514,9 @@ OMR::ARM64::TreeEvaluator::arraytranslateEvaluator(TR::Node *node, TR::CodeGener
if (outputReg != node->getChild(1)->getRegister())
cg->stopUsingRegister(outputReg);

if (!arrayTranslateTRTO255 && stopCharReg != node->getChild(3)->getRegister())
cg->stopUsingRegister(stopCharReg);

if (inputLenReg != node->getChild(4)->getRegister())
cg->stopUsingRegister(inputLenReg);

Expand Down
112 changes: 112 additions & 0 deletions compiler/aarch64/runtime/ARM64ArrayTranslate.spp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,123 @@

#include "aarch64/runtime/arm64asmdefs.inc"

.globl FUNC_LABEL(__arrayTranslateTRTO)
.globl FUNC_LABEL(__arrayTranslateTRTO255)

.text
.align 2

// ----
// arrayTranslateTRTO
// ----
// TO stands for Two bytes to One byte
//
// Copies 16-bit elements from input to 8-bit elements of output until
// either len elements have been copied or an element with any of the
// mask bits set is found. The equivalent scalar code is:
//
// uint16 input[];
// uint8 output[];
// int32 len;
// uint16 mask; // low 16 bits of x3: 0xff00 (ISO8859) or 0xff80 (ASCII)
//
// int32 i = 0;
// while (i < len) {
// uint16 ch = input[i];
// if ((ch & mask) != 0) break;
// output[i] = ch & 0xFF;
// i++;
// }
// return i;
//
// The mask arrives as a 32-bit value in w3 (0xff00ff00 or 0xff80ff80).
// Only its low 16 bits take part in the tests: the SIMD path replicates
// the low halfword of w3, and the scalar path tests it against a
// zero-extended halfword.
//
// in: x0: input
// x1: output
// x2: len
// x3: mask
// out: x0: num of translated elements
// trash: x4-x6, v0-v3
// x1 is advanced past the last byte stored, and the NZCV flags are
// modified.

FUNC_LABEL(__arrayTranslateTRTO):
// preserve output address
// (x1 is advanced by the stores; the difference gives the result)
mov x6, x1
// load mask to a SIMD register
// (the low 16 bits of w3 are replicated into all 8 halfword lanes)
dup v3.8h, w3
// skip the 16-element loop when len < 16 (unsigned comparison)
cmp w2, #16
b.cc atTRTO_15
// w4 = number of full 16-element iterations
lsr w4, w2, #4
atTRTO_16Loop:
// load 16 elements
ldp q0, q1, [x0]
// mask first 8 elements
and v2.16b, v0.16b, v3.16b
// fail when any one of them is non-zero
// (pairwise max with itself folds the four 32-bit lanes into the low
// 64 bits, which are non-zero iff any masked bit was set)
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
cbnz x5, atTRTO_Fail
// mask next 8 elements
and v2.16b, v1.16b, v3.16b
// fail when any one of them is non-zero
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
cbnz x5, atTRTO_Fail
// collect lower 8 bits
// (even-numbered bytes of v0 followed by those of v1)
uzp1 v2.16b, v0.16b, v1.16b
// pointers are advanced only after both checks passed, so atTRTO_Fail
// always sees x0/x1 at the start of the chunk being examined
add x0, x0, #32
// the flags set here are consumed by b.ne below; the store in between
// does not change them
subs w4, w4, #1
// store 16 elements
str q2, [x1], #16
b.ne atTRTO_16Loop
atTRTO_15:
// 15 elements or less remaining
// bit 3 of len tells whether a chunk of 8 elements remains
tst w2, #8
b.eq atTRTO_7
// load 8 elements
ldr q0, [x0]
// mask 8 elements
and v2.16b, v0.16b, v3.16b
// fail when any one of them is non-zero
umaxp v2.4s, v2.4s, v2.4s
mov x5, v2.D[0]
cbnz x5, atTRTO_Fail
// collect lower 8 bits
xtn v2.8b, v0.8h
add x0, x0, #16
// store 8 elements
str d2, [x1], #8
atTRTO_7:
// 7 elements or less remaining
// bit 2 of len tells whether a chunk of 4 elements remains
tst w2, #4
b.eq atTRTO_3
// load 4 elements
ldr d0, [x0]
// mask 4 elements
and v2.8b, v0.8b, v3.8b
// fail when any one of them is non-zero
// (all 64 masked bits fit in x5, so no umaxp reduction is needed)
mov x5, v2.D[0]
cbnz x5, atTRTO_Fail
// collect lower 8 bits
// (only the low 4 result bytes are stored below)
xtn v2.8b, v0.8h
add x0, x0, #8
// store 4 elements
str s2, [x1], #4
atTRTO_3:
// 3 elements or less remaining
// w4 = len & 3; the Z flag set here is tested on entry to the loop
ands w4, w2, #3
atTRTO_1Loop:
// exit when no element remains: the flags come from ands on the first
// pass and from subs on later passes (strb does not change them)
b.eq atTRTO_Done
ldrh w5, [x0], #2
// stop at the first element that has any mask bit set
tst w5, w3
b.ne atTRTO_Done
subs w4, w4, #1
strb w5, [x1], #1
b atTRTO_1Loop
atTRTO_Fail:
// A SIMD check found a stop element somewhere in the current chunk.
// x0/x1 still point at the start of that chunk, so translate it one
// element at a time until the stop element is reached. No counter is
// needed because the loop ends at that element.
ldrh w5, [x0], #2
tst w5, w3
b.ne atTRTO_Done
strb w5, [x1], #1
b atTRTO_Fail
atTRTO_Done:
// number of translated elements
// (one byte is stored per element, so this is bytes written)
sub x0, x1, x6
ret

// ----
// arrayTranslateTRTO255
// ----
Expand Down
3 changes: 2 additions & 1 deletion compiler/ras/Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4240,7 +4240,8 @@ TR_Debug::getRuntimeHelperName(int32_t index)
case TR_ARM64backwardArrayCopy: return "__backwardArrayCopy";
case TR_ARM64interfaceCompleteSlot2: return "_interfaceCompleteSlot2";
case TR_ARM64interfaceSlotsUnavailable: return "_interfaceSlotsUnavailable";
case TR_ARM64PatchGCRHelper: return "_patchGCRHelper" ;
case TR_ARM64PatchGCRHelper: return "_patchGCRHelper";
case TR_ARM64arrayTranslateTRTO: return "__arrayTranslateTRTO";
case TR_ARM64arrayTranslateTRTO255: return "__arrayTranslateTRTO255";
}
}
Expand Down
5 changes: 3 additions & 2 deletions compiler/runtime/Helpers.inc
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,9 @@ SETVAL(TR_ARM64interfaceCompleteSlot2,TR_FSRH+42)
SETVAL(TR_ARM64interfaceSlotsUnavailable,TR_FSRH+43)
SETVAL(TR_ARM64PatchGCRHelper,TR_FSRH+44)
SETVAL(TR_ARM64fieldWatchHelper,TR_FSRH+45)
SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+46)
SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+47)
SETVAL(TR_ARM64arrayTranslateTRTO,TR_FSRH+46)
SETVAL(TR_ARM64arrayTranslateTRTO255,TR_FSRH+47)
SETVAL(TR_ARM64numRuntimeHelpers,TR_FSRH+48)

SETVAL(TR_S390longDivide,TR_FSRH)
SETVAL(TR_S390interfaceCallHelper,TR_FSRH+1)
Expand Down

0 comments on commit 52b4dd4

Please sign in to comment.