diff --git a/runtime/compiler/codegen/J9CodeGenerator.hpp b/runtime/compiler/codegen/J9CodeGenerator.hpp index b5d425b2fe2..5bb0a356203 100644 --- a/runtime/compiler/codegen/J9CodeGenerator.hpp +++ b/runtime/compiler/codegen/J9CodeGenerator.hpp @@ -461,6 +461,16 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz); */ void setSupportsInlineStringHashCode() { _j9Flags.set(SupportsInlineStringHashCode); } + /** \brief + * Determines whether the code generator supports inlining of java/lang/StringCoding.hasNegatives + */ + bool getSupportsInlineStringCodingHasNegatives() { return _j9Flags.testAny(SupportsInlineStringCodingHasNegatives); } + + /** \brief + * The code generator supports inlining of java/lang/StringCoding.hasNegatives + */ + void setSupportsInlineStringCodingHasNegatives() { _j9Flags.set(SupportsInlineStringCodingHasNegatives); } + /** \brief * Determines whether the code generator supports inlining of java/lang/StringLatin1.inflate */ @@ -677,6 +687,7 @@ void addMonClass(TR::Node* monNode, TR_OpaqueClassBlock* clazz); SavesNonVolatileGPRsForGC = 0x00000800, SupportsInlineVectorizedMismatch = 0x00001000, SupportsInlineVectorizedHashCode = 0x00002000, + SupportsInlineStringCodingHasNegatives = 0x00004000, }; flags32_t _j9Flags; diff --git a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp index 0220c705f04..193bf181cb8 100644 --- a/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp +++ b/runtime/compiler/codegen/J9RecognizedMethodsEnum.hpp @@ -1226,6 +1226,7 @@ java_lang_StringCoding_encode, java_lang_StringCoding_StringDecoder_decode, java_lang_StringCoding_StringEncoder_encode, + java_lang_StringCoding_hasNegatives, java_lang_StringCoding_implEncodeISOArray, java_lang_StringCoding_implEncodeAsciiArray, java_lang_StringCoding_encode8859_1, diff --git a/runtime/compiler/env/j9method.cpp b/runtime/compiler/env/j9method.cpp index 9521088f519..1ee4697d82a 100644 --- a/runtime/compiler/env/j9method.cpp +++ b/runtime/compiler/env/j9method.cpp @@ -2832,6 +2832,7 @@ void TR_ResolvedJ9Method::construct() { {x(TR::java_lang_StringCoding_decode, "decode", "(Ljava/nio/charset/Charset;[BII)[C")}, {x(TR::java_lang_StringCoding_encode, "encode", "(Ljava/nio/charset/Charset;[CII)[B")}, + {x(TR::java_lang_StringCoding_hasNegatives, "hasNegatives", "([BII)Z")}, {x(TR::java_lang_StringCoding_implEncodeISOArray, "implEncodeISOArray", "([BI[BII)I")}, {x(TR::java_lang_StringCoding_implEncodeAsciiArray, "implEncodeAsciiArray", "([CI[BII)I")}, {x(TR::java_lang_StringCoding_encode8859_1, "encode8859_1", "(B[B)[B")}, @@ -5135,6 +5136,7 @@ TR_ResolvedJ9Method::setRecognizedMethodInfo(TR::RecognizedMethod rm) case TR::java_lang_String_hashCodeImplCompressed: case TR::java_lang_String_hashCodeImplDecompressed: case TR::java_lang_StringLatin1_inflate: + case TR::java_lang_StringCoding_hasNegatives: case TR::sun_nio_ch_NativeThread_current: case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcDecrypt: case TR::com_ibm_crypto_provider_AEScryptInHardware_cbcEncrypt: diff --git a/runtime/compiler/optimizer/InlinerTempForJ9.cpp b/runtime/compiler/optimizer/InlinerTempForJ9.cpp index e10324c10bb..0bdc3fd0097 100644 --- a/runtime/compiler/optimizer/InlinerTempForJ9.cpp +++ b/runtime/compiler/optimizer/InlinerTempForJ9.cpp @@ -5582,6 +5582,12 @@ TR_J9InlinerPolicy::supressInliningRecognizedInitialCallee(TR_CallSite* callsite return true; } break; + case TR::java_lang_StringCoding_hasNegatives: + if (comp->cg()->getSupportsInlineStringCodingHasNegatives()) + { + return true; + } + break; case TR::java_lang_Integer_stringSize: case TR::java_lang_Long_stringSize: if (comp->cg()->getSupportsIntegerStringSize()) diff --git a/runtime/compiler/z/codegen/J9CodeGenerator.cpp b/runtime/compiler/z/codegen/J9CodeGenerator.cpp index ca878950dbd..bc1c9dadf8e 100644 --- a/runtime/compiler/z/codegen/J9CodeGenerator.cpp +++ b/runtime/compiler/z/codegen/J9CodeGenerator.cpp @@ -120,6 +120,13 @@ J9::Z::CodeGenerator::initialize() cg->setSupportsInlineConcurrentLinkedQueue(); } + static bool disableInlineStringCodingHasNegatives = feGetEnv("TR_DisableInlineStringCodingHasNegatives") != NULL; + if (cg->getSupportsVectorRegisters() && !disableInlineStringCodingHasNegatives && + !TR::Compiler->om.canGenerateArraylets()) + { + cg->setSupportsInlineStringCodingHasNegatives(); + } + // Similar to AOT, array translate instructions are not supported for remote compiles because instructions such as // TRTO allocate lookup tables in persistent memory that cannot be relocated. if (comp->isOutOfProcessCompilation()) @@ -4013,6 +4020,13 @@ J9::Z::CodeGenerator::inlineDirectCall( return resultReg != NULL; } break; + case TR::java_lang_StringCoding_hasNegatives: + if (cg->getSupportsInlineStringCodingHasNegatives()) + { + resultReg = TR::TreeEvaluator::inlineStringCodingHasNegatives(node, cg); + return true; + } + break; case TR::com_ibm_jit_JITHelpers_transformedEncodeUTF16Big: return resultReg = comp->getOption(TR_DisableUTF16BEEncoder) ? TR::TreeEvaluator::inlineUTF16BEEncodeSIMD(node, cg) : TR::TreeEvaluator::inlineUTF16BEEncode (node, cg); diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp index 3976af7e72c..854aaf0a037 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp @@ -858,6 +858,119 @@ J9::Z::TreeEvaluator::pdclearSetSignEvaluator(TR::Node *node, TR::CodeGenerator return TR::TreeEvaluator::pdclearEvaluator(node, cg); } +/* + * This method inlines the Java API StringCoding.hasNegatives(byte src, int off, int len) using + * SIMD instructions. + * The method looks like below on Java 17: + * + * @IntrinsicCandidate + * public static boolean hasNegatives(byte[] ba, int off, int len) { + * for (int i = off; i < off + len; i++) { + * if (ba[i] < 0) { + * return true; + * } + * } + * return false; + * } + * This routine behaves similarly on Java 11 and 21 as well and so is supported on those platforms too. + */ +TR::Register* +J9::Z::TreeEvaluator::inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg) + { + TR::Register *inputPtrReg = cg->gprClobberEvaluate(node->getChild(0)); + TR::Register *offsetReg = cg->evaluate(node->getChild(1)); + TR::Register *lengthReg = cg->evaluate(node->getChild(2)); + + TR::LabelSymbol *processMultiple16CharsStart = generateLabelSymbol(cg); + TR::LabelSymbol *processMultiple16CharsEnd = generateLabelSymbol(cg); + TR::LabelSymbol *cFlowRegionEnd = generateLabelSymbol(cg); + TR::LabelSymbol *cFlowRegionStart = generateLabelSymbol(cg); + TR::LabelSymbol *processOutOfRangeChar = generateLabelSymbol(cg); + + TR::Register *vInput = cg->allocateRegister(TR_VRF); + TR::Register *vUpperLimit = cg->allocateRegister(TR_VRF); + TR::Register *vComparison = cg->allocateRegister(TR_VRF); + TR::Register *numCharsLeftToProcess = cg->allocateRegister(); // off + len + TR::Register *outOfRangeCharIndex = cg->allocateRegister(TR_VRF); + + TR::Register *returnReg = cg->allocateRegister(); + generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 0); + + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionStart); + generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, lengthReg, 0, TR::InstOpCode::COND_BE, cFlowRegionEnd, false, false); + generateRRInstruction(cg, TR::InstOpCode::AGFR, node, inputPtrReg, offsetReg); + generateRRInstruction(cg, TR::InstOpCode::LR, node, numCharsLeftToProcess, lengthReg); + + const uint8_t upperLimit = 127; + const uint8_t rangeComparison = 0x20; // > comparison + + generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vUpperLimit, upperLimit, 0); + generateVRIaInstruction(cg, TR::InstOpCode::VREPI, node, vComparison, rangeComparison, 0); + + generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BNH, processMultiple16CharsEnd, false, false); + + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsStart); + processMultiple16CharsStart->setStartInternalControlFlow(); + + // Load bytes and search for out of range character + generateVRXInstruction(cg, TR::InstOpCode::VL, node, vInput, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg)); + + generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0); + + // process bad character by setting return register to true and exiting + generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar); + + // Update the counters + generateRXInstruction(cg, TR::InstOpCode::getLoadAddressOpCode(), node, inputPtrReg, generateS390MemoryReference(inputPtrReg, 16, cg)); + generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -16); + + // Branch back up if we still have more than 16 characters to process. + generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::C, node, numCharsLeftToProcess, 16, TR::InstOpCode::COND_BH, processMultiple16CharsStart, false, false); + + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processMultiple16CharsEnd); + + // Zero out the input register to avoid invalid VSTRC result + generateVRIaInstruction(cg, TR::InstOpCode::VGBM, node, vInput, 0, 0 /*unused*/); + + // VLL and VSTL work on indices so we subtract 1 + generateRIInstruction(cg, TR::InstOpCode::AHI, node, numCharsLeftToProcess, -1); + // Load residue bytes and check for out of range character + generateVRSbInstruction(cg, TR::InstOpCode::VLL, node, vInput, numCharsLeftToProcess, generateS390MemoryReference(inputPtrReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg)); + + generateVRRdInstruction(cg, TR::InstOpCode::VSTRC, node, outOfRangeCharIndex, vInput, vUpperLimit, vComparison, 0x1, 0); + generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_CC1, node, processOutOfRangeChar); + + generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionEnd); + + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, processOutOfRangeChar); + generateRIInstruction(cg, TR::InstOpCode::LGHI, node, returnReg, 1); + + TR::RegisterDependencyConditions* dependencies = new (cg->trHeapMemory()) TR::RegisterDependencyConditions(0, 7, cg); + dependencies->addPostConditionIfNotAlreadyInserted(vInput, TR::RealRegister::AssignAny); + dependencies->addPostConditionIfNotAlreadyInserted(outOfRangeCharIndex, TR::RealRegister::AssignAny); + dependencies->addPostConditionIfNotAlreadyInserted(vUpperLimit, TR::RealRegister::AssignAny); + dependencies->addPostConditionIfNotAlreadyInserted(vComparison, TR::RealRegister::AssignAny); + dependencies->addPostConditionIfNotAlreadyInserted(inputPtrReg, TR::RealRegister::AssignAny); + dependencies->addPostConditionIfNotAlreadyInserted(numCharsLeftToProcess, TR::RealRegister::AssignAny); + dependencies->addPostConditionIfNotAlreadyInserted(returnReg, TR::RealRegister::AssignAny); + + generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionEnd, dependencies); + cFlowRegionEnd->setEndInternalControlFlow(); + + for (int i = 0; i < node->getNumChildren(); i++) + { + cg->decReferenceCount(node->getChild(i)); + } + + cg->stopUsingRegister(vInput); + cg->stopUsingRegister(outOfRangeCharIndex); + cg->stopUsingRegister(vUpperLimit); + cg->stopUsingRegister(vComparison); + cg->stopUsingRegister(numCharsLeftToProcess); + node->setRegister(returnReg); + return returnReg; + } + /* Moved from Codegen to FE */ /////////////////////////////////////////////////////////////////////////////////// // Generate code to perform a comparison and branch to a snippet. diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp index 7744037efb0..c153e03ece3 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.hpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.hpp @@ -73,6 +73,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public J9::TreeEvaluator * Inline Java's (Java 11 onwards) StringLatin1.inflate([BI[CII)V */ static TR::Register *inlineStringLatin1Inflate(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *inlineStringCodingHasNegatives(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *VMinlineCompareAndSwap( TR::Node *node, TR::CodeGenerator *cg, TR::InstOpCode::Mnemonic casOp, bool isObj, bool isExchange = false); static TR::Register *inlineAtomicOps(TR::Node *node, TR::CodeGenerator *cg, int8_t size, TR::MethodSymbol *method, bool isArray = false); static TR::Register *inlineAtomicFieldUpdater(TR::Node *node, TR::CodeGenerator *cg, TR::MethodSymbol *method);