From 35faffb7701e1fff65b9ec9156e7d67399391410 Mon Sep 17 00:00:00 2001 From: Spencer Comin Date: Tue, 19 Nov 2024 16:33:10 -0500 Subject: [PATCH] X: Implement popcnt Implement lpopcnt and ipopcnt IL opcodes for x86. Signed-off-by: Spencer Comin --- compiler/x/amd64/codegen/OMRTreeEvaluator.cpp | 15 +++++++------ compiler/x/amd64/codegen/OMRTreeEvaluator.hpp | 1 - compiler/x/codegen/OMRTreeEvaluator.hpp | 1 + compiler/x/codegen/UnaryEvaluator.cpp | 12 ++++++++++ compiler/x/env/OMRCPU.cpp | 9 ++++++++ compiler/x/env/OMRCPU.hpp | 6 +++++ compiler/x/i386/codegen/OMRTreeEvaluator.cpp | 22 +++++++++++++------ compiler/x/i386/codegen/OMRTreeEvaluator.hpp | 1 - 8 files changed, 51 insertions(+), 16 deletions(-) diff --git a/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp b/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp index d8bbbc1de81..3bf33265fec 100644 --- a/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/amd64/codegen/OMRTreeEvaluator.cpp @@ -2195,12 +2195,6 @@ OMR::X86::AMD64::TreeEvaluator::inotzEvaluator(TR::Node *node, TR::CodeGenerator return TR::TreeEvaluator::badILOpEvaluator(node, cg); } -TR::Register* -OMR::X86::AMD64::TreeEvaluator::ipopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return TR::TreeEvaluator::badILOpEvaluator(node, cg); - } - TR::Register* OMR::X86::AMD64::TreeEvaluator::lhbitEvaluator(TR::Node *node, TR::CodeGenerator *cg) { @@ -2228,7 +2222,14 @@ OMR::X86::AMD64::TreeEvaluator::lnotzEvaluator(TR::Node *node, TR::CodeGenerator TR::Register* OMR::X86::AMD64::TreeEvaluator::lpopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::badILOpEvaluator(node, cg); + TR::Node *child = node->getFirstChild(); + TR::Register *inputReg = cg->longClobberEvaluate(child); + + generateRegRegInstruction(TR::InstOpCode::POPCNT8RegReg, node, inputReg, inputReg, cg); + + node->setRegister(inputReg); + cg->decReferenceCount(child); + return inputReg; } TR::Register* diff --git 
a/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp b/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp index 29c68ec7b7d..0f3a14460c7 100644 --- a/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/amd64/codegen/OMRTreeEvaluator.hpp @@ -411,7 +411,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::X86::TreeEvaluator static TR::Register *ilbitEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *inolzEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *inotzEvaluator(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register *ipopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *lhbitEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *llbitEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *lnolzEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/codegen/OMRTreeEvaluator.hpp b/compiler/x/codegen/OMRTreeEvaluator.hpp index 71979d5d23c..52c571ebd0c 100644 --- a/compiler/x/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/codegen/OMRTreeEvaluator.hpp @@ -149,6 +149,7 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::TreeEvaluator static TR::Register *s2iEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *su2iEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *c2iEvaluator(TR::Node *node, TR::CodeGenerator *cg); + static TR::Register *ipopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *ibits2fEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *fbits2iEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *compareFloatAndBranchEvaluator(TR::Node *node, TR::CodeGenerator *cg); diff --git a/compiler/x/codegen/UnaryEvaluator.cpp b/compiler/x/codegen/UnaryEvaluator.cpp index ac7f2c4ba1c..88eb02ebe4c 100644 --- a/compiler/x/codegen/UnaryEvaluator.cpp +++ b/compiler/x/codegen/UnaryEvaluator.cpp @@ -455,3 +455,15 @@ TR::Register 
*OMR::X86::TreeEvaluator::c2iEvaluator(TR::Node *node, TR::CodeGene { return TR::TreeEvaluator::conversionAnalyser(node, TR::InstOpCode::MOVZXReg4Mem2, TR::InstOpCode::MOVZXReg4Reg2, cg); } + +TR::Register *OMR::X86::TreeEvaluator::ipopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) + { + TR::Node *child = node->getFirstChild(); + TR::Register *inputReg = cg->intClobberEvaluate(child); + + generateRegRegInstruction(TR::InstOpCode::POPCNT4RegReg, node, inputReg, inputReg, cg); + + node->setRegister(inputReg); + cg->decReferenceCount(child); + return inputReg; + } diff --git a/compiler/x/env/OMRCPU.cpp b/compiler/x/env/OMRCPU.cpp index ea931a89fd6..c0e52ee3d44 100644 --- a/compiler/x/env/OMRCPU.cpp +++ b/compiler/x/env/OMRCPU.cpp @@ -158,6 +158,15 @@ OMR::X86::CPU::getSupportsHardwareSQRT() return true; } +bool +OMR::X86::CPU::hasPopulationCountInstruction() + { + if ((self()->getX86ProcessorFeatureFlags2() & TR_POPCNT) != 0x00000000) + return true; + else + return false; + } + bool OMR::X86::CPU::supportsTransactionalMemoryInstructions() { diff --git a/compiler/x/env/OMRCPU.hpp b/compiler/x/env/OMRCPU.hpp index c16c731326e..d2b842ce3c9 100644 --- a/compiler/x/env/OMRCPU.hpp +++ b/compiler/x/env/OMRCPU.hpp @@ -71,6 +71,12 @@ class OMR_EXTENSIBLE CPU : public OMR::CPU bool getSupportsHardwareSQRT(); + /** @brief Determines whether the popcnt instruction is available on the current processor. + * + * @return true if popcnt is available, false otherwise. + */ + bool hasPopulationCountInstruction(); + /** @brief Determines whether the Transactional Memory (TM) facility is available on the current processor. * * @return true if TM is available, false otherwise. 
diff --git a/compiler/x/i386/codegen/OMRTreeEvaluator.cpp b/compiler/x/i386/codegen/OMRTreeEvaluator.cpp index bf2d79a6782..52692611982 100644 --- a/compiler/x/i386/codegen/OMRTreeEvaluator.cpp +++ b/compiler/x/i386/codegen/OMRTreeEvaluator.cpp @@ -2274,12 +2274,6 @@ OMR::X86::I386::TreeEvaluator::inotzEvaluator(TR::Node *node, TR::CodeGenerator return TR::TreeEvaluator::badILOpEvaluator(node, cg); } -TR::Register* -OMR::X86::I386::TreeEvaluator::ipopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) - { - return TR::TreeEvaluator::badILOpEvaluator(node, cg); - } - TR::Register* OMR::X86::I386::TreeEvaluator::lhbitEvaluator(TR::Node *node, TR::CodeGenerator *cg) { @@ -2307,7 +2301,21 @@ OMR::X86::I386::TreeEvaluator::lnotzEvaluator(TR::Node *node, TR::CodeGenerator TR::Register* OMR::X86::I386::TreeEvaluator::lpopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg) { - return TR::TreeEvaluator::badILOpEvaluator(node, cg); + TR::Node *child = node->getFirstChild(); + TR::Register *inputReg = cg->longClobberEvaluate(child); + TR::Register *inputHigh = inputReg->getHighOrder(); + TR::Register *inputLow = inputReg->getLowOrder(); + TR::Register *resultReg = inputLow; + + // Count the set bits of each 32-bit half in place, then add the high count into the low register + generateRegRegInstruction(TR::InstOpCode::POPCNT4RegReg, node, inputLow, inputLow, cg); + generateRegRegInstruction(TR::InstOpCode::POPCNT4RegReg, node, inputHigh, inputHigh, cg); + generateRegRegInstruction(TR::InstOpCode::ADD4RegReg, node, inputLow, inputHigh, cg); + + cg->stopUsingRegister(inputHigh); + node->setRegister(resultReg); + cg->decReferenceCount(child); + return resultReg; } TR::Register* diff --git a/compiler/x/i386/codegen/OMRTreeEvaluator.hpp b/compiler/x/i386/codegen/OMRTreeEvaluator.hpp index cf1943a4386..e9d5c873961 100644 --- a/compiler/x/i386/codegen/OMRTreeEvaluator.hpp +++ b/compiler/x/i386/codegen/OMRTreeEvaluator.hpp @@ -392,7 +392,6 @@ class OMR_EXTENSIBLE TreeEvaluator: public OMR::X86::TreeEvaluator static TR::Register 
*ilbitEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *inolzEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *inotzEvaluator(TR::Node *node, TR::CodeGenerator *cg); - static TR::Register *ipopcntEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *lhbitEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *llbitEvaluator(TR::Node *node, TR::CodeGenerator *cg); static TR::Register *lnolzEvaluator(TR::Node *node, TR::CodeGenerator *cg);