From ff665cf851a780da431360c3f1bae288ebf08396 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:39:12 +0000 Subject: [PATCH 01/69] Rebasing to dev --- src/include/simeng/BranchPredictor.hh | 67 +++++++++++++++++++++++++++ src/lib/models/inorder/Core.cc | 2 +- test/unit/MockBranchPredictor.hh | 1 + 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 src/include/simeng/BranchPredictor.hh diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh new file mode 100644 index 0000000000..145f709f0f --- /dev/null +++ b/src/include/simeng/BranchPredictor.hh @@ -0,0 +1,67 @@ +#pragma once + +#include +#include + +namespace simeng { + +/** The types of branches recognised. */ +enum class BranchType { + Conditional = 0, + LoopClosing, + Return, + SubroutineCall, + Unconditional, + Unknown +}; + +/** A branch result prediction for an instruction. */ +struct BranchPrediction { + /** Whether the branch will be taken. */ + bool taken; + + /** The branch instruction's target address. If `taken = false`, the value + * will be ignored. */ + uint64_t target; + + /** Check for equality of two branch predictions . */ + bool operator==(const BranchPrediction& other) { + if ((taken == other.taken) && (target == other.target)) + return true; + else + return false; + } + + /** Check for inequality of two branch predictions . */ + bool operator!=(const BranchPrediction& other) { + if ((taken != other.taken) || (target != other.target)) + return true; + else + return false; + } +}; + +/** An abstract branch predictor interface. */ +class BranchPredictor { + public: + virtual ~BranchPredictor(){}; + + /** Generate a branch prediction for the specified instruction address with a + * branch type and possible known branch offset. 
*/ + virtual BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) = 0; + + /** Provide branch results to update the prediction model for the specified + * instruction address. */ + virtual void update(uint64_t address, bool taken, uint64_t targetAddress, + BranchType type) = 0; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. + */ + virtual void flush(uint64_t address) = 0; + + virtual void addToFTQ(uint64_t address) = 0; +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/lib/models/inorder/Core.cc b/src/lib/models/inorder/Core.cc index add307f714..80b1281118 100644 --- a/src/lib/models/inorder/Core.cc +++ b/src/lib/models/inorder/Core.cc @@ -150,7 +150,7 @@ std::map Core::getStats() const { return {{"cycles", std::to_string(ticks_)}, {"retired", std::to_string(retired)}, {"ipc", ipcStr.str()}, - {"flushes", std::to_string(flushes_)}}; + {"flushes", std::to_string(flushes_)}, } void Core::raiseException(const std::shared_ptr& instruction) { diff --git a/test/unit/MockBranchPredictor.hh b/test/unit/MockBranchPredictor.hh index 2727e6db51..39a7a2cf21 100644 --- a/test/unit/MockBranchPredictor.hh +++ b/test/unit/MockBranchPredictor.hh @@ -14,6 +14,7 @@ class MockBranchPredictor : public BranchPredictor { void(uint64_t address, bool taken, uint64_t targetAddress, BranchType type, uint64_t instructionId)); MOCK_METHOD1(flush, void(uint64_t address)); + MOCK_METHOD1(addToFTQ, void(uint64_t address)); }; } // namespace simeng From e81673f1f4883c4b216043ab5463981f12f45fda Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:44:51 +0000 Subject: [PATCH 02/69] Rebasing to dev --- src/lib/branchpredictors/GenericPredictor.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/lib/branchpredictors/GenericPredictor.cc b/src/lib/branchpredictors/GenericPredictor.cc index fce3fd2b1d..17f813d1f6 
100644 --- a/src/lib/branchpredictors/GenericPredictor.cc +++ b/src/lib/branchpredictors/GenericPredictor.cc @@ -121,6 +121,8 @@ void GenericPredictor::update(uint64_t address, bool isTaken, // We know how many predictions there have since been by the size of the FTQ globalHistory_ ^= (1ull << (ftq_.size())); } + + return; } void GenericPredictor::flush(uint64_t address) { From 40e77095bac81b31821a5cdb87307312cd417723 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:48:58 +0000 Subject: [PATCH 03/69] Rebasing to dev --- src/include/simeng/BranchPredictor.hh | 2 +- test/unit/MockBranchPredictor.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index 145f709f0f..4446f46bf5 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -61,7 +61,7 @@ class BranchPredictor { */ virtual void flush(uint64_t address) = 0; - virtual void addToFTQ(uint64_t address) = 0; + virtual void addToFTQ(uint64_t address, bool taken) = 0; }; } // namespace simeng \ No newline at end of file diff --git a/test/unit/MockBranchPredictor.hh b/test/unit/MockBranchPredictor.hh index 39a7a2cf21..413db7ef98 100644 --- a/test/unit/MockBranchPredictor.hh +++ b/test/unit/MockBranchPredictor.hh @@ -14,7 +14,7 @@ class MockBranchPredictor : public BranchPredictor { void(uint64_t address, bool taken, uint64_t targetAddress, BranchType type, uint64_t instructionId)); MOCK_METHOD1(flush, void(uint64_t address)); - MOCK_METHOD1(addToFTQ, void(uint64_t address)); + MOCK_METHOD2(addToFTQ, void(uint64_t address, bool taken)); }; } // namespace simeng From 6fa281d3f79c3f2824edc8a62d5014600f36afe8 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 5 Mar 2024 17:52:45 +0000 Subject: [PATCH 04/69] Addressing superficial comments on PR --- 
src/include/simeng/BranchPredictor.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index 4446f46bf5..39d2c2f2d5 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -61,6 +61,7 @@ class BranchPredictor { */ virtual void flush(uint64_t address) = 0; + /** Adds instruction to the Fetch Target Queue without making a new prediction */ virtual void addToFTQ(uint64_t address, bool taken) = 0; }; From 31c871ef8f4949699d8484c1396aa445f20425e3 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 11 Mar 2024 16:48:32 +0000 Subject: [PATCH 05/69] Clang format Rebasing --- src/include/simeng/BranchPredictor.hh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index 39d2c2f2d5..eac6e5aaa0 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -61,7 +61,8 @@ class BranchPredictor { */ virtual void flush(uint64_t address) = 0; - /** Adds instruction to the Fetch Target Queue without making a new prediction */ + /** Adds instruction to the Fetch Target Queue without making a new prediction + */ virtual void addToFTQ(uint64_t address, bool taken) = 0; }; From 110c1c6acabf3e5a172c03f56c2283f96f3c2f59 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:03:19 +0100 Subject: [PATCH 06/69] Adding more detail to virtual flush and update functions re order of calls --- src/include/simeng/BranchPredictor.hh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index eac6e5aaa0..1b62beea98 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -52,13 +52,16 @@ class 
BranchPredictor { int64_t knownOffset) = 0; /** Provide branch results to update the prediction model for the specified - * instruction address. */ + * instruction address. Update must be called on instructions in program + * order */ virtual void update(uint64_t address, bool taken, uint64_t targetAddress, BranchType type) = 0; /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. - */ + * via the instruction address. Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed) */ virtual void flush(uint64_t address) = 0; /** Adds instruction to the Fetch Target Queue without making a new prediction From 4b3617ced49933f6a93c07cc1b4446ad2ba0c5e7 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 30 Apr 2024 14:52:16 +0100 Subject: [PATCH 07/69] Moving buffer branch flush functionality from core.cc to PipelineBuffer.hh rebasing --- src/include/simeng/pipeline/PipelineBuffer.hh | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/include/simeng/pipeline/PipelineBuffer.hh b/src/include/simeng/pipeline/PipelineBuffer.hh index 6e128ae684..b65d21c61a 100644 --- a/src/include/simeng/pipeline/PipelineBuffer.hh +++ b/src/include/simeng/pipeline/PipelineBuffer.hh @@ -4,6 +4,8 @@ #include #include +#include "simeng/BranchPredictor.hh" + namespace simeng { namespace pipeline { @@ -73,6 +75,32 @@ class PipelineBuffer { /** Get the width of the buffer slots. 
*/ uint16_t getWidth() const { return width; } + void flushBranchMicroOps(BranchPredictor& branchPredictor) { + for (size_t slot = 0; slot < width; slot++) { + auto& uop = getTailSlots()[slot]; + if (uop != nullptr && uop->isBranch()) { + branchPredictor.flush(uop->getInstructionAddress()); + } + uop = getHeadSlots()[slot]; + if (uop != nullptr && uop->isBranch()) { + branchPredictor.flush(uop->getInstructionAddress()); + } + } + } + + void flushBranchMacroOps(BranchPredictor& branchPredictor) { + for (size_t slot = 0; slot < width; slot++) { + auto& macroOp = getTailSlots()[slot]; + if (!macroOp.empty() && macroOp[0]->isBranch()) { + branchPredictor.flush(macroOp[0]->getInstructionAddress()); + } + macroOp = getHeadSlots()[slot]; + if (!macroOp.empty() && macroOp[0]->isBranch()) { + branchPredictor.flush(macroOp[0]->getInstructionAddress()); + } + } + } + private: /** The width of each row of slots. */ uint16_t width; From a55e292bfcb2e1066a335d2609a580bdc5365e5f Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 11:52:37 +0000 Subject: [PATCH 08/69] Rebasing to dev --- .../developer/arch/supported/aarch64.rst | 4 ++-- .../developer/components/branchPred.rst | 8 ++++---- .../developer/components/coreinstance.rst | 2 +- .../components/pipeline/components.rst | 2 +- .../developer/components/pipeline/units.rst | 10 +++++----- src/include/simeng/BranchPredictor.hh | 14 ++++++------- .../arch/aarch64/helpers/conditional.hh | 4 ++-- src/include/simeng/config/yaml/ryml.hh | 20 +++++++++---------- src/lib/arch/aarch64/Instruction.cc | 2 +- src/lib/arch/riscv/Instruction.cc | 2 +- test/unit/aarch64/InstructionTest.cc | 12 +++++------ test/unit/pipeline/FetchUnitTest.cc | 8 ++++---- test/unit/riscv/InstructionTest.cc | 12 +++++------ 13 files changed, 50 insertions(+), 50 deletions(-) diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst index 
092264e991..6df0028e48 100644 --- a/docs/sphinx/developer/arch/supported/aarch64.rst +++ b/docs/sphinx/developer/arch/supported/aarch64.rst @@ -55,12 +55,12 @@ Additional information The ``FP`` primary identifier is a placeholder to denote both the ``SCALAR`` and ``VECTOR`` primary identifiers such that, amongst the other combinations, ``FP_SIMPLE_ARTH`` expands to be ``SCALAR_SIMPLE_ARTH`` and ``VECTOR_SIMPLE_ARTH``. In some cases it was unnecessary and inconvenient to separate ``SCALAR`` and ``VECTOR`` operations within configuration options, therefore, this instruction group option was provided to solve the issue. -When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. +When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``.
``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. Instruction Splitting ********************* -Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: +Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: - Load pair for X/W/S/D/Q registers. diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index 6a03c85129..f92a50f125 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -30,13 +30,13 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction.
Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. Static Prediction - Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. + Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. Perceptron Predictor -------------------- @@ -48,9 +48,9 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. - The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise.
The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file diff --git a/docs/sphinx/developer/components/coreinstance.rst b/docs/sphinx/developer/components/coreinstance.rst index 8b9e99a449..89b6247db4 100644 --- a/docs/sphinx/developer/components/coreinstance.rst +++ b/docs/sphinx/developer/components/coreinstance.rst @@ -3,7 +3,7 @@ Core Instance The ``CoreInstance`` component supplies the functionality for instantiating all simulation objects and linking them together. -The standard process taken to create an instance of the modelled core is as follows: +The standard process taken to create an instance of the modelled core is as follows: Process the config file Either the passed configuration file path, or default configuration string, is used to generate the model configuration class.
All subsequent parameterised instantiations of simulation objects utilise this configuration class. diff --git a/docs/sphinx/developer/components/pipeline/components.rst b/docs/sphinx/developer/components/pipeline/components.rst index ab62a6b919..f74d5e892e 100644 --- a/docs/sphinx/developer/components/pipeline/components.rst +++ b/docs/sphinx/developer/components/pipeline/components.rst @@ -69,7 +69,7 @@ Once a completion slot is available, the load will be executed, the results broa Stores ****** -As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. +As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. The generation of store instruction write requests are carried out after its commitment. The reasoning for this design decision is as followed. With SimEng supporting speculative execution, processed store instruction may come from an incorrectly speculated branch direction and will inevitably be removed from the pipeline. Therefore, it is important to ensure any write requests are valid, concerning speculative execution, as the performance cost of reversing a completed write request is high.
diff --git a/docs/sphinx/developer/components/pipeline/units.rst b/docs/sphinx/developer/components/pipeline/units.rst index 52358f4658..922b24f5a6 100644 --- a/docs/sphinx/developer/components/pipeline/units.rst +++ b/docs/sphinx/developer/components/pipeline/units.rst @@ -23,7 +23,7 @@ Behaviour The fetch unit fetches memory in discrete boundary-aligned blocks, according to the current program counter (PC); this is to prevent the fetched block overlapping an inaccessible or unmapped memory region that may result in the request incorrectly responding with a fault despite the validity of the initial region. -Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. +Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding.
This standard process of pre-decoding, predicting, and updating the PC continues until one of the following occurs: @@ -32,7 +32,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues The maximum number of fetched macro-ops is reached The current block is saved and processing resumes in the next cycle. - A branch is predicted as taken + A branch is predicted as taken A block of memory from the new address may be requested, and processing will resume once the data is available. The fetched memory block is exhausted @@ -43,7 +43,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues Loop Buffer *********** -Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. +Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. Each entry of the loop buffer is the encoding of the Macro-Op. Therefore, when supplying an instruction from the loop buffer, the pre-decoding step must still be performed. This was required to avoid any issues with multiple instantiations of the same instruction editing each others class members. @@ -59,7 +59,7 @@ FILLING The branch representing the loop has been found and the buffer is being filled until it is seen again. SUPPLYING - The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body.
+ The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. The detection of a loop and the branch which represents it comes from the ROB. More information can be found :ref:`here `. @@ -81,7 +81,7 @@ Behaviour Each cycle, the decode unit will read macro-ops from the input buffer, and split them into a stream of ``Instruction`` objects or micro-ops. These ``Instruction`` objects are passed into an internal buffer. -Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. +Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. The cycle ends when all ``Instruction`` objects in the internal buffer have been processed, or a misprediction is identified and all remaining ``Instruction`` objects are flushed. diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh index 1b62beea98..c4da8e1ff9 100644 --- a/src/include/simeng/BranchPredictor.hh +++ b/src/include/simeng/BranchPredictor.hh @@ -17,16 +17,16 @@ enum class BranchType { /** A branch result prediction for an instruction. */ struct BranchPrediction { - /** Whether the branch will be taken.
*/ - bool taken; + /** Whether the branch will be isTaken. */ + bool isTaken; - /** The branch instruction's target address. If `taken = false`, the value + /** The branch instruction's target address. If `isTaken = false`, the value * will be ignored. */ uint64_t target; /** Check for equality of two branch predictions . */ bool operator==(const BranchPrediction& other) { - if ((taken == other.taken) && (target == other.target)) + if ((isTaken == other.isTaken) && (target == other.target)) return true; else return false; @@ -34,7 +34,7 @@ struct BranchPrediction { /** Check for inequality of two branch predictions . */ bool operator!=(const BranchPrediction& other) { - if ((taken != other.taken) || (target != other.target)) + if ((isTaken != other.isTaken) || (target != other.target)) return true; else return false; @@ -54,7 +54,7 @@ class BranchPredictor { /** Provide branch results to update the prediction model for the specified * instruction address. Update must be called on instructions in program * order */ - virtual void update(uint64_t address, bool taken, uint64_t targetAddress, + virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, BranchType type) = 0; /** Provides flushing behaviour for the implemented branch prediction schemes @@ -66,7 +66,7 @@ class BranchPredictor { /** Adds instruction to the Fetch Target Queue without making a new prediction */ - virtual void addToFTQ(uint64_t address, bool taken) = 0; + virtual void addToFTQ(uint64_t address, bool isTaken) = 0; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index e541eb276a..2b3ea1b9c3 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -56,7 +56,7 @@ uint8_t ccmp_reg(srcValContainer& sourceValues, /** Helper function for instructions with the format `cb rn, #imm`. 
* T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple condBranch_cmpToZero( srcValContainer& sourceValues, @@ -91,7 +91,7 @@ T cs_4ops(srcValContainer& sourceValues, /** Helper function for instructions with the format `tb rn, #imm, * label`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple tbnz_tbz( srcValContainer& sourceValues, diff --git a/src/include/simeng/config/yaml/ryml.hh b/src/include/simeng/config/yaml/ryml.hh index bed8f4620b..c35a4925f9 100644 --- a/src/include/simeng/config/yaml/ryml.hh +++ b/src/include/simeng/config/yaml/ryml.hh @@ -229,7 +229,7 @@ #define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) -/** A preprocessor foreach. Spectacular trick taken from: +/** A preprocessor foreach. Spectacular trick taken from: * http://stackoverflow.com/a/1872506/5875572 * The first argument is for a macro receiving a single argument, * which will be called with every subsequent argument.
There is @@ -1449,7 +1449,7 @@ using std::index_sequence_for; /** C++11 implementation of integer sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template struct integer_sequence { @@ -1461,7 +1461,7 @@ struct integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence = integer_sequence; @@ -1544,19 +1544,19 @@ struct __make_integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_index_sequence = make_integer_sequence; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see
taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence_for = make_index_sequence; #endif @@ -4795,7 +4795,7 @@ namespace detail { /** @internal * @ingroup hash - * @see this was taken a great answer in stackoverflow: + * @see this was isTaken a great answer in stackoverflow: * https://stackoverflow.com/a/34597785/5875572 * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ template @@ -12377,7 +12377,7 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) * * So we fake it by using a dynamic format with an explicit * field size set to the length of the given span. - * This trick is taken from: + * This trick is isTaken from: * https://stackoverflow.com/a/18368910/5875572 */ /* this is the actual format we'll use for scanning */ @@ -14624,7 +14624,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { -//! taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +//! isTaken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -15704,7 +15704,7 @@ template using cspanrs = spanrs; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** A non-owning span which always retains the capacity of the original - * range it was taken from (though it may loose its original size). + * range it was isTaken from (though it may loose its original size). 
* The resizing methods resize(), ltrim(), rtrim() as well * as the subselection methods subspan(), range(), first() and last() can be * used at will without loosing the original capacity; the full capacity span diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index e3b697433e..1bf93c451f 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -106,7 +106,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as taken, it will require a + // Instruction isn't a branch; if predicted as isTaken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index c71b581a60..5eb1091c6b 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -101,7 +101,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as taken, it will require a + // Instruction isn't a branch; if predicted as isTaken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 92b8e9393a..00279300b8 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ b/test/unit/aarch64/InstructionTest.cc @@ -493,7 +493,7 @@ TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch taken) is handled correctly +// Test that a correct prediction (branch isTaken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = 
Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -510,7 +510,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is taken is handled correctly + // Test a correct prediction where branch is isTaken is handled correctly pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -522,7 +522,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not taken) is handled correctly +// Test that a correct prediction (branch not isTaken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_notTaken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -539,7 +539,7 @@ TEST_F(AArch64InstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't taken is handled correctly + // Test a correct prediction where a branch isn't isTaken is handled correctly pred = {false, 80 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -580,7 +580,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); } -// Test that an incorrect prediction (wrong taken) is handled correctly +// Test that an incorrect prediction (wrong isTaken) is handled correctly TEST_F(AArch64InstructionTest, incorrectPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -597,7 +597,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - taken is wrong + // Test an 
incorrect prediction is handled correctly - isTaken is wrong pred = {true, 100 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 90870fb5e2..bbb5f32ddf 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -232,7 +232,7 @@ TEST_P(PipelineFetchUnitTest, halted) { EXPECT_TRUE(fetchUnit.hasHalted()); } -// Tests that fetching a branch instruction (predicted taken) mid block causes a +// Tests that fetching a branch instruction (predicted isTaken) mid block causes a // branch stall + discards the remaining fetched instructions TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { const uint8_t pc = 16; @@ -266,7 +266,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); fetchUnit.tick(); - // For second tick, process a taken branch meaning rest of block is discarded + // For second tick, process a isTaken branch meaning rest of block is discarded // & a new memory block is requested EXPECT_CALL(memory, getCompletedReads()).Times(0); EXPECT_CALL(memory, clearCompletedReads()).Times(1); @@ -388,7 +388,7 @@ TEST_P(PipelineFetchUnitTest, supplyFromLoopBuffer) { } // Tests the functionality of idling the supply to the Loop Buffer one of not -// taken branch at the loopBoundaryAddress_ +// isTaken branch at the loopBoundaryAddress_ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set instructions to be fetched from memory memory::MemoryReadResult memReadResultA = { @@ -432,7 +432,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { EXPECT_CALL(predictor, predict(_, _, _)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); - // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the + // Attempt to fill Loop Buffer but prevent it on a not isTaken outcome at the // 
loopBoundaryAddress_ branch // Tick 4 times to process all 16 bytes of fetched data for (int i = 0; i < 4; i++) { diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index 6103cd4f5c..c40b503a6c 100644 --- a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -467,7 +467,7 @@ TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch taken) is handled correctly +// Test that a correct prediction (branch isTaken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -484,7 +484,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is taken is handled correctly + // Test a correct prediction where branch is isTaken is handled correctly pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -497,7 +497,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not taken) is handled correctly +// Test that a correct prediction (branch not isTaken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_notTaken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -514,7 +514,7 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't taken is handled correctly + // Test a correct prediction where a branch isn't isTaken is handled correctly // imm operand 0x28 has 4 added implicitly by dissassembler pred = {false, 400 + 4}; insn.setBranchPrediction(pred); @@ 
-559,7 +559,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 400 - 86); } -// Test that an incorrect prediction (wrong taken) is handled correctly +// Test that an incorrect prediction (wrong isTaken) is handled correctly TEST_F(RiscVInstructionTest, incorrectPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -576,7 +576,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - taken is wrong + // Test an incorrect prediction is handled correctly - isTaken is wrong // imm operand 0x28 has 4 added implicitly by dissassembler pred = {true, 400 - 86}; insn.setBranchPrediction(pred); From 17c9bafef46262f0a4871624e1ed7fd44c1e75ac Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:01:05 +0000 Subject: [PATCH 09/69] Rebasing to dev --- .../branchPredictors/BranchPredictor.hh | 72 +++++++ .../branchpredictors/BranchPredictor.hh | 115 ++++------ .../branchpredictors/GenericPredictor.hh | 89 -------- .../branchpredictors/PerceptronPredictor.hh | 105 --------- src/include/simeng/pipeline/PipelineBuffer.hh | 2 +- src/lib/branchpredictors/GenericPredictor.cc | 158 -------------- .../branchpredictors/PerceptronPredictor.cc | 201 ------------------ 7 files changed, 118 insertions(+), 624 deletions(-) create mode 100644 src/include/simeng/branchPredictors/BranchPredictor.hh delete mode 100644 src/include/simeng/branchpredictors/GenericPredictor.hh delete mode 100644 src/include/simeng/branchpredictors/PerceptronPredictor.hh delete mode 100644 src/lib/branchpredictors/GenericPredictor.cc delete mode 100644 src/lib/branchpredictors/PerceptronPredictor.cc diff --git a/src/include/simeng/branchPredictors/BranchPredictor.hh b/src/include/simeng/branchPredictors/BranchPredictor.hh new file 
mode 100644 index 0000000000..c4da8e1ff9 --- /dev/null +++ b/src/include/simeng/branchPredictors/BranchPredictor.hh @@ -0,0 +1,72 @@ +#pragma once + +#include +#include + +namespace simeng { + +/** The types of branches recognised. */ +enum class BranchType { + Conditional = 0, + LoopClosing, + Return, + SubroutineCall, + Unconditional, + Unknown +}; + +/** A branch result prediction for an instruction. */ +struct BranchPrediction { + /** Whether the branch will be isTaken. */ + bool isTaken; + + /** The branch instruction's target address. If `isTaken = false`, the value + * will be ignored. */ + uint64_t target; + + /** Check for equality of two branch predictions . */ + bool operator==(const BranchPrediction& other) { + if ((isTaken == other.isTaken) && (target == other.target)) + return true; + else + return false; + } + + /** Check for inequality of two branch predictions . */ + bool operator!=(const BranchPrediction& other) { + if ((isTaken != other.isTaken) || (target != other.target)) + return true; + else + return false; + } +}; + +/** An abstract branch predictor interface. */ +class BranchPredictor { + public: + virtual ~BranchPredictor(){}; + + /** Generate a branch prediction for the specified instruction address with a + * branch type and possible known branch offset. */ + virtual BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) = 0; + + /** Provide branch results to update the prediction model for the specified + * instruction address. Update must be called on instructions in program + * order */ + virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type) = 0; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. 
Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed) */ + virtual void flush(uint64_t address) = 0; + + /** Adds instruction to the Fetch Target Queue without making a new prediction + */ + virtual void addToFTQ(uint64_t address, bool isTaken) = 0; +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh index 7779fe0703..c4da8e1ff9 100644 --- a/src/include/simeng/branchpredictors/BranchPredictor.hh +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -3,95 +3,70 @@ #include #include -#include "simeng/Instruction.hh" -#include "simeng/branchpredictors/BranchPrediction.hh" -#include "simeng/pipeline/PipelineBuffer.hh" - namespace simeng { +/** The types of branches recognised. */ +enum class BranchType { + Conditional = 0, + LoopClosing, + Return, + SubroutineCall, + Unconditional, + Unknown +}; + +/** A branch result prediction for an instruction. */ +struct BranchPrediction { + /** Whether the branch will be isTaken. */ + bool isTaken; + + /** The branch instruction's target address. If `isTaken = false`, the value + * will be ignored. */ + uint64_t target; + + /** Check for equality of two branch predictions . */ + bool operator==(const BranchPrediction& other) { + if ((isTaken == other.isTaken) && (target == other.target)) + return true; + else + return false; + } + + /** Check for inequality of two branch predictions . */ + bool operator!=(const BranchPrediction& other) { + if ((isTaken != other.isTaken) || (target != other.target)) + return true; + else + return false; + } +}; + /** An abstract branch predictor interface. 
*/ class BranchPredictor { public: virtual ~BranchPredictor(){}; - /** Generate a branch prediction for the supplied instruction address, a - * branch type, and a known branch offset. Returns a branch direction and - * branch target address. */ + /** Generate a branch prediction for the specified instruction address with a + * branch type and possible known branch offset. */ virtual BranchPrediction predict(uint64_t address, BranchType type, int64_t knownOffset) = 0; - /** Updates appropriate predictor model objects based on the address, type and - * outcome of the branch instruction. Update must be called on - * branches in program order. To check this, instructionId is also passed - * to this function. */ + /** Provide branch results to update the prediction model for the specified + * instruction address. Update must be called on instructions in program + * order */ virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type, uint64_t instructionId) = 0; + BranchType type) = 0; /** Provides flushing behaviour for the implemented branch prediction schemes * via the instruction address. Branches must be flushed in reverse * program order (though, if a block of n instructions is being flushed at * once, the exact order that the individual instructions within this block - * are flushed does not matter so long as they are all flushed). */ + * are flushed does not matter so long as they are all flushed) */ virtual void flush(uint64_t address) = 0; - /** - * Overloaded function for flushing branch instructions from a - * PipelineBuffer. Accepts a reference to a PipelineBuffer of microOps. - * Iterates over the entries of the PipelineBuffer and, if they are a - * branch instruction, flushes them. 
- */ - void flushBranchesInBufferFromSelf( - pipeline::PipelineBuffer>& buffer) { - for (size_t slot = 0; slot < buffer.getWidth(); slot++) { - auto& uop = buffer.getTailSlots()[slot]; - if (uop != nullptr && uop->isBranch()) { - flush(uop->getInstructionAddress()); - } - - uop = buffer.getHeadSlots()[slot]; - if (uop != nullptr && uop->isBranch()) { - flush(uop->getInstructionAddress()); - } - } - } - - /** - * Overloaded function for flushing branch instructions from a - * PipelineBuffer. Accepts a reference to a PipelineBuffer macroOps. - * Iterates over the entries of the PipelineBuffer and, if they are a - * branch instruction, flushes them. + /** Adds instruction to the Fetch Target Queue without making a new prediction */ - void flushBranchesInBufferFromSelf( - pipeline::PipelineBuffer>>& - buffer) { - for (size_t slot = 0; slot < buffer.getWidth(); slot++) { - auto& macroOp = buffer.getTailSlots()[slot]; - for (size_t uop = 0; uop < macroOp.size(); uop++) { - if (macroOp[uop]->isBranch()) { - flush(macroOp[uop]->getInstructionAddress()); - } - } - macroOp = buffer.getHeadSlots()[slot]; - for (size_t uop = 0; uop < macroOp.size(); uop++) { - if (macroOp[uop]->isBranch()) { - flush(macroOp[uop]->getInstructionAddress()); - } - } - } - } - - /** lastUpdatedInstructionId_ is used only in debug mode. Clang throws a - * warning (which becomes an error with our cmake flags) for unused - * variables. If the [[maybe_unused]] attribute is added to avoid this, - * then gcc throws a warning (which becomes an error) because it ignores - * this attribute. Therefore, to avoid the above catch 22, this variable is - * hidden behind an ifdef such that it is declared only in debug mode; when - * it is used. */ -#ifndef NDEBUG - /** The Id of the last instruction that update was called on -- used to - * ensure that update is called in program order. 
*/ - uint64_t lastUpdatedInstructionId_ = 0; -#endif + virtual void addToFTQ(uint64_t address, bool isTaken) = 0; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/GenericPredictor.hh b/src/include/simeng/branchpredictors/GenericPredictor.hh deleted file mode 100644 index ae1aff6d05..0000000000 --- a/src/include/simeng/branchpredictors/GenericPredictor.hh +++ /dev/null @@ -1,89 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "simeng/branchpredictors/BranchPredictor.hh" -#include "simeng/config/SimInfo.hh" - -namespace simeng { - -/** A generic branch predictor implementing well known/text book branch - * predictor logic. The following predictors have been included: - * - * - Static predictor based on pre-allocated branch type. - * - * - A Branch Target Buffer (BTB) with a local and global indexing scheme and a - * 2-bit saturating counter. - * - * - A Return Address Stack (RAS) is also in use. - */ - -class GenericPredictor : public BranchPredictor { - public: - /** Initialise predictor models. */ - GenericPredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); - ~GenericPredictor(); - - /** Generate a branch prediction for the supplied instruction address, a - * branch type, and a known branch offset. Returns a branch direction and - * branch target address. */ - BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) override; - - /** Updates appropriate predictor model objects based on the address, type and - * outcome of the branch instruction. Update must be called on - * branches in program order. To check this, instructionId is also passed - * to this function. */ - void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type, uint64_t instructionId) override; - - /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. 
Branches must be flushed in reverse - * program order (though, if a block of n instructions is being flushed at - * once, the exact order that the individual instructions within this block - * are flushed does not matter so long as they are all flushed). */ - void flush(uint64_t address) override; - - private: - /** The bitlength of the BTB index; BTB will have 2^bits entries. */ - uint8_t btbBits_; - - /** A 2^bits length vector of pairs containing a satCntBits_-bit saturating - * counter and a branch target. */ - std::vector> btb_; - - /** Fetch Target Queue containing the direction prediction and previous global - * history state of branches that are currently unresolved */ - std::deque> ftq_; - - /** The number of bits used to form the saturating counter in a BTB entry. */ - uint8_t satCntBits_; - - /** An n-bit history of previous branch directions where n is equal to - * globalHistoryLength_. Each bit represents a branch taken (1) or not - * taken (0), with the most recent branch being the least-significant-bit */ - uint64_t globalHistory_ = 0; - - /** The number of previous branch directions recorded globally. */ - uint16_t globalHistoryLength_; - - /** A bit mask for truncating the global history to the correct size. - * Stored as a member variable to avoid duplicative calculation */ - uint64_t globalHistoryMask_; - - /** A return address stack. */ - std::deque ras_; - - /** RAS history with instruction address as the keys. A non-zero value - * represents the target prediction for a return instruction and a 0 entry for - * a branch-and-link instruction. */ - std::map rasHistory_; - - /** The size of the RAS. 
*/ - uint16_t rasSize_; -}; - -} // namespace simeng diff --git a/src/include/simeng/branchpredictors/PerceptronPredictor.hh b/src/include/simeng/branchpredictors/PerceptronPredictor.hh deleted file mode 100644 index d9e05bca52..0000000000 --- a/src/include/simeng/branchpredictors/PerceptronPredictor.hh +++ /dev/null @@ -1,105 +0,0 @@ -#pragma once - -#include -#include -#include -#include - -#include "simeng/branchpredictors/BranchPredictor.hh" -#include "simeng/config/SimInfo.hh" - -namespace simeng { - -/** A Perceptron branch predictor implementing the branch predictor described in - * Jimenez and Lin ("Dynamic branch prediction with perceptrons", IEEE High- - * Performance Computer Architecture Symposium Proceedings (2001), 197-206 -- - * https://www.cs.utexas.edu/~lin/papers/hpca01.pdf). - * The following predictors have been included: - * - * - Static predictor based on pre-allocated branch type. - * - * - A Branch Target Buffer (BTB) with a local and global indexing scheme and a - * perceptron. - * - * - A Return Address Stack (RAS) is also in use. - */ - -class PerceptronPredictor : public BranchPredictor { - public: - /** Initialise predictor models. */ - PerceptronPredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); - ~PerceptronPredictor(); - - /** Generate a branch prediction for the supplied instruction address, a - * branch type, and a known branch offset. Returns a branch direction and - * branch target address. */ - BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) override; - - /** Updates appropriate predictor model objects based on the address, type and - * outcome of the branch instruction. Update must be called on - * branches in program order. To check this, instructionId is also passed - * to this function. 
*/ - void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type, uint64_t instructionId) override; - - /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. Branches must be flushed in reverse - * program order (though, if a block of n instructions is being flushed at - * once, the exact order that the individual instructions within this block - * are flushed does not matter so long as they are all flushed). */ - void flush(uint64_t address) override; - - private: - /** Returns the dot product of a perceptron and a history vector. Used to - * determine a direction prediction */ - int64_t getDotProduct(const std::vector& perceptron, - uint64_t history); - - /** The length in bits of the BTB index; BTB will have 2^bits entries. */ - uint64_t btbBits_; - - /** A 2^bits length vector of pairs containing a perceptron with - * globalHistoryLength_ + 1 inputs, and a branch target. - * The perceptrons are used to provide a branch direction prediction by - * taking a dot product with the global history, as described - * in Jiminez and Lin */ - std::vector, uint64_t>> btb_; - - /** Fetch Target Queue containing the dot product of the perceptron and the - * global history; and the global history, both at the time of prediction, - * for each of the branch instructions that are currently unresolved. The dot - * product represents the confidence of the perceptrons direction - * prediction and is needed for a correct update when the branch - * instruction is resolved. */ - std::deque> ftq_; - - /** An n-bit history of previous branch directions where n is equal to - * globalHistoryLength_. Each bit represents a branch taken (1) or not - * taken (0), with the most recent branch being the least-significant-bit */ - uint64_t globalHistory_ = 0; - - /** The number of previous branch directions recorded globally. 
*/ - uint64_t globalHistoryLength_; - - /** A bit mask for truncating the global history to the correct size. - * Stored as a member variable to avoid duplicative calculation */ - uint64_t globalHistoryMask_; - - /** The magnitude of the dot product of the perceptron and the global history, - * below which the perceptron's weight must be updated */ - uint64_t trainingThreshold_; - - /** A return address stack. */ - std::deque ras_; - - /** RAS history with instruction address as the keys. A non-zero value - * represents the target prediction for a return instruction and a 0 entry for - * a branch-and-link instruction. */ - std::map rasHistory_; - - /** The size of the RAS. */ - uint64_t rasSize_; -}; - -} // namespace simeng diff --git a/src/include/simeng/pipeline/PipelineBuffer.hh b/src/include/simeng/pipeline/PipelineBuffer.hh index b65d21c61a..2799d54fa9 100644 --- a/src/include/simeng/pipeline/PipelineBuffer.hh +++ b/src/include/simeng/pipeline/PipelineBuffer.hh @@ -4,7 +4,7 @@ #include #include -#include "simeng/BranchPredictor.hh" +#include "simeng/branchPredictors/BranchPredictor.hh" namespace simeng { namespace pipeline { diff --git a/src/lib/branchpredictors/GenericPredictor.cc b/src/lib/branchpredictors/GenericPredictor.cc deleted file mode 100644 index 17f813d1f6..0000000000 --- a/src/lib/branchpredictors/GenericPredictor.cc +++ /dev/null @@ -1,158 +0,0 @@ -#include "simeng/branchpredictors/GenericPredictor.hh" - -#include - -namespace simeng { - -GenericPredictor::GenericPredictor(ryml::ConstNodeRef config) - : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), - satCntBits_( - config["Branch-Predictor"]["Saturating-Count-Bits"].as()), - globalHistoryLength_( - config["Branch-Predictor"]["Global-History-Length"].as()), - rasSize_(config["Branch-Predictor"]["RAS-entries"].as()) { - // Calculate the saturation counter boundary between weakly taken and - // not-taken. 
`(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state - // value - uint8_t weaklyTaken = 1 << (satCntBits_ - 1); - uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] - .as() == "Always-Taken") - ? weaklyTaken - : (weaklyTaken - 1); - // Create branch prediction structures - btb_ = std::vector>(1ull << btbBits_, - {satCntVal, 0}); - - // Generate a bitmask that is used to ensure only the relevant number of - // bits are stored in the global history. This is two times the - // globalHistoryLength_ to allow rolling back of the speculatively updated - // global history in the event of a misprediction. - globalHistoryMask_ = (1ull << (globalHistoryLength_ * 2)) - 1; -} - -GenericPredictor::~GenericPredictor() { - btb_.clear(); - ras_.clear(); - rasHistory_.clear(); - ftq_.clear(); -} - -BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, - int64_t knownOffset) { - // Get index via an XOR hash between the global history and the instruction - // address. This hash is then ANDed to keep it within bounds of the btb. - // The address is shifted to remove the two least-significant bits as these - // are always 0 in an ISA with 4-byte aligned instructions. - uint64_t hashedIndex = - ((address >> 2) ^ globalHistory_) & ((1ull << btbBits_) - 1); - - // Get prediction from BTB - bool direction = btb_[hashedIndex].first >= (1ull << (satCntBits_ - 1)); - uint64_t target = - (knownOffset != 0) ? 
address + knownOffset : btb_[hashedIndex].second; - BranchPrediction prediction = {direction, target}; - - // Amend prediction based on branch type - if (type == BranchType::Unconditional) { - prediction.isTaken = true; - } else if (type == BranchType::Return) { - prediction.isTaken = true; - // Return branches can use the RAS if an entry is available - if (ras_.size() > 0) { - prediction.target = ras_.back(); - // Record top of RAS used for target prediction - rasHistory_[address] = ras_.back(); - ras_.pop_back(); - } - } else if (type == BranchType::SubroutineCall) { - prediction.isTaken = true; - // Subroutine call branches must push their associated return address to RAS - if (ras_.size() >= rasSize_) { - ras_.pop_front(); - } - ras_.push_back(address + 4); - // Record that this address is a branch-and-link instruction - rasHistory_[address] = 0; - } else if (type == BranchType::Conditional) { - if (!prediction.isTaken) prediction.target = address + 4; - } - - // Store the hashed index for correct hashing in update() - ftq_.emplace_back(prediction.isTaken, hashedIndex); - - // Speculatively update the global history - globalHistory_ = - ((globalHistory_ << 1) | prediction.isTaken) & globalHistoryMask_; - - return prediction; -} - -void GenericPredictor::update(uint64_t address, bool isTaken, - uint64_t targetAddress, BranchType type, - uint64_t instructionId) { - // Make sure that this function is called in program order; and then update - // the lastUpdatedInstructionId variable - assert(instructionId >= lastUpdatedInstructionId_ && - (lastUpdatedInstructionId_ = instructionId) >= 0 && - "Update not called on branch instructions in program order"); - - // Get previous prediction and index calculated from the FTQ - bool prevPrediction = ftq_.front().first; - uint64_t hashedIndex = ftq_.front().second; - ftq_.pop_front(); - - // Calculate 2-bit saturating counter value - uint8_t satCntVal = btb_[hashedIndex].first; - // Only alter value if it would transition to 
a valid state - if (!((satCntVal == (1 << satCntBits_) - 1) && isTaken) && - !(satCntVal == 0 && !isTaken)) { - satCntVal += isTaken ? 1 : -1; - } - - // Update BTB entry - btb_[hashedIndex].first = satCntVal; - if (isTaken) { - btb_[hashedIndex].second = targetAddress; - } - - // Update global history if prediction was incorrect - if (prevPrediction != isTaken) { - // Bit-flip the global history bit corresponding to this prediction - // We know how many predictions there have since been by the size of the FTQ - globalHistory_ ^= (1ull << (ftq_.size())); - } - - return; -} - -void GenericPredictor::flush(uint64_t address) { - // If address interacted with RAS, rewind entry - auto it = rasHistory_.find(address); - if (it != rasHistory_.end()) { - uint64_t target = it->second; - if (target != 0) { - // If history entry belongs to a return instruction, push target back onto - // stack - if (ras_.size() >= rasSize_) { - ras_.pop_front(); - } - ras_.push_back(target); - } else { - // If history entry belongs to a branch-and-link instruction, pop target - // off of stack - if (ras_.size()) { - ras_.pop_back(); - } - } - rasHistory_.erase(it); - } - - assert((ftq_.size() > 0) && - "Cannot flush instruction from Branch Predictor " - "when the ftq is empty"); - ftq_.pop_back(); - - // Roll back global history - globalHistory_ >>= 1; -} -} // namespace simeng diff --git a/src/lib/branchpredictors/PerceptronPredictor.cc b/src/lib/branchpredictors/PerceptronPredictor.cc deleted file mode 100644 index 2e517939eb..0000000000 --- a/src/lib/branchpredictors/PerceptronPredictor.cc +++ /dev/null @@ -1,201 +0,0 @@ -#include "simeng/branchpredictors/PerceptronPredictor.hh" - -namespace simeng { - -PerceptronPredictor::PerceptronPredictor(ryml::ConstNodeRef config) - : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), - globalHistoryLength_( - config["Branch-Predictor"]["Global-History-Length"].as()), - rasSize_(config["Branch-Predictor"]["RAS-entries"].as()) { - // Build BTB 
based on config options - uint32_t btbSize = (1ul << btbBits_); - btb_.resize(btbSize); - - // Initialise perceptron values with 0 for the global history weights, and 1 - // for the bias weight; and initialise the target with 0 (i.e., unknown) - for (uint32_t i = 0; i < btbSize; i++) { - btb_[i].first.assign(globalHistoryLength_, 0); - btb_[i].first.push_back(1); - btb_[i].second = 0; - } - - // Set up training threshold according to empirically determined formula - trainingThreshold_ = (uint64_t)((1.93 * globalHistoryLength_) + 14); - - // Generate a bitmask that is used to ensure only the relevant number of - // bits are stored in the global history. This is two times the - // globalHistoryLength_ to allow rolling back of the speculatively updated - // global history in the event of a misprediction. - globalHistoryMask_ = (1ull << (globalHistoryLength_ * 2)) - 1; -} - -PerceptronPredictor::~PerceptronPredictor() { - ras_.clear(); - rasHistory_.clear(); - ftq_.clear(); -} - -BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, - int64_t knownOffset) { - // Get the hashed index for the prediction table. XOR the global history with - // the non-zero bits of the address, and then keep only the btbBits_ bits of - // the output to keep it in bounds of the prediction table. - // The address is shifted to remove the two least-significant bits as these - // are always 0 in an ISA with 4-byte aligned instructions. - uint64_t hashedIndex = - ((address >> 2) ^ globalHistory_) & ((1ull << btbBits_) - 1); - - // Retrieve the perceptron from the BTB - std::vector perceptron = btb_[hashedIndex].first; - - // Get dot product of perceptron and history - int64_t Pout = getDotProduct(perceptron, globalHistory_); - - // Determine direction prediction based on its sign - bool direction = (Pout >= 0); - - // If there is a known offset then calculate target accordingly, otherwise - // retrieve the target prediction from the btb. 
- uint64_t target = - (knownOffset != 0) ? address + knownOffset : btb_[hashedIndex].second; - - BranchPrediction prediction = {direction, target}; - - // Amend prediction based on branch type - if (type == BranchType::Unconditional) { - prediction.isTaken = true; - } else if (type == BranchType::Return) { - prediction.isTaken = true; - // Return branches can use the RAS if an entry is available - if (ras_.size() > 0) { - prediction.target = ras_.back(); - // Record top of RAS used for target prediction - rasHistory_[address] = ras_.back(); - ras_.pop_back(); - } - } else if (type == BranchType::SubroutineCall) { - prediction.isTaken = true; - // Subroutine call branches must push their associated return address to RAS - if (ras_.size() >= rasSize_) { - ras_.pop_front(); - } - ras_.push_back(address + 4); - // Record that this address is a branch-and-link instruction - rasHistory_[address] = 0; - } else if (type == BranchType::Conditional) { - if (!prediction.isTaken) prediction.target = address + 4; - } - - // Store the Pout and global history for correct update() -- - // needs to be global history and not the hashed index as hashing loses - // information and the global history is required for updating perceptrons. 
- ftq_.emplace_back(Pout, globalHistory_); - - // Speculatively update the global history based on the direction - // prediction being made - globalHistory_ = - ((globalHistory_ << 1) | prediction.isTaken) & globalHistoryMask_; - - return prediction; -} - -void PerceptronPredictor::update(uint64_t address, bool isTaken, - uint64_t targetAddress, BranchType type, - uint64_t instructionId) { - // Make sure that this function is called in program order; and then update - // the lastUpdatedInstructionId variable - assert(instructionId >= lastUpdatedInstructionId_ && - (lastUpdatedInstructionId_ = instructionId) >= 0 && - "Update not called on branch instructions in program order"); - - // Retrieve the previous global history and branch direction prediction from - // the front of the ftq (assumes branches are updated in program order). - int64_t prevPout = ftq_.front().first; - uint64_t prevGlobalHistory = ftq_.front().second; - ftq_.pop_front(); - - // Work out hashed index - uint64_t hashedIndex = - ((address >> 2) ^ prevGlobalHistory) & ((1ull << btbBits_) - 1); - - std::vector perceptron = btb_[hashedIndex].first; - - // Work out the most recent prediction - bool directionPrediction = (prevPout >= 0); - - // Update the perceptron if the prediction was wrong, or the dot product's - // magnitude was not greater than the training threshold - if ((directionPrediction != isTaken) || - (static_cast(std::abs(prevPout)) < trainingThreshold_)) { - int8_t t = (isTaken) ? 1 : -1; - - for (uint64_t i = 0; i < globalHistoryLength_; i++) { - int8_t xi = ((prevGlobalHistory & - (1ull << ((globalHistoryLength_ - 1) - i))) == 0) - ? 
-1 - : 1; - int8_t product_xi_t = xi * t; - // Make sure no overflow (+-127) - if (!(perceptron[i] == 127 && product_xi_t == 1) && - !(perceptron[i] == -127 && product_xi_t == -1)) { - perceptron[i] += product_xi_t; - } - } - perceptron[globalHistoryLength_] += t; - } - - btb_[hashedIndex].first = perceptron; - if (isTaken) { - btb_[hashedIndex].second = targetAddress; - } - - // Update global history if prediction was incorrect - // Bit-flip the global history bit corresponding to this prediction - // We know how many predictions there have since been by the size of the FTQ - if (directionPrediction != isTaken) globalHistory_ ^= (1ull << (ftq_.size())); -} - -void PerceptronPredictor::flush(uint64_t address) { - // If address interacted with RAS, rewind entry - auto it = rasHistory_.find(address); - if (it != rasHistory_.end()) { - uint64_t target = it->second; - if (target != 0) { - // If history entry belongs to a return instruction, push target back onto - // stack - if (ras_.size() >= rasSize_) { - ras_.pop_front(); - } - ras_.push_back(target); - } else { - // If history entry belongs to a branch-and-link instruction, pop target - // off of stack - if (ras_.size()) { - ras_.pop_back(); - } - } - rasHistory_.erase(it); - } - - assert((ftq_.size() > 0) && - "Cannot flush instruction from Branch Predictor " - "when the ftq is empty"); - ftq_.pop_back(); - - // Roll back global history - globalHistory_ >>= 1; -} - -int64_t PerceptronPredictor::getDotProduct( - const std::vector& perceptron, uint64_t history) { - int64_t Pout = perceptron[globalHistoryLength_]; - for (uint64_t i = 0; i < globalHistoryLength_; i++) { - // Get branch direction for ith entry in the history - bool historyTaken = - ((history & (1ull << ((globalHistoryLength_ - 1) - i))) != 0); - Pout += historyTaken ? 
perceptron[i] : (0 - perceptron[i]); - } - return Pout; -} - -} // namespace simeng From 508a2f4f252e5475727eb41020dde61719b1d4c5 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:01:09 +0000 Subject: [PATCH 10/69] Rebasing to dev --- src/include/simeng/BranchPredictor.hh | 72 --------------------------- 1 file changed, 72 deletions(-) delete mode 100644 src/include/simeng/BranchPredictor.hh diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh deleted file mode 100644 index c4da8e1ff9..0000000000 --- a/src/include/simeng/BranchPredictor.hh +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include -#include - -namespace simeng { - -/** The types of branches recognised. */ -enum class BranchType { - Conditional = 0, - LoopClosing, - Return, - SubroutineCall, - Unconditional, - Unknown -}; - -/** A branch result prediction for an instruction. */ -struct BranchPrediction { - /** Whether the branch will be isTaken. */ - bool isTaken; - - /** The branch instruction's target address. If `isTaken = false`, the value - * will be ignored. */ - uint64_t target; - - /** Check for equality of two branch predictions . */ - bool operator==(const BranchPrediction& other) { - if ((isTaken == other.isTaken) && (target == other.target)) - return true; - else - return false; - } - - /** Check for inequality of two branch predictions . */ - bool operator!=(const BranchPrediction& other) { - if ((isTaken != other.isTaken) || (target != other.target)) - return true; - else - return false; - } -}; - -/** An abstract branch predictor interface. */ -class BranchPredictor { - public: - virtual ~BranchPredictor(){}; - - /** Generate a branch prediction for the specified instruction address with a - * branch type and possible known branch offset. 
*/ - virtual BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) = 0; - - /** Provide branch results to update the prediction model for the specified - * instruction address. Update must be called on instructions in program - * order */ - virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type) = 0; - - /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. Branches must be flushed in reverse - * program order (though, if a block of n instructions is being flushed at - * once, the exact order that the individual instructions within this block - * are flushed does not matter so long as they are all flushed) */ - virtual void flush(uint64_t address) = 0; - - /** Adds instruction to the Fetch Target Queue without making a new prediction - */ - virtual void addToFTQ(uint64_t address, bool isTaken) = 0; -}; - -} // namespace simeng \ No newline at end of file From e0f8121d0b3822bac758839758ba6cda973761e0 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:02:03 +0000 Subject: [PATCH 11/69] Rebasing to dev --- .../AlwaysNotTakenPredictor.hh | 30 ------------------- .../AlwaysNotTakenPredictor.cc | 14 --------- 2 files changed, 44 deletions(-) delete mode 100644 src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh delete mode 100644 src/lib/branchpredictors/AlwaysNotTakenPredictor.cc diff --git a/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh deleted file mode 100644 index 382a495420..0000000000 --- a/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once - -#include "simeng/branchpredictors/BranchPredictor.hh" - -namespace simeng { - -/** An "Always Not Taken" branch predictor; predicts all branches as not - * taken. 
*/ -class AlwaysNotTakenPredictor : public BranchPredictor { - public: - /** Generate a branch prediction for the specified instruction address; will - * always predict not taken. */ - BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) override; - - /** Updates appropriate predictor model objects based on the address, type and - * outcome of the branch instruction. Update must be called on - * branches in program order. To check this, instructionId is also passed - * to this function. */ - void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type, uint64_t instructionId) override; - - /** Provide flush logic for branch prediction scheme. As there's no flush - * logic for an always taken predictor, this does nothing. */ - void flush(uint64_t address) override; - - private: -}; - -} // namespace simeng diff --git a/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc deleted file mode 100644 index f9ccb416bc..0000000000 --- a/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc +++ /dev/null @@ -1,14 +0,0 @@ -#include "simeng/branchpredictors/AlwaysNotTakenPredictor.hh" - -namespace simeng { -BranchPrediction AlwaysNotTakenPredictor::predict( - [[maybe_unused]] uint64_t address, BranchType type, int64_t knownOffset) { - return {false, 0}; -} - -void AlwaysNotTakenPredictor::update(uint64_t address, bool taken, - uint64_t targetAddress, BranchType type, - uint64_t instructionId) {} - -void AlwaysNotTakenPredictor::flush(uint64_t address) {} -} // namespace simeng From af8d1a00a4f58630b81b3c8e4fd2221a2791aad4 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:02:45 +0000 Subject: [PATCH 12/69] Rebasing to dev --- .../developer/arch/supported/aarch64.rst | 4 ++-- .../developer/components/branchPred.rst | 8 ++++---- .../developer/components/coreinstance.rst | 2 +- 
.../components/pipeline/components.rst | 2 +- .../developer/components/pipeline/units.rst | 10 +++++----- .../branchPredictors/BranchPredictor.hh | 2 +- src/include/simeng/config/yaml/ryml.hh | 20 +++++++++---------- src/include/simeng/pipeline/ExecuteUnit.hh | 1 + src/lib/arch/aarch64/Instruction.cc | 2 +- src/lib/arch/riscv/Instruction.cc | 2 +- test/unit/aarch64/InstructionTest.cc | 12 +++++------ test/unit/pipeline/FetchUnitTest.cc | 8 ++++---- test/unit/riscv/InstructionTest.cc | 12 +++++------ 13 files changed, 43 insertions(+), 42 deletions(-) diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst index 6df0028e48..092264e991 100644 --- a/docs/sphinx/developer/arch/supported/aarch64.rst +++ b/docs/sphinx/developer/arch/supported/aarch64.rst @@ -55,12 +55,12 @@ Additional information The ``FP`` primary identifier is a placeholder to denote both the ``SCALAR`` and ``VECTOR`` primary identifiers such that, amongst the other combinations, ``FP_SIMPLE_ARTH`` expands to be ``SCALAR_SIMPLE_ARTH`` and ``VECTOR_SIMPLE_ARTH``. In some cases it was unnecessary and inconvenient to separate ``SCALAR`` and ``VECTOR`` operations within configuration options, therefore, this instruction group option was provided to solve the issue. -When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is isTaken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. 
``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. +When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. Instruction Splitting ********************* -Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is isTaken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: +Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: - Load pair for X/W/S/D/Q registers. 
diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index f92a50f125..6a03c85129 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -30,13 +30,13 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. Static Prediction - Based on the chosen static prediction method of "always isTaken" or "always not isTaken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of isTaken or not-isTaken respectively. + Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. 
Perceptron Predictor -------------------- @@ -48,9 +48,9 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. - The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not isTaken if this is negative, or isTaken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. 
Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file diff --git a/docs/sphinx/developer/components/coreinstance.rst b/docs/sphinx/developer/components/coreinstance.rst index 89b6247db4..8b9e99a449 100644 --- a/docs/sphinx/developer/components/coreinstance.rst +++ b/docs/sphinx/developer/components/coreinstance.rst @@ -3,7 +3,7 @@ Core Instance The ``CoreInstance`` component supplies the functionality for instantiating all simulation objects and linking them together. -The standard process isTaken to create an instance of the modelled core is as follows: +The standard process taken to create an instance of the modelled core is as follows: Process the config file Either the passed configuration file path, or default configuration string, is used to generate the model configuration class. All subsequent parameterised instantiations of simulation objects utilise this configuration class. diff --git a/docs/sphinx/developer/components/pipeline/components.rst b/docs/sphinx/developer/components/pipeline/components.rst index f74d5e892e..ab62a6b919 100644 --- a/docs/sphinx/developer/components/pipeline/components.rst +++ b/docs/sphinx/developer/components/pipeline/components.rst @@ -69,7 +69,7 @@ Once a completion slot is available, the load will be executed, the results broa Stores ****** -As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. 
Once the store is committed, the data is isTaken from the ``storeQueue_`` entry. +As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. The generation of store instruction write requests are carried out after its commitment. The reasoning for this design decision is as followed. With SimEng supporting speculative execution, processed store instruction may come from an incorrectly speculated branch direction and will inevitably be removed from the pipeline. Therefore, it is important to ensure any write requests are valid, concerning speculative execution, as the performance cost of reversing a completed write request is high. diff --git a/docs/sphinx/developer/components/pipeline/units.rst b/docs/sphinx/developer/components/pipeline/units.rst index 922b24f5a6..52358f4658 100644 --- a/docs/sphinx/developer/components/pipeline/units.rst +++ b/docs/sphinx/developer/components/pipeline/units.rst @@ -23,7 +23,7 @@ Behaviour The fetch unit fetches memory in discrete boundary-aligned blocks, according to the current program counter (PC); this is to prevent the fetched block overlapping an inaccessible or unmapped memory region that may result in the request incorrectly responding with a fault despite the validity of the initial region. -Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. 
If the instruction is predicted to be a isTaken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. +Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. This standard process of pre-decoding, predicting, and updating the PC continues until one of the following occurs: @@ -32,7 +32,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues The maximum number of fetched macro-ops is reached The current block is saved and processing resumes in the next cycle. - A branch is predicted as isTaken + A branch is predicted as taken A block of memory from the new address may be requested, and processing will resume once the data is available. The fetched memory block is exhausted @@ -43,7 +43,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues Loop Buffer *********** -Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is isTaken and increases the throughput of the fetch unit. 
+Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. Each entry of the loop buffer is the encoding of the Macro-Op. Therefore, when supplying an instruction from the loop buffer, the pre-decoding step must still be performed. This was required to avoid any issues with multiple instantiations of the same instruction editing each others class members. @@ -59,7 +59,7 @@ FILLING The branch representing the loop has been found and the buffer is being filled until it is seen again. SUPPLYING - The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is isTaken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. + The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. The detection of a loop and the branch which represents it comes from the ROB. More information can be found :ref:`here `. @@ -81,7 +81,7 @@ Behaviour Each cycle, the decode unit will read macro-ops from the input buffer, and split them into a stream of ``Instruction`` objects or micro-ops. These ``Instruction`` objects are passed into an internal buffer. -Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a isTaken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. 
+Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. The cycle ends when all ``Instruction`` objects in the internal buffer have been processed, or a misprediction is identified and all remaining ``Instruction`` objects are flushed. diff --git a/src/include/simeng/branchPredictors/BranchPredictor.hh b/src/include/simeng/branchPredictors/BranchPredictor.hh index c4da8e1ff9..2bcf76eb25 100644 --- a/src/include/simeng/branchPredictors/BranchPredictor.hh +++ b/src/include/simeng/branchPredictors/BranchPredictor.hh @@ -17,7 +17,7 @@ enum class BranchType { /** A branch result prediction for an instruction. */ struct BranchPrediction { - /** Whether the branch will be isTaken. */ + /** Whether the branch will be taken. */ bool isTaken; /** The branch instruction's target address. If `isTaken = false`, the value diff --git a/src/include/simeng/config/yaml/ryml.hh b/src/include/simeng/config/yaml/ryml.hh index c35a4925f9..bed8f4620b 100644 --- a/src/include/simeng/config/yaml/ryml.hh +++ b/src/include/simeng/config/yaml/ryml.hh @@ -229,7 +229,7 @@ #define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) -/** A preprocessor foreach. Spectacular trick isTaken from: +/** A preprocessor foreach. Spectacular trick taken from: * http://stackoverflow.com/a/1872506/5875572 * The first argument is for a macro receiving a single argument, * which will be called with every subsequent argument. 
There is @@ -1449,7 +1449,7 @@ using std::index_sequence_for; /** C++11 implementation of integer sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template struct integer_sequence { @@ -1461,7 +1461,7 @@ struct integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence = integer_sequence; @@ -1544,19 +1544,19 @@ struct __make_integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_index_sequence = make_integer_sequence; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see 
isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence_for = make_index_sequence; #endif @@ -4795,7 +4795,7 @@ namespace detail { /** @internal * @ingroup hash - * @see this was isTaken a great answer in stackoverflow: + * @see this was taken a great answer in stackoverflow: * https://stackoverflow.com/a/34597785/5875572 * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ template @@ -12377,7 +12377,7 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) * * So we fake it by using a dynamic format with an explicit * field size set to the length of the given span. - * This trick is isTaken from: + * This trick is taken from: * https://stackoverflow.com/a/18368910/5875572 */ /* this is the actual format we'll use for scanning */ @@ -14624,7 +14624,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { -//! isTaken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +//! taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -15704,7 +15704,7 @@ template using cspanrs = spanrs; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** A non-owning span which always retains the capacity of the original - * range it was isTaken from (though it may loose its original size). + * range it was taken from (though it may loose its original size). 
* The resizing methods resize(), ltrim(), rtrim() as well * as the subselection methods subspan(), range(), first() and last() can be * used at will without loosing the original capacity; the full capacity span diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index cd11eb23d6..e6420e998f 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -4,6 +4,7 @@ #include #include "simeng/Instruction.hh" +#include "simeng/branchPredictors/BranchPredictor.hh" #include "simeng/pipeline/PipelineBuffer.hh" namespace simeng { diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index 1bf93c451f..e3b697433e 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -106,7 +106,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as isTaken, it will require a + // Instruction isn't a branch; if predicted as taken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 5eb1091c6b..c71b581a60 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -101,7 +101,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as isTaken, it will require a + // Instruction isn't a branch; if predicted as taken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 00279300b8..92b8e9393a 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ 
b/test/unit/aarch64/InstructionTest.cc @@ -493,7 +493,7 @@ TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch isTaken) is handled correctly +// Test that a correct prediction (branch taken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -510,7 +510,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is isTaken is handled correctly + // Test a correct prediction where branch is taken is handled correctly pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -522,7 +522,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not isTaken) is handled correctly +// Test that a correct prediction (branch not taken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_notTaken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -539,7 +539,7 @@ TEST_F(AArch64InstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't isTaken is handled correctly + // Test a correct prediction where a branch isn't taken is handled correctly pred = {false, 80 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -580,7 +580,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); } -// Test that an incorrect prediction (wrong isTaken) is handled correctly +// Test that an incorrect prediction (wrong taken) is 
handled correctly TEST_F(AArch64InstructionTest, incorrectPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -597,7 +597,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - isTaken is wrong + // Test an incorrect prediction is handled correctly - taken is wrong pred = {true, 100 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index bbb5f32ddf..90870fb5e2 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -232,7 +232,7 @@ TEST_P(PipelineFetchUnitTest, halted) { EXPECT_TRUE(fetchUnit.hasHalted()); } -// Tests that fetching a branch instruction (predicted isTaken) mid block causes a +// Tests that fetching a branch instruction (predicted taken) mid block causes a // branch stall + discards the remaining fetched instructions TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { const uint8_t pc = 16; @@ -266,7 +266,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); fetchUnit.tick(); - // For second tick, process a isTaken branch meaning rest of block is discarded + // For second tick, process a taken branch meaning rest of block is discarded // & a new memory block is requested EXPECT_CALL(memory, getCompletedReads()).Times(0); EXPECT_CALL(memory, clearCompletedReads()).Times(1); @@ -388,7 +388,7 @@ TEST_P(PipelineFetchUnitTest, supplyFromLoopBuffer) { } // Tests the functionality of idling the supply to the Loop Buffer one of not -// isTaken branch at the loopBoundaryAddress_ +// taken branch at the loopBoundaryAddress_ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set instructions to be 
fetched from memory memory::MemoryReadResult memReadResultA = { @@ -432,7 +432,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { EXPECT_CALL(predictor, predict(_, _, _)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); - // Attempt to fill Loop Buffer but prevent it on a not isTaken outcome at the + // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the // loopBoundaryAddress_ branch // Tick 4 times to process all 16 bytes of fetched data for (int i = 0; i < 4; i++) { diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index c40b503a6c..6103cd4f5c 100644 --- a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -467,7 +467,7 @@ TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch isTaken) is handled correctly +// Test that a correct prediction (branch taken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -484,7 +484,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is isTaken is handled correctly + // Test a correct prediction where branch is taken is handled correctly pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -497,7 +497,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not isTaken) is handled correctly +// Test that a correct prediction (branch not taken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_notTaken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -514,7 +514,7 @@ 
TEST_F(RiscVInstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't isTaken is handled correctly + // Test a correct prediction where a branch isn't taken is handled correctly // imm operand 0x28 has 4 added implicitly by dissassembler pred = {false, 400 + 4}; insn.setBranchPrediction(pred); @@ -559,7 +559,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 400 - 86); } -// Test that an incorrect prediction (wrong isTaken) is handled correctly +// Test that an incorrect prediction (wrong taken) is handled correctly TEST_F(RiscVInstructionTest, incorrectPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -576,7 +576,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - isTaken is wrong + // Test an incorrect prediction is handled correctly - taken is wrong // imm operand 0x28 has 4 added implicitly by dissassembler pred = {true, 400 - 86}; insn.setBranchPrediction(pred); From f9089e09d724161522d6af73f764a699d54b84f9 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:02:52 +0000 Subject: [PATCH 13/69] Rebasing to dev --- src/include/simeng/branchpredictors/BranchPredictor.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh index c4da8e1ff9..2bcf76eb25 100644 --- a/src/include/simeng/branchpredictors/BranchPredictor.hh +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -17,7 +17,7 @@ enum class BranchType { /** A branch result prediction for an instruction. 
*/ struct BranchPrediction { - /** Whether the branch will be isTaken. */ + /** Whether the branch will be taken. */ bool isTaken; /** The branch instruction's target address. If `isTaken = false`, the value From 3e5b507ab0db4f52979f3c8835c38e02f3702cc5 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:03:12 +0000 Subject: [PATCH 14/69] Rebasing to dev --- src/include/simeng/pipeline/ExecuteUnit.hh | 2 +- src/include/simeng/pipeline/PipelineBuffer.hh | 2 +- test/unit/pipeline/FetchUnitTest.cc | 11 ++++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index e6420e998f..450ad4b384 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -4,7 +4,7 @@ #include #include "simeng/Instruction.hh" -#include "simeng/branchPredictors/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/pipeline/PipelineBuffer.hh" namespace simeng { diff --git a/src/include/simeng/pipeline/PipelineBuffer.hh b/src/include/simeng/pipeline/PipelineBuffer.hh index 2799d54fa9..b6459935ed 100644 --- a/src/include/simeng/pipeline/PipelineBuffer.hh +++ b/src/include/simeng/pipeline/PipelineBuffer.hh @@ -4,7 +4,7 @@ #include #include -#include "simeng/branchPredictors/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" namespace simeng { namespace pipeline { diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 90870fb5e2..ab03dc2292 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -94,7 +94,7 @@ TEST_P(PipelineFetchUnitTest, TickStalled) { EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); - EXPECT_CALL(predictor, predict(_, _, _)).Times(0); + EXPECT_CALL(predictor, predict(_, _, _, _)).Times(0); fetchUnit.tick(); @@ -279,7 +279,8 @@ 
TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); BranchPrediction pred = {true, pc + knownOff}; - EXPECT_CALL(predictor, predict(20, bType, knownOff)).WillOnce(Return(pred)); + EXPECT_CALL(predictor, predict(20, bType, knownOff, true)).WillOnce + (Return(pred)); fetchUnit.tick(); // Ensure on next tick, predecode is not called @@ -325,7 +326,7 @@ TEST_P(PipelineFetchUnitTest, supplyFromLoopBuffer) { // Set the expectation from the predictor to be true so a loop body will // be detected - ON_CALL(predictor, predict(_, _, _)) + ON_CALL(predictor, predict(_, _, _, _)) .WillByDefault(Return(BranchPrediction({true, 0x0}))); // Set Loop Buffer state to be LoopBufferState::FILLING @@ -417,7 +418,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set the first expectation from the predictor to be true so a loop body will // be detected - EXPECT_CALL(predictor, predict(_, _, _)) + EXPECT_CALL(predictor, predict(_, _, _, _)) .WillOnce(Return(BranchPrediction({true, 0x0}))); // Set Loop Buffer state to be LoopBufferState::FILLING @@ -429,7 +430,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Fetch the next block of instructions from memory and change the expected // outcome of the branch predictor fetchUnit.requestFromPC(); - EXPECT_CALL(predictor, predict(_, _, _)) + EXPECT_CALL(predictor, predict(_, _, _, _)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the From 0445478d340995401962c14784f78e670102041f Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:03:15 +0000 Subject: [PATCH 15/69] Rebasing to dev --- .../branchPredictors/BranchPredictor.hh | 72 ------------------- 1 file changed, 72 deletions(-) delete mode 100644 
src/include/simeng/branchPredictors/BranchPredictor.hh diff --git a/src/include/simeng/branchPredictors/BranchPredictor.hh b/src/include/simeng/branchPredictors/BranchPredictor.hh deleted file mode 100644 index 2bcf76eb25..0000000000 --- a/src/include/simeng/branchPredictors/BranchPredictor.hh +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include -#include - -namespace simeng { - -/** The types of branches recognised. */ -enum class BranchType { - Conditional = 0, - LoopClosing, - Return, - SubroutineCall, - Unconditional, - Unknown -}; - -/** A branch result prediction for an instruction. */ -struct BranchPrediction { - /** Whether the branch will be taken. */ - bool isTaken; - - /** The branch instruction's target address. If `isTaken = false`, the value - * will be ignored. */ - uint64_t target; - - /** Check for equality of two branch predictions . */ - bool operator==(const BranchPrediction& other) { - if ((isTaken == other.isTaken) && (target == other.target)) - return true; - else - return false; - } - - /** Check for inequality of two branch predictions . */ - bool operator!=(const BranchPrediction& other) { - if ((isTaken != other.isTaken) || (target != other.target)) - return true; - else - return false; - } -}; - -/** An abstract branch predictor interface. */ -class BranchPredictor { - public: - virtual ~BranchPredictor(){}; - - /** Generate a branch prediction for the specified instruction address with a - * branch type and possible known branch offset. */ - virtual BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) = 0; - - /** Provide branch results to update the prediction model for the specified - * instruction address. Update must be called on instructions in program - * order */ - virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type) = 0; - - /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. 
Branches must be flushed in reverse - * program order (though, if a block of n instructions is being flushed at - * once, the exact order that the individual instructions within this block - * are flushed does not matter so long as they are all flushed) */ - virtual void flush(uint64_t address) = 0; - - /** Adds instruction to the Fetch Target Queue without making a new prediction - */ - virtual void addToFTQ(uint64_t address, bool isTaken) = 0; -}; - -} // namespace simeng \ No newline at end of file From f49e538980cdbb76e6bc1a48d7744a73e9f86eab Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:03:28 +0000 Subject: [PATCH 16/69] Rebasing to dev --- test/unit/pipeline/FetchUnitTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index ab03dc2292..6d891da2b2 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -279,7 +279,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); BranchPrediction pred = {true, pc + knownOff}; - EXPECT_CALL(predictor, predict(20, bType, knownOff, true)).WillOnce + EXPECT_CALL(predictor, predict(20, bType, knownOff, false)).WillOnce (Return(pred)); fetchUnit.tick(); From e688a05924ab1bc6cd2725a713c65e1a2ff44c62 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:03:46 +0000 Subject: [PATCH 17/69] Rebasing to dev --- src/include/simeng/arch/aarch64/helpers/conditional.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index 2b3ea1b9c3..e541eb276a 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ 
b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -56,7 +56,7 @@ uint8_t ccmp_reg(srcValContainer& sourceValues, /** Helper function for instructions with the format `cb rn, #imm`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch isTaken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple condBranch_cmpToZero( srcValContainer& sourceValues, @@ -91,7 +91,7 @@ T cs_4ops(srcValContainer& sourceValues, /** Helper function for instructions with the format `tb rn, #imm, * label`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch isTaken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple tbnz_tbz( srcValContainer& sourceValues, From 1f925eadc594eb06021e47e3e851365657f8c90f Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:03:55 +0000 Subject: [PATCH 18/69] Rebasing to dev --- src/include/simeng/pipeline/PipelineBuffer.hh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/include/simeng/pipeline/PipelineBuffer.hh b/src/include/simeng/pipeline/PipelineBuffer.hh index b6459935ed..342355eaf2 100644 --- a/src/include/simeng/pipeline/PipelineBuffer.hh +++ b/src/include/simeng/pipeline/PipelineBuffer.hh @@ -75,6 +75,8 @@ class PipelineBuffer { /** Get the width of the buffer slots. 
*/ uint16_t getWidth() const { return width; } + /** flush branches in the buffer from the branch predictor, where the + * buffer contains microops */ void flushBranchMicroOps(BranchPredictor& branchPredictor) { for (size_t slot = 0; slot < width; slot++) { auto& uop = getTailSlots()[slot]; @@ -88,6 +90,8 @@ class PipelineBuffer { } } + /** flush branches in the buffer from the branch predictor, where the + * buffer contains macroops */ void flushBranchMacroOps(BranchPredictor& branchPredictor) { for (size_t slot = 0; slot < width; slot++) { auto& macroOp = getTailSlots()[slot]; From 52f9688183b2e9ed359c363e39f13fec56708e3f Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 7 May 2024 16:12:21 +0100 Subject: [PATCH 19/69] clang format --- test/unit/pipeline/FetchUnitTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 6d891da2b2..17903bca54 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -279,8 +279,8 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); BranchPrediction pred = {true, pc + knownOff}; - EXPECT_CALL(predictor, predict(20, bType, knownOff, false)).WillOnce - (Return(pred)); + EXPECT_CALL(predictor, predict(20, bType, knownOff, false)) + .WillOnce(Return(pred)); fetchUnit.tick(); // Ensure on next tick, predecode is not called From 6a286d37027fc0e22482884e02c609a17db8c74c Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:04:52 +0000 Subject: [PATCH 20/69] Rebasing to dev --- test/unit/pipeline/ReorderBufferTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc 
index ff3b63756d..84f37e45ed 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -446,7 +446,7 @@ TEST_F(ReorderBufferTest, branch) { EXPECT_EQ(loopBoundaryAddr, insnAddr); // Check that branch misprediction metrics have been correctly collected - EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 8); + EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 4); } // Tests that only those destination registers which have been renamed are From d0cc56aafb6410b75cdf6df39a0c5b4394fb453f Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 13 May 2024 13:15:45 +0100 Subject: [PATCH 21/69] undoing last push --- test/unit/pipeline/ReorderBufferTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc index 84f37e45ed..ff3b63756d 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -446,7 +446,7 @@ TEST_F(ReorderBufferTest, branch) { EXPECT_EQ(loopBoundaryAddr, insnAddr); // Check that branch misprediction metrics have been correctly collected - EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 4); + EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 8); } // Tests that only those destination registers which have been renamed are From 1c1b6ce968ca9c8a38e1a9ce901eede1be7667eb Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 24 May 2024 11:17:35 +0100 Subject: [PATCH 22/69] Updating haeders and comments --- src/include/simeng/pipeline/ExecuteUnit.hh | 1 - src/include/simeng/pipeline/PipelineBuffer.hh | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index 450ad4b384..cd11eb23d6 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ 
b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -4,7 +4,6 @@ #include #include "simeng/Instruction.hh" -#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/pipeline/PipelineBuffer.hh" namespace simeng { diff --git a/src/include/simeng/pipeline/PipelineBuffer.hh b/src/include/simeng/pipeline/PipelineBuffer.hh index 342355eaf2..bd7c565735 100644 --- a/src/include/simeng/pipeline/PipelineBuffer.hh +++ b/src/include/simeng/pipeline/PipelineBuffer.hh @@ -75,8 +75,8 @@ class PipelineBuffer { /** Get the width of the buffer slots. */ uint16_t getWidth() const { return width; } - /** flush branches in the buffer from the branch predictor, where the - * buffer contains microops */ + /** Flush branches in the buffer from the branch predictor, where the + * buffer contains micro-ops */ void flushBranchMicroOps(BranchPredictor& branchPredictor) { for (size_t slot = 0; slot < width; slot++) { auto& uop = getTailSlots()[slot]; @@ -90,8 +90,8 @@ class PipelineBuffer { } } - /** flush branches in the buffer from the branch predictor, where the - * buffer contains macroops */ + /** Flush branches in the buffer from the branch predictor, where the + * buffer contains macro-ops */ void flushBranchMacroOps(BranchPredictor& branchPredictor) { for (size_t slot = 0; slot < width; slot++) { auto& macroOp = getTailSlots()[slot]; From 1b800c19c64ed7a9b429e82b168c4b2d6c7c818b Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:05:25 +0000 Subject: [PATCH 23/69] Rebasing to dev --- test/unit/pipeline/FetchUnitTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 17903bca54..331729ba23 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -418,7 +418,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set the first expectation from the predictor 
to be true so a loop body will // be detected - EXPECT_CALL(predictor, predict(_, _, _, _)) + EXPECT_CALL(predictor, predict(_, _, _, false)) .WillOnce(Return(BranchPrediction({true, 0x0}))); // Set Loop Buffer state to be LoopBufferState::FILLING @@ -430,7 +430,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Fetch the next block of instructions from memory and change the expected // outcome of the branch predictor fetchUnit.requestFromPC(); - EXPECT_CALL(predictor, predict(_, _, _, _)) + EXPECT_CALL(predictor, predict(_, _, _, false)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the From 3aa7ca0a07e23cd99b29facbde4e69c157b1197c Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 4 Jun 2024 12:18:30 +0100 Subject: [PATCH 24/69] replacing = with == --- src/include/simeng/branchpredictors/BranchPredictor.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh index 2bcf76eb25..910d860b24 100644 --- a/src/include/simeng/branchpredictors/BranchPredictor.hh +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -20,7 +20,7 @@ struct BranchPrediction { /** Whether the branch will be taken. */ bool isTaken; - /** The branch instruction's target address. If `isTaken = false`, the value + /** The branch instruction's target address. If `isTaken == false`, the value * will be ignored. 
*/ uint64_t target; From e525016f6802eb1ba58d1493a74c431828f7aeb0 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:07:55 +0000 Subject: [PATCH 25/69] Rebasing to dev --- src/lib/pipeline/ReorderBuffer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index e72e6e79dc..5f5e042d48 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -191,6 +191,7 @@ void ReorderBuffer::flush(uint64_t afterInsnId) { // If the instruction is a branch, supply address to branch flushing logic if (uop->isBranch()) { predictor_.flush(uop->getInstructionAddress()); + } buffer_.pop_back(); } From 673fe873d65bb6ed3c631064a761d764c80bd227 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Thu, 18 Jul 2024 15:14:45 +0100 Subject: [PATCH 26/69] clang format --- src/lib/pipeline/ReorderBuffer.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index 5f5e042d48..e72e6e79dc 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -191,7 +191,6 @@ void ReorderBuffer::flush(uint64_t afterInsnId) { // If the instruction is a branch, supply address to branch flushing logic if (uop->isBranch()) { predictor_.flush(uop->getInstructionAddress()); - } buffer_.pop_back(); } From ace2d59133bd7427b9b87adc48a65ed695a20f49 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:13:32 +0000 Subject: [PATCH 27/69] Rebasing to dev --- .../developer/arch/supported/aarch64.rst | 4 ++-- .../developer/components/branchPred.rst | 8 ++++---- .../developer/components/coreinstance.rst | 2 +- .../components/pipeline/components.rst | 2 +- .../developer/components/pipeline/units.rst | 10 +++++----- .../arch/aarch64/helpers/conditional.hh | 4 ++-- 
src/include/simeng/config/yaml/ryml.hh | 20 +++++++++---------- src/lib/arch/aarch64/Instruction.cc | 2 +- src/lib/arch/riscv/Instruction.cc | 2 +- test/unit/aarch64/InstructionTest.cc | 12 +++++------ test/unit/pipeline/FetchUnitTest.cc | 8 ++++---- test/unit/riscv/InstructionTest.cc | 12 +++++------ 12 files changed, 43 insertions(+), 43 deletions(-) diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst index 092264e991..6df0028e48 100644 --- a/docs/sphinx/developer/arch/supported/aarch64.rst +++ b/docs/sphinx/developer/arch/supported/aarch64.rst @@ -55,12 +55,12 @@ Additional information The ``FP`` primary identifier is a placeholder to denote both the ``SCALAR`` and ``VECTOR`` primary identifiers such that, amongst the other combinations, ``FP_SIMPLE_ARTH`` expands to be ``SCALAR_SIMPLE_ARTH`` and ``VECTOR_SIMPLE_ARTH``. In some cases it was unnecessary and inconvenient to separate ``SCALAR`` and ``VECTOR`` operations within configuration options, therefore, this instruction group option was provided to solve the issue. -When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. 
+When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is isTaken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. Instruction Splitting ********************* -Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: +Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is isTaken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: - Load pair for X/W/S/D/Q registers. diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index 6a03c85129..f92a50f125 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -30,13 +30,13 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. 
- If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. Static Prediction - Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. + Based on the chosen static prediction method of "always isTaken" or "always not isTaken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of isTaken or not-isTaken respectively. Perceptron Predictor -------------------- @@ -48,9 +48,9 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. - The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. 
The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not isTaken if this is negative, or isTaken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. 
\ No newline at end of file diff --git a/docs/sphinx/developer/components/coreinstance.rst b/docs/sphinx/developer/components/coreinstance.rst index 8b9e99a449..89b6247db4 100644 --- a/docs/sphinx/developer/components/coreinstance.rst +++ b/docs/sphinx/developer/components/coreinstance.rst @@ -3,7 +3,7 @@ Core Instance The ``CoreInstance`` component supplies the functionality for instantiating all simulation objects and linking them together. -The standard process taken to create an instance of the modelled core is as follows: +The standard process isTaken to create an instance of the modelled core is as follows: Process the config file Either the passed configuration file path, or default configuration string, is used to generate the model configuration class. All subsequent parameterised instantiations of simulation objects utilise this configuration class. diff --git a/docs/sphinx/developer/components/pipeline/components.rst b/docs/sphinx/developer/components/pipeline/components.rst index ab62a6b919..f74d5e892e 100644 --- a/docs/sphinx/developer/components/pipeline/components.rst +++ b/docs/sphinx/developer/components/pipeline/components.rst @@ -69,7 +69,7 @@ Once a completion slot is available, the load will be executed, the results broa Stores ****** -As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. +As with loads, stores are considered pending when initially added to the LSQ. 
Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is isTaken from the ``storeQueue_`` entry. The generation of store instruction write requests are carried out after its commitment. The reasoning for this design decision is as followed. With SimEng supporting speculative execution, processed store instruction may come from an incorrectly speculated branch direction and will inevitably be removed from the pipeline. Therefore, it is important to ensure any write requests are valid, concerning speculative execution, as the performance cost of reversing a completed write request is high. diff --git a/docs/sphinx/developer/components/pipeline/units.rst b/docs/sphinx/developer/components/pipeline/units.rst index 52358f4658..922b24f5a6 100644 --- a/docs/sphinx/developer/components/pipeline/units.rst +++ b/docs/sphinx/developer/components/pipeline/units.rst @@ -23,7 +23,7 @@ Behaviour The fetch unit fetches memory in discrete boundary-aligned blocks, according to the current program counter (PC); this is to prevent the fetched block overlapping an inaccessible or unmapped memory region that may result in the request incorrectly responding with a fault despite the validity of the initial region. -Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. 
If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. +Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a isTaken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. This standard process of pre-decoding, predicting, and updating the PC continues until one of the following occurs: @@ -32,7 +32,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues The maximum number of fetched macro-ops is reached The current block is saved and processing resumes in the next cycle. - A branch is predicted as taken + A branch is predicted as isTaken A block of memory from the new address may be requested, and processing will resume once the data is available. The fetched memory block is exhausted @@ -43,7 +43,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues Loop Buffer *********** -Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. +Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. 
This avoids the need to re-request data from memory if a branch is isTaken and increases the throughput of the fetch unit. Each entry of the loop buffer is the encoding of the Macro-Op. Therefore, when supplying an instruction from the loop buffer, the pre-decoding step must still be performed. This was required to avoid any issues with multiple instantiations of the same instruction editing each others class members. @@ -59,7 +59,7 @@ FILLING The branch representing the loop has been found and the buffer is being filled until it is seen again. SUPPLYING - The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. + The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is isTaken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. The detection of a loop and the branch which represents it comes from the ROB. More information can be found :ref:`here `. @@ -81,7 +81,7 @@ Behaviour Each cycle, the decode unit will read macro-ops from the input buffer, and split them into a stream of ``Instruction`` objects or micro-ops. These ``Instruction`` objects are passed into an internal buffer. -Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. 
+Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a isTaken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. The cycle ends when all ``Instruction`` objects in the internal buffer have been processed, or a misprediction is identified and all remaining ``Instruction`` objects are flushed. diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index e541eb276a..2b3ea1b9c3 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -56,7 +56,7 @@ uint8_t ccmp_reg(srcValContainer& sourceValues, /** Helper function for instructions with the format `cb rn, #imm`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ + * Returns tuple of type [bool branch isTaken, uint64_t address]. */ template std::tuple condBranch_cmpToZero( srcValContainer& sourceValues, @@ -91,7 +91,7 @@ T cs_4ops(srcValContainer& sourceValues, /** Helper function for instructions with the format `tb rn, #imm, * label`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ + * Returns tuple of type [bool branch isTaken, uint64_t address]. 
*/ template std::tuple tbnz_tbz( srcValContainer& sourceValues, diff --git a/src/include/simeng/config/yaml/ryml.hh b/src/include/simeng/config/yaml/ryml.hh index bed8f4620b..c35a4925f9 100644 --- a/src/include/simeng/config/yaml/ryml.hh +++ b/src/include/simeng/config/yaml/ryml.hh @@ -229,7 +229,7 @@ #define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) -/** A preprocessor foreach. Spectacular trick taken from: +/** A preprocessor foreach. Spectacular trick isTaken from: * http://stackoverflow.com/a/1872506/5875572 * The first argument is for a macro receiving a single argument, * which will be called with every subsequent argument. There is @@ -1449,7 +1449,7 @@ using std::index_sequence_for; /** C++11 implementation of integer sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template struct integer_sequence { @@ -1461,7 +1461,7 @@ struct integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence = integer_sequence; @@ -1544,19 +1544,19 @@ struct __make_integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: 
http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_index_sequence = make_integer_sequence; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence_for = make_index_sequence; #endif @@ -4795,7 +4795,7 @@ namespace detail { /** @internal * @ingroup hash - * @see this was taken a great answer in stackoverflow: + * @see this was isTaken a great answer in stackoverflow: * https://stackoverflow.com/a/34597785/5875572 * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ template @@ -12377,7 +12377,7 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) * * So we fake it by using a dynamic format with an explicit * field size set to the length of the given span. - * This trick is taken from: + * This trick is isTaken from: * https://stackoverflow.com/a/18368910/5875572 */ /* this is the actual format we'll use for scanning */ @@ -14624,7 +14624,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { -//! taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +//! 
isTaken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -15704,7 +15704,7 @@ template using cspanrs = spanrs; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** A non-owning span which always retains the capacity of the original - * range it was taken from (though it may loose its original size). + * range it was isTaken from (though it may loose its original size). * The resizing methods resize(), ltrim(), rtrim() as well * as the subselection methods subspan(), range(), first() and last() can be * used at will without loosing the original capacity; the full capacity span diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index e3b697433e..1bf93c451f 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -106,7 +106,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as taken, it will require a + // Instruction isn't a branch; if predicted as isTaken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index c71b581a60..5eb1091c6b 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -101,7 +101,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as taken, it will require a + // Instruction isn't a branch; if predicted as isTaken, it will require a // flush return 
{prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 92b8e9393a..00279300b8 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ b/test/unit/aarch64/InstructionTest.cc @@ -493,7 +493,7 @@ TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch taken) is handled correctly +// Test that a correct prediction (branch isTaken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -510,7 +510,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is taken is handled correctly + // Test a correct prediction where branch is isTaken is handled correctly pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -522,7 +522,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not taken) is handled correctly +// Test that a correct prediction (branch not isTaken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_notTaken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -539,7 +539,7 @@ TEST_F(AArch64InstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't taken is handled correctly + // Test a correct prediction where a branch isn't isTaken is handled correctly pred = {false, 80 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -580,7 +580,7 @@ 
TEST_F(AArch64InstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); } -// Test that an incorrect prediction (wrong taken) is handled correctly +// Test that an incorrect prediction (wrong isTaken) is handled correctly TEST_F(AArch64InstructionTest, incorrectPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -597,7 +597,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - taken is wrong + // Test an incorrect prediction is handled correctly - isTaken is wrong pred = {true, 100 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 331729ba23..5b4f5c82d7 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -232,7 +232,7 @@ TEST_P(PipelineFetchUnitTest, halted) { EXPECT_TRUE(fetchUnit.hasHalted()); } -// Tests that fetching a branch instruction (predicted taken) mid block causes a +// Tests that fetching a branch instruction (predicted isTaken) mid block causes a // branch stall + discards the remaining fetched instructions TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { const uint8_t pc = 16; @@ -266,7 +266,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); fetchUnit.tick(); - // For second tick, process a taken branch meaning rest of block is discarded + // For second tick, process a isTaken branch meaning rest of block is discarded // & a new memory block is requested EXPECT_CALL(memory, getCompletedReads()).Times(0); EXPECT_CALL(memory, clearCompletedReads()).Times(1); @@ -389,7 +389,7 @@ TEST_P(PipelineFetchUnitTest, supplyFromLoopBuffer) { } // Tests the functionality of 
idling the supply to the Loop Buffer one of not -// taken branch at the loopBoundaryAddress_ +// isTaken branch at the loopBoundaryAddress_ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set instructions to be fetched from memory memory::MemoryReadResult memReadResultA = { @@ -433,7 +433,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { EXPECT_CALL(predictor, predict(_, _, _, false)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); - // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the + // Attempt to fill Loop Buffer but prevent it on a not isTaken outcome at the // loopBoundaryAddress_ branch // Tick 4 times to process all 16 bytes of fetched data for (int i = 0; i < 4; i++) { diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index 6103cd4f5c..c40b503a6c 100644 --- a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -467,7 +467,7 @@ TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch taken) is handled correctly +// Test that a correct prediction (branch isTaken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -484,7 +484,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is taken is handled correctly + // Test a correct prediction where branch is isTaken is handled correctly pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -497,7 +497,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not taken) is handled correctly +// 
Test that a correct prediction (branch not isTaken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_notTaken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -514,7 +514,7 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't taken is handled correctly + // Test a correct prediction where a branch isn't isTaken is handled correctly // imm operand 0x28 has 4 added implicitly by dissassembler pred = {false, 400 + 4}; insn.setBranchPrediction(pred); @@ -559,7 +559,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 400 - 86); } -// Test that an incorrect prediction (wrong taken) is handled correctly +// Test that an incorrect prediction (wrong isTaken) is handled correctly TEST_F(RiscVInstructionTest, incorrectPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -576,7 +576,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - taken is wrong + // Test an incorrect prediction is handled correctly - isTaken is wrong // imm operand 0x28 has 4 added implicitly by dissassembler pred = {true, 400 - 86}; insn.setBranchPrediction(pred); From 416cc20e5b94ec83c92961e31b93d36514ede3f0 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:14:56 +0000 Subject: [PATCH 28/69] Rebasing to dev --- .../branchpredictors/BranchPredictor.hh | 72 ------------------- 1 file changed, 72 deletions(-) delete mode 100644 src/include/simeng/branchpredictors/BranchPredictor.hh diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh deleted 
file mode 100644 index 910d860b24..0000000000 --- a/src/include/simeng/branchpredictors/BranchPredictor.hh +++ /dev/null @@ -1,72 +0,0 @@ -#pragma once - -#include -#include - -namespace simeng { - -/** The types of branches recognised. */ -enum class BranchType { - Conditional = 0, - LoopClosing, - Return, - SubroutineCall, - Unconditional, - Unknown -}; - -/** A branch result prediction for an instruction. */ -struct BranchPrediction { - /** Whether the branch will be taken. */ - bool isTaken; - - /** The branch instruction's target address. If `isTaken == false`, the value - * will be ignored. */ - uint64_t target; - - /** Check for equality of two branch predictions . */ - bool operator==(const BranchPrediction& other) { - if ((isTaken == other.isTaken) && (target == other.target)) - return true; - else - return false; - } - - /** Check for inequality of two branch predictions . */ - bool operator!=(const BranchPrediction& other) { - if ((isTaken != other.isTaken) || (target != other.target)) - return true; - else - return false; - } -}; - -/** An abstract branch predictor interface. */ -class BranchPredictor { - public: - virtual ~BranchPredictor(){}; - - /** Generate a branch prediction for the specified instruction address with a - * branch type and possible known branch offset. */ - virtual BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) = 0; - - /** Provide branch results to update the prediction model for the specified - * instruction address. Update must be called on instructions in program - * order */ - virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, - BranchType type) = 0; - - /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. 
Branches must be flushed in reverse - * program order (though, if a block of n instructions is being flushed at - * once, the exact order that the individual instructions within this block - * are flushed does not matter so long as they are all flushed) */ - virtual void flush(uint64_t address) = 0; - - /** Adds instruction to the Fetch Target Queue without making a new prediction - */ - virtual void addToFTQ(uint64_t address, bool isTaken) = 0; -}; - -} // namespace simeng \ No newline at end of file From 92e67a860b9089c3e3d86f26c41d16d17daf7fd9 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:16:02 +0000 Subject: [PATCH 29/69] Rebasing to dev --- .../developer/arch/supported/aarch64.rst | 4 ++-- .../developer/components/branchPred.rst | 8 ++++---- .../developer/components/coreinstance.rst | 2 +- .../components/pipeline/components.rst | 2 +- .../developer/components/pipeline/units.rst | 10 +++++----- src/include/simeng/config/yaml/ryml.hh | 20 +++++++++---------- src/include/simeng/pipeline/ExecuteUnit.hh | 1 + src/lib/arch/aarch64/Instruction.cc | 2 +- src/lib/arch/riscv/Instruction.cc | 2 +- test/unit/aarch64/InstructionTest.cc | 12 +++++------ test/unit/pipeline/FetchUnitTest.cc | 8 ++++---- test/unit/riscv/InstructionTest.cc | 12 +++++------ 12 files changed, 42 insertions(+), 41 deletions(-) diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst index 6df0028e48..092264e991 100644 --- a/docs/sphinx/developer/arch/supported/aarch64.rst +++ b/docs/sphinx/developer/arch/supported/aarch64.rst @@ -55,12 +55,12 @@ Additional information The ``FP`` primary identifier is a placeholder to denote both the ``SCALAR`` and ``VECTOR`` primary identifiers such that, amongst the other combinations, ``FP_SIMPLE_ARTH`` expands to be ``SCALAR_SIMPLE_ARTH`` and ``VECTOR_SIMPLE_ARTH``. 
In some cases it was unnecessary and inconvenient to separate ``SCALAR`` and ``VECTOR`` operations within configuration options, therefore, this instruction group option was provided to solve the issue. -When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is isTaken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. +When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. Instruction Splitting ********************* -Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is isTaken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. 
The following instruction splitting is supported: +Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: - Load pair for X/W/S/D/Q registers. diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index f92a50f125..6a03c85129 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -30,13 +30,13 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. 
Static Prediction - Based on the chosen static prediction method of "always isTaken" or "always not isTaken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of isTaken or not-isTaken respectively. + Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. Perceptron Predictor -------------------- @@ -48,9 +48,9 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. - The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not isTaken if this is negative, or isTaken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. 
- If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file diff --git a/docs/sphinx/developer/components/coreinstance.rst b/docs/sphinx/developer/components/coreinstance.rst index 89b6247db4..8b9e99a449 100644 --- a/docs/sphinx/developer/components/coreinstance.rst +++ b/docs/sphinx/developer/components/coreinstance.rst @@ -3,7 +3,7 @@ Core Instance The ``CoreInstance`` component supplies the functionality for instantiating all simulation objects and linking them together. -The standard process isTaken to create an instance of the modelled core is as follows: +The standard process taken to create an instance of the modelled core is as follows: Process the config file Either the passed configuration file path, or default configuration string, is used to generate the model configuration class. All subsequent parameterised instantiations of simulation objects utilise this configuration class. 
diff --git a/docs/sphinx/developer/components/pipeline/components.rst b/docs/sphinx/developer/components/pipeline/components.rst index f74d5e892e..ab62a6b919 100644 --- a/docs/sphinx/developer/components/pipeline/components.rst +++ b/docs/sphinx/developer/components/pipeline/components.rst @@ -69,7 +69,7 @@ Once a completion slot is available, the load will be executed, the results broa Stores ****** -As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is isTaken from the ``storeQueue_`` entry. +As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. The generation of store instruction write requests are carried out after its commitment. The reasoning for this design decision is as followed. With SimEng supporting speculative execution, processed store instruction may come from an incorrectly speculated branch direction and will inevitably be removed from the pipeline. Therefore, it is important to ensure any write requests are valid, concerning speculative execution, as the performance cost of reversing a completed write request is high. 
diff --git a/docs/sphinx/developer/components/pipeline/units.rst b/docs/sphinx/developer/components/pipeline/units.rst index 922b24f5a6..52358f4658 100644 --- a/docs/sphinx/developer/components/pipeline/units.rst +++ b/docs/sphinx/developer/components/pipeline/units.rst @@ -23,7 +23,7 @@ Behaviour The fetch unit fetches memory in discrete boundary-aligned blocks, according to the current program counter (PC); this is to prevent the fetched block overlapping an inaccessible or unmapped memory region that may result in the request incorrectly responding with a fault despite the validity of the initial region. -Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a isTaken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. +Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. 
This standard process of pre-decoding, predicting, and updating the PC continues until one of the following occurs: @@ -32,7 +32,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues The maximum number of fetched macro-ops is reached The current block is saved and processing resumes in the next cycle. - A branch is predicted as isTaken + A branch is predicted as taken A block of memory from the new address may be requested, and processing will resume once the data is available. The fetched memory block is exhausted @@ -43,7 +43,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues Loop Buffer *********** -Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is isTaken and increases the throughput of the fetch unit. +Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. Each entry of the loop buffer is the encoding of the Macro-Op. Therefore, when supplying an instruction from the loop buffer, the pre-decoding step must still be performed. This was required to avoid any issues with multiple instantiations of the same instruction editing each others class members. @@ -59,7 +59,7 @@ FILLING The branch representing the loop has been found and the buffer is being filled until it is seen again. SUPPLYING - The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is isTaken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. 
+ The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. The detection of a loop and the branch which represents it comes from the ROB. More information can be found :ref:`here `. @@ -81,7 +81,7 @@ Behaviour Each cycle, the decode unit will read macro-ops from the input buffer, and split them into a stream of ``Instruction`` objects or micro-ops. These ``Instruction`` objects are passed into an internal buffer. -Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a isTaken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. +Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. The cycle ends when all ``Instruction`` objects in the internal buffer have been processed, or a misprediction is identified and all remaining ``Instruction`` objects are flushed. diff --git a/src/include/simeng/config/yaml/ryml.hh b/src/include/simeng/config/yaml/ryml.hh index c35a4925f9..bed8f4620b 100644 --- a/src/include/simeng/config/yaml/ryml.hh +++ b/src/include/simeng/config/yaml/ryml.hh @@ -229,7 +229,7 @@ #define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) -/** A preprocessor foreach. Spectacular trick isTaken from: +/** A preprocessor foreach. 
Spectacular trick taken from: * http://stackoverflow.com/a/1872506/5875572 * The first argument is for a macro receiving a single argument, * which will be called with every subsequent argument. There is @@ -1449,7 +1449,7 @@ using std::index_sequence_for; /** C++11 implementation of integer sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template struct integer_sequence { @@ -1461,7 +1461,7 @@ struct integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence = integer_sequence; @@ -1544,19 +1544,19 @@ struct __make_integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: 
http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_index_sequence = make_integer_sequence; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence_for = make_index_sequence; #endif @@ -4795,7 +4795,7 @@ namespace detail { /** @internal * @ingroup hash - * @see this was isTaken a great answer in stackoverflow: + * @see this was taken a great answer in stackoverflow: * https://stackoverflow.com/a/34597785/5875572 * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ template @@ -12377,7 +12377,7 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) * * So we fake it by using a dynamic format with an explicit * field size set to the length of the given span. - * This trick is isTaken from: + * This trick is taken from: * https://stackoverflow.com/a/18368910/5875572 */ /* this is the actual format we'll use for scanning */ @@ -14624,7 +14624,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { -//! isTaken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +//! 
taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -15704,7 +15704,7 @@ template using cspanrs = spanrs; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** A non-owning span which always retains the capacity of the original - * range it was isTaken from (though it may loose its original size). + * range it was taken from (though it may loose its original size). * The resizing methods resize(), ltrim(), rtrim() as well * as the subselection methods subspan(), range(), first() and last() can be * used at will without loosing the original capacity; the full capacity span diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index cd11eb23d6..e6420e998f 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -4,6 +4,7 @@ #include #include "simeng/Instruction.hh" +#include "simeng/branchPredictors/BranchPredictor.hh" #include "simeng/pipeline/PipelineBuffer.hh" namespace simeng { diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index 1bf93c451f..e3b697433e 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -106,7 +106,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as isTaken, it will require a + // Instruction isn't a branch; if predicted as taken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 5eb1091c6b..c71b581a60 100644 --- 
a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -101,7 +101,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as isTaken, it will require a + // Instruction isn't a branch; if predicted as taken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 00279300b8..92b8e9393a 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ b/test/unit/aarch64/InstructionTest.cc @@ -493,7 +493,7 @@ TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch isTaken) is handled correctly +// Test that a correct prediction (branch taken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -510,7 +510,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is isTaken is handled correctly + // Test a correct prediction where branch is taken is handled correctly pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -522,7 +522,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not isTaken) is handled correctly +// Test that a correct prediction (branch not taken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_notTaken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -539,7 +539,7 @@ 
TEST_F(AArch64InstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't isTaken is handled correctly + // Test a correct prediction where a branch isn't taken is handled correctly pred = {false, 80 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -580,7 +580,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); } -// Test that an incorrect prediction (wrong isTaken) is handled correctly +// Test that an incorrect prediction (wrong taken) is handled correctly TEST_F(AArch64InstructionTest, incorrectPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -597,7 +597,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - isTaken is wrong + // Test an incorrect prediction is handled correctly - taken is wrong pred = {true, 100 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 5b4f5c82d7..331729ba23 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -232,7 +232,7 @@ TEST_P(PipelineFetchUnitTest, halted) { EXPECT_TRUE(fetchUnit.hasHalted()); } -// Tests that fetching a branch instruction (predicted isTaken) mid block causes a +// Tests that fetching a branch instruction (predicted taken) mid block causes a // branch stall + discards the remaining fetched instructions TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { const uint8_t pc = 16; @@ -266,7 +266,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); 
fetchUnit.tick(); - // For second tick, process a isTaken branch meaning rest of block is discarded + // For second tick, process a taken branch meaning rest of block is discarded // & a new memory block is requested EXPECT_CALL(memory, getCompletedReads()).Times(0); EXPECT_CALL(memory, clearCompletedReads()).Times(1); @@ -389,7 +389,7 @@ TEST_P(PipelineFetchUnitTest, supplyFromLoopBuffer) { } // Tests the functionality of idling the supply to the Loop Buffer one of not -// isTaken branch at the loopBoundaryAddress_ +// taken branch at the loopBoundaryAddress_ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set instructions to be fetched from memory memory::MemoryReadResult memReadResultA = { @@ -433,7 +433,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { EXPECT_CALL(predictor, predict(_, _, _, false)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); - // Attempt to fill Loop Buffer but prevent it on a not isTaken outcome at the + // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the // loopBoundaryAddress_ branch // Tick 4 times to process all 16 bytes of fetched data for (int i = 0; i < 4; i++) { diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index c40b503a6c..6103cd4f5c 100644 --- a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -467,7 +467,7 @@ TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch isTaken) is handled correctly +// Test that a correct prediction (branch taken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -484,7 +484,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction 
where branch is isTaken is handled correctly + // Test a correct prediction where branch is taken is handled correctly pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -497,7 +497,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not isTaken) is handled correctly +// Test that a correct prediction (branch not taken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_notTaken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -514,7 +514,7 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't isTaken is handled correctly + // Test a correct prediction where a branch isn't taken is handled correctly // imm operand 0x28 has 4 added implicitly by dissassembler pred = {false, 400 + 4}; insn.setBranchPrediction(pred); @@ -559,7 +559,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 400 - 86); } -// Test that an incorrect prediction (wrong isTaken) is handled correctly +// Test that an incorrect prediction (wrong taken) is handled correctly TEST_F(RiscVInstructionTest, incorrectPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -576,7 +576,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - isTaken is wrong + // Test an incorrect prediction is handled correctly - taken is wrong // imm operand 0x28 has 4 added implicitly by dissassembler pred = {true, 400 - 86}; insn.setBranchPrediction(pred); From f0512779e80f9990f71f7e09bf566f8f1fda993d Mon Sep 17 00:00:00 2001 From: Alex Cockrean 
<84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:16:12 +0000 Subject: [PATCH 30/69] Rebasing to dev --- src/include/simeng/pipeline/ExecuteUnit.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index e6420e998f..450ad4b384 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -4,7 +4,7 @@ #include #include "simeng/Instruction.hh" -#include "simeng/branchPredictors/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/pipeline/PipelineBuffer.hh" namespace simeng { From 2a957cb9d1d311948cdafd615166ab421c72169b Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:16:34 +0000 Subject: [PATCH 31/69] Rebasing to dev --- src/include/simeng/arch/aarch64/helpers/conditional.hh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index 2b3ea1b9c3..e541eb276a 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -56,7 +56,7 @@ uint8_t ccmp_reg(srcValContainer& sourceValues, /** Helper function for instructions with the format `cb rn, #imm`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch isTaken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple condBranch_cmpToZero( srcValContainer& sourceValues, @@ -91,7 +91,7 @@ T cs_4ops(srcValContainer& sourceValues, /** Helper function for instructions with the format `tb rn, #imm, * label`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch isTaken, uint64_t address]. 
*/ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple tbnz_tbz( srcValContainer& sourceValues, From 09808113d41377104b50a737fe288546691441f6 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:17:21 +0000 Subject: [PATCH 32/69] Rebasing to dev --- test/unit/pipeline/ReorderBufferTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc index ff3b63756d..84f37e45ed 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -446,7 +446,7 @@ TEST_F(ReorderBufferTest, branch) { EXPECT_EQ(loopBoundaryAddr, insnAddr); // Check that branch misprediction metrics have been correctly collected - EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 8); + EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 4); } // Tests that only those destination registers which have been renamed are From e5f52eb43736f47cbeae5beff88925a12ed66779 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 13 May 2024 13:15:45 +0100 Subject: [PATCH 33/69] undoing last push --- test/unit/pipeline/ReorderBufferTest.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc index 84f37e45ed..ff3b63756d 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -446,7 +446,7 @@ TEST_F(ReorderBufferTest, branch) { EXPECT_EQ(loopBoundaryAddr, insnAddr); // Check that branch misprediction metrics have been correctly collected - EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 4); + EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 8); } // Tests that only those destination registers which have been renamed are From 37a1c34ee3d72eb60c96c5de9be63e739b0a0947 Mon Sep 17 00:00:00 2001 
From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 24 May 2024 11:17:35 +0100 Subject: [PATCH 34/69] Updating headers and comments --- src/include/simeng/pipeline/ExecuteUnit.hh | 1 - 1 file changed, 1 deletion(-) diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index 450ad4b384..cd11eb23d6 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -4,7 +4,6 @@ #include #include "simeng/Instruction.hh" -#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/pipeline/PipelineBuffer.hh" namespace simeng { From e297e68559990347b6082534e0e36fc927475788 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:23:51 +0000 Subject: [PATCH 35/69] Rebasing to dev --- src/lib/pipeline/ReorderBuffer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index e72e6e79dc..5f5e042d48 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -191,6 +191,7 @@ void ReorderBuffer::flush(uint64_t afterInsnId) { // If the instruction is a branch, supply address to branch flushing logic if (uop->isBranch()) { predictor_.flush(uop->getInstructionAddress()); + } buffer_.pop_back(); } From 6dfa36caa889fa7f575c788cb99d9a13e833c9c0 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Thu, 18 Jul 2024 15:14:45 +0100 Subject: [PATCH 36/69] clang format --- src/lib/pipeline/ReorderBuffer.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index 5f5e042d48..e72e6e79dc 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -191,7 +191,6 @@ void ReorderBuffer::flush(uint64_t afterInsnId) { // If the instruction is a branch, supply address to branch
flushing logic if (uop->isBranch()) { predictor_.flush(uop->getInstructionAddress()); - } buffer_.pop_back(); } From 4155ffc6f0ccb59ade61b87823c733f033456f25 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:25:14 +0000 Subject: [PATCH 37/69] Rebasing to dev --- src/include/simeng/pipeline/PipelineBuffer.hh | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/src/include/simeng/pipeline/PipelineBuffer.hh b/src/include/simeng/pipeline/PipelineBuffer.hh index bd7c565735..6e128ae684 100644 --- a/src/include/simeng/pipeline/PipelineBuffer.hh +++ b/src/include/simeng/pipeline/PipelineBuffer.hh @@ -4,8 +4,6 @@ #include #include -#include "simeng/branchpredictors/BranchPredictor.hh" - namespace simeng { namespace pipeline { @@ -75,36 +73,6 @@ class PipelineBuffer { /** Get the width of the buffer slots. */ uint16_t getWidth() const { return width; } - /** Flush branches in the buffer from the branch predictor, where the - * buffer contains micro-ops */ - void flushBranchMicroOps(BranchPredictor& branchPredictor) { - for (size_t slot = 0; slot < width; slot++) { - auto& uop = getTailSlots()[slot]; - if (uop != nullptr && uop->isBranch()) { - branchPredictor.flush(uop->getInstructionAddress()); - } - uop = getHeadSlots()[slot]; - if (uop != nullptr && uop->isBranch()) { - branchPredictor.flush(uop->getInstructionAddress()); - } - } - } - - /** Flush branches in the buffer from the branch predictor, where the - * buffer contains macro-ops */ - void flushBranchMacroOps(BranchPredictor& branchPredictor) { - for (size_t slot = 0; slot < width; slot++) { - auto& macroOp = getTailSlots()[slot]; - if (!macroOp.empty() && macroOp[0]->isBranch()) { - branchPredictor.flush(macroOp[0]->getInstructionAddress()); - } - macroOp = getHeadSlots()[slot]; - if (!macroOp.empty() && macroOp[0]->isBranch()) { - branchPredictor.flush(macroOp[0]->getInstructionAddress()); - } - } - } - private: /** The width of each 
row of slots. */ uint16_t width; From 601178b4dd9d8987666c270c128b6992f93b1971 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:26:05 +0000 Subject: [PATCH 38/69] Rebasing to dev --- src/lib/models/outoforder/Core.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/models/outoforder/Core.cc b/src/lib/models/outoforder/Core.cc index af920e32aa..7ee15e7d1a 100644 --- a/src/lib/models/outoforder/Core.cc +++ b/src/lib/models/outoforder/Core.cc @@ -263,11 +263,11 @@ void Core::raiseException(const std::shared_ptr& instruction) { void Core::handleException() { // Check for branch instructions in buffer, and flush them from the BP. // Then empty the buffers - branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(&fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); - branchPredictor_.flushBranchesInBufferFromSelf(decodeToRenameBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(&decodeToRenameBuffer_); decodeToRenameBuffer_.fill(nullptr); decodeToRenameBuffer_.stall(false); @@ -351,11 +351,11 @@ void Core::flushIfNeeded() { // Then empty the buffers fetchUnit_.flushLoopBuffer(); fetchUnit_.updatePC(targetAddress); - branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(&fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); - branchPredictor_.flushBranchesInBufferFromSelf(decodeToRenameBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(&decodeToRenameBuffer_); decodeToRenameBuffer_.fill(nullptr); decodeToRenameBuffer_.stall(false); @@ -383,7 +383,7 @@ void Core::flushIfNeeded() { // Then empty the buffers fetchUnit_.flushLoopBuffer(); fetchUnit_.updatePC(targetAddress); - branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); + 
branchPredictor_.flushBranchesInBufferFromSelf(&fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); From 5031d15f4800dcbc7a2a9580e60c934f87f5e1dc Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:33:02 +0000 Subject: [PATCH 39/69] Rebasing to dev --- src/lib/models/outoforder/Core.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/models/outoforder/Core.cc b/src/lib/models/outoforder/Core.cc index 7ee15e7d1a..af920e32aa 100644 --- a/src/lib/models/outoforder/Core.cc +++ b/src/lib/models/outoforder/Core.cc @@ -263,11 +263,11 @@ void Core::raiseException(const std::shared_ptr& instruction) { void Core::handleException() { // Check for branch instructions in buffer, and flush them from the BP. // Then empty the buffers - branchPredictor_.flushBranchesInBufferFromSelf(&fetchToDecodeBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); - branchPredictor_.flushBranchesInBufferFromSelf(&decodeToRenameBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(decodeToRenameBuffer_); decodeToRenameBuffer_.fill(nullptr); decodeToRenameBuffer_.stall(false); @@ -351,11 +351,11 @@ void Core::flushIfNeeded() { // Then empty the buffers fetchUnit_.flushLoopBuffer(); fetchUnit_.updatePC(targetAddress); - branchPredictor_.flushBranchesInBufferFromSelf(&fetchToDecodeBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); - branchPredictor_.flushBranchesInBufferFromSelf(&decodeToRenameBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(decodeToRenameBuffer_); decodeToRenameBuffer_.fill(nullptr); decodeToRenameBuffer_.stall(false); @@ -383,7 +383,7 @@ void Core::flushIfNeeded() { // Then empty the buffers fetchUnit_.flushLoopBuffer(); 
fetchUnit_.updatePC(targetAddress); - branchPredictor_.flushBranchesInBufferFromSelf(&fetchToDecodeBuffer_); + branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); From 4ad630cb5d8298fdbf08a3507a4e15e0b197dd63 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Fri, 1 Nov 2024 14:33:53 +0000 Subject: [PATCH 40/69] Rebasing to dev --- .../developer/arch/supported/aarch64.rst | 4 +- .../developer/components/branchPred.rst | 8 +- .../developer/components/coreinstance.rst | 2 +- .../components/pipeline/components.rst | 2 +- .../developer/components/pipeline/units.rst | 10 +- src/include/simeng/CoreInstance.hh | 1 + .../arch/aarch64/helpers/conditional.hh | 4 +- .../simeng/branchpredictors/BranchHistory.hh | 117 +++++++ .../simeng/branchpredictors/TagePredictor.hh | 129 ++++++++ src/include/simeng/config/yaml/ryml.hh | 20 +- src/lib/CMakeLists.txt | 1 + src/lib/CoreInstance.cc | 2 + src/lib/arch/aarch64/Instruction.cc | 2 +- src/lib/arch/riscv/Instruction.cc | 2 +- src/lib/branchpredictors/TagePredictor.cc | 286 ++++++++++++++++++ src/lib/config/ModelConfig.cc | 7 +- test/unit/aarch64/InstructionTest.cc | 12 +- test/unit/riscv/InstructionTest.cc | 12 +- 18 files changed, 579 insertions(+), 42 deletions(-) create mode 100644 src/include/simeng/branchpredictors/BranchHistory.hh create mode 100644 src/include/simeng/branchpredictors/TagePredictor.hh create mode 100644 src/lib/branchpredictors/TagePredictor.cc diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst index 092264e991..6df0028e48 100644 --- a/docs/sphinx/developer/arch/supported/aarch64.rst +++ b/docs/sphinx/developer/arch/supported/aarch64.rst @@ -55,12 +55,12 @@ Additional information The ``FP`` primary identifier is a placeholder to denote both the ``SCALAR`` and ``VECTOR`` primary identifiers such that, amongst the other 
combinations, ``FP_SIMPLE_ARTH`` expands to be ``SCALAR_SIMPLE_ARTH`` and ``VECTOR_SIMPLE_ARTH``. In some cases it was unnecessary and inconvenient to separate ``SCALAR`` and ``VECTOR`` operations within configuration options, therefore, this instruction group option was provided to solve the issue. -When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. +When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is isTaken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. Instruction Splitting ********************* -Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. 
A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: +Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is isTaken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: - Load pair for X/W/S/D/Q registers. diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index 6a03c85129..f92a50f125 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -30,13 +30,13 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. 
Static Prediction - Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. + Based on the chosen static prediction method of "always isTaken" or "always not isTaken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of isTaken or not-isTaken respectively. Perceptron Predictor -------------------- @@ -48,9 +48,9 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. - The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not isTaken if this is negative, or isTaken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. 
- If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file diff --git a/docs/sphinx/developer/components/coreinstance.rst b/docs/sphinx/developer/components/coreinstance.rst index 8b9e99a449..89b6247db4 100644 --- a/docs/sphinx/developer/components/coreinstance.rst +++ b/docs/sphinx/developer/components/coreinstance.rst @@ -3,7 +3,7 @@ Core Instance The ``CoreInstance`` component supplies the functionality for instantiating all simulation objects and linking them together. -The standard process taken to create an instance of the modelled core is as follows: +The standard process isTaken to create an instance of the modelled core is as follows: Process the config file Either the passed configuration file path, or default configuration string, is used to generate the model configuration class. All subsequent parameterised instantiations of simulation objects utilise this configuration class. 
diff --git a/docs/sphinx/developer/components/pipeline/components.rst b/docs/sphinx/developer/components/pipeline/components.rst index ab62a6b919..f74d5e892e 100644 --- a/docs/sphinx/developer/components/pipeline/components.rst +++ b/docs/sphinx/developer/components/pipeline/components.rst @@ -69,7 +69,7 @@ Once a completion slot is available, the load will be executed, the results broa Stores ****** -As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. +As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is isTaken from the ``storeQueue_`` entry. The generation of store instruction write requests are carried out after its commitment. The reasoning for this design decision is as followed. With SimEng supporting speculative execution, processed store instruction may come from an incorrectly speculated branch direction and will inevitably be removed from the pipeline. Therefore, it is important to ensure any write requests are valid, concerning speculative execution, as the performance cost of reversing a completed write request is high. 
diff --git a/docs/sphinx/developer/components/pipeline/units.rst b/docs/sphinx/developer/components/pipeline/units.rst index 52358f4658..922b24f5a6 100644 --- a/docs/sphinx/developer/components/pipeline/units.rst +++ b/docs/sphinx/developer/components/pipeline/units.rst @@ -23,7 +23,7 @@ Behaviour The fetch unit fetches memory in discrete boundary-aligned blocks, according to the current program counter (PC); this is to prevent the fetched block overlapping an inaccessible or unmapped memory region that may result in the request incorrectly responding with a fault despite the validity of the initial region. -Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. +Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a isTaken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. 
This standard process of pre-decoding, predicting, and updating the PC continues until one of the following occurs: @@ -32,7 +32,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues The maximum number of fetched macro-ops is reached The current block is saved and processing resumes in the next cycle. - A branch is predicted as taken + A branch is predicted as isTaken A block of memory from the new address may be requested, and processing will resume once the data is available. The fetched memory block is exhausted @@ -43,7 +43,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues Loop Buffer *********** -Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. +Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is isTaken and increases the throughput of the fetch unit. Each entry of the loop buffer is the encoding of the Macro-Op. Therefore, when supplying an instruction from the loop buffer, the pre-decoding step must still be performed. This was required to avoid any issues with multiple instantiations of the same instruction editing each others class members. @@ -59,7 +59,7 @@ FILLING The branch representing the loop has been found and the buffer is being filled until it is seen again. SUPPLYING - The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. 
+ The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is isTaken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. The detection of a loop and the branch which represents it comes from the ROB. More information can be found :ref:`here `. @@ -81,7 +81,7 @@ Behaviour Each cycle, the decode unit will read macro-ops from the input buffer, and split them into a stream of ``Instruction`` objects or micro-ops. These ``Instruction`` objects are passed into an internal buffer. -Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. +Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a isTaken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. The cycle ends when all ``Instruction`` objects in the internal buffer have been processed, or a misprediction is identified and all remaining ``Instruction`` objects are flushed. 
diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index 64e2f9e1f5..a462ef880f 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -11,6 +11,7 @@ #include "simeng/branchpredictors/AlwaysNotTakenPredictor.hh" #include "simeng/branchpredictors/GenericPredictor.hh" #include "simeng/branchpredictors/PerceptronPredictor.hh" +#include "simeng/branchpredictors/TagePredictor.hh" #include "simeng/config/SimInfo.hh" #include "simeng/kernel/Linux.hh" #include "simeng/memory/FixedLatencyMemoryInterface.hh" diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index e541eb276a..2b3ea1b9c3 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -56,7 +56,7 @@ uint8_t ccmp_reg(srcValContainer& sourceValues, /** Helper function for instructions with the format `cb rn, #imm`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ + * Returns tuple of type [bool branch isTaken, uint64_t address]. */ template std::tuple condBranch_cmpToZero( srcValContainer& sourceValues, @@ -91,7 +91,7 @@ T cs_4ops(srcValContainer& sourceValues, /** Helper function for instructions with the format `tb rn, #imm, * label`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch taken, uint64_t address]. */ + * Returns tuple of type [bool branch isTaken, uint64_t address]. 
*/ template std::tuple tbnz_tbz( srcValContainer& sourceValues, diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh new file mode 100644 index 0000000000..01e49b8cdb --- /dev/null +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -0,0 +1,117 @@ +#pragma once + +#include + +namespace simeng { +/** A class for storing a branch history. Needed for cases where a branch + * history of more than 64 bits is required. This class makes it easier to + * access and manipulate large branch histories, as are needed in + * sophisticated branch predictors. + * + * The bits of the branch history are stored in a vector of uint64_t values, + * and their access/manipulation is facilitated by the public functions.*/ + +class BranchHistory { + public: + BranchHistory(uint64_t size) : size_(size) { + history_ = {0}; + for (uint8_t i = 0; i < (size_ / 64); i++) { + history_.push_back(0); + } + } + ~BranchHistory() {}; + + /** Returns the numBits most recent bits of the branch history. Maximum + * number of bits returnable is 64 to allow it to be provided in a 64-bit + * integer. */ + uint64_t getHistory(uint8_t numBits) { +// std::cout << "getHistory" << std::endl; + assert(numBits <= 64 && "Cannot get more than 64 bits without rolling"); + assert(numBits <= size_ && "Cannot get more bits of branch history than " + "the size of the history"); + return (history_[0] & ((1 << numBits) - 1)); + } + + /** returns the global history folded over on itself to get a bitmap of the + * size specified by numBits. The global history is folded by taking an + * XOR hash with the overflowing bits to get an output of 'length' bits. 
+ * */ + uint64_t getFolded(uint8_t numBits, uint8_t length) { +// std::cout << "getFolded" << std::endl; + assert(numBits <= size_ && "Cannot get more bits of branch history than " + "the size of the history"); + uint64_t output = 0; + + uint64_t startIndex = 0; + uint64_t endIndex = numBits - 1; + + while (startIndex <= numBits) { +// std::cout << "in while loop :(" << std::endl; + output ^= ((history_[startIndex / 64] >> startIndex) & + ((1 << (numBits - startIndex)) - 1)); + + // Check to see if a second uint64_t value will need to be accessed + if ((startIndex / 64) == (endIndex / 64)) { + uint8_t leftOverBits = endIndex % 64; + output ^= (history_[endIndex / 64] << (numBits - leftOverBits)); + } + startIndex += length; + endIndex += length; + } + + // Trim the output to the desired size + output &= (1 << length) - 1; + return output; + } + + /** Adds a branch outcome to the global history */ + void addHistory(bool isTaken) { +// std::cout << "addHistory" << std::endl; + for (int8_t i = size_ / 64; i >= 0; i--) { + history_[i] <<= 1; + if (i == 0) { + history_[i] |= ((isTaken) ? 1 : 0); + } else { + history_[i] |= (((history_[i - 1] & ((uint64_t)1 << 63)) > 0) ? 1 : 0); + } + } + } + + /** Updates the state of a branch that has already been added to the global + * history at 'position', where position is 0-indexed and starts from the + * least-significant bit. I.e., to update the most recently added branch + * outcome, position would be 0. 
+ * */ + void updateHistory(bool isTaken, uint64_t position) { +// std::cout << "updateHistory" << std::endl; + if (position < size_) { + uint8_t vectIndex = position / 64; + uint8_t bitIndex = position % 64; + history_[vectIndex] ^= ((uint64_t)1 << bitIndex); + } + } + + /** removes the most recently added branch from the history */ + void rollBack() { +// std::cout << "rollBack" << std::endl; + for (uint8_t i = 0; i <= (size_ / 64); i++) { + history_[i] >>= 1; + if (i < (size_ / 64)) { + history_[i] |= (((history_[i + 1] & 1) > 0) ? ((uint64_t)1 << 63) : 0); + } + } + } + + private: + /** The number of bits of branch history stored in this branch history */ + uint64_t size_; + + /** A vector containing this bits of the branch history. The bits are + * arranged such that the most recent branches are stored in uint64_t at + * index 0 of the vector, then the next most recent at index 1 and so forth. + * Within each uint64_t, the most recent branches are recorded int he + * least-significant bits */ + std::vector history_; +}; + +} \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh new file mode 100644 index 0000000000..99dcd7d290 --- /dev/null +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -0,0 +1,129 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include "simeng/branchpredictors/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchHistory.hh" +#include "simeng/config/SimInfo.hh" + +namespace simeng { + +struct TageEntry { + uint8_t satCnt; + uint64_t tag; + uint8_t u; // usefulness counter + uint64_t target; +}; + +struct ftqEntry { + uint8_t predTable; + std::vector indices; + std::vector tags; + BranchPrediction prediction; + BranchPrediction altPrediction; +}; + +/** ToDo -- Explain TAGE */ + +class TagePredictor : public BranchPredictor { + public: + /** Initialise predictor models. 
*/ + TagePredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); + ~TagePredictor(); + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset; defaults to 0 meaning offset is not + * known. Returns a branch direction and branch target address. */ + BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset = 0) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. */ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ + void flush(uint64_t address) override; + + private: + /** Returns a btb prediction for this branch */ + BranchPrediction getBtbPrediction(uint64_t address); + + /** Todo */ + void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, + BranchPrediction* altPrediction, + uint8_t* predTable, + std::vector* indices, + std::vector* tags); + + /** Get the index of a branch for a given address and table */ + uint64_t getTaggedIndex(uint64_t address, uint8_t table); + + /** Return a hash of the address and the global history that is then trimmed + * to the length of the tags. 
The tag varies depending on + * the table that is being accessed */ + uint64_t getTag(uint64_t address, uint8_t table); + + void updateBtb(uint64_t address, bool isTaken, uint64_t target); + + void updateTaggedTables(uint64_t address, bool isTaken, uint64_t target); + + /** The bitlength of the BTB index; BTB will have 2^bits entries. */ + uint8_t btbBits_; + + /** A 2^bits length vector of pairs containing a satCntBits_-bit saturating + * counter and a branch target. */ + std::vector> btb_; + + uint64_t tageTableBits_ = 12; + uint8_t numTageTables_ = 6; + + std::vector> tageTables_; + + /** Fetch Target Queue containing the direction prediction and previous global + * history state of branches that are currently unresolved */ + std::deque ftq_; + + /** The number of bits used to form the saturating counter in a BTB entry. */ + uint8_t satCntBits_; + + /** The number of previous branch directions recorded globally. */ + uint16_t globalHistoryLength_; + + /** A return address stack. */ + std::deque ras_; + + /** RAS history with instruction address as the keys. A non-zero value + * represents the target prediction for a return instruction and a 0 entry for + * a branch-and-link instruction. */ + std::map rasHistory_; + + /** The size of the RAS. */ + uint16_t rasSize_; + + /** An n-bit history of previous branch directions where n is equal to + * globalHistoryLength_. Each bit represents a branch taken (1) or not + * taken (0), with the most recent branch being the least-significant-bit */ + BranchHistory globalHistory_; + + uint8_t tagLength_ = 8; + + // This variable is used only in debug mode -- therefore hide behind ifdef +#ifndef NDEBUG + /** The Id of the last instruction that update was called on -- used to + * ensure that update is called in program order. 
*/ + uint64_t lastUpdatedInstructionId = 0; +#endif +}; + +} // namespace simeng diff --git a/src/include/simeng/config/yaml/ryml.hh b/src/include/simeng/config/yaml/ryml.hh index bed8f4620b..c35a4925f9 100644 --- a/src/include/simeng/config/yaml/ryml.hh +++ b/src/include/simeng/config/yaml/ryml.hh @@ -229,7 +229,7 @@ #define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) -/** A preprocessor foreach. Spectacular trick taken from: +/** A preprocessor foreach. Spectacular trick isTaken from: * http://stackoverflow.com/a/1872506/5875572 * The first argument is for a macro receiving a single argument, * which will be called with every subsequent argument. There is @@ -1449,7 +1449,7 @@ using std::index_sequence_for; /** C++11 implementation of integer sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template struct integer_sequence { @@ -1461,7 +1461,7 @@ struct integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence = integer_sequence; @@ -1544,19 +1544,19 @@ struct __make_integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: 
http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_index_sequence = make_integer_sequence; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence_for = make_index_sequence; #endif @@ -4795,7 +4795,7 @@ namespace detail { /** @internal * @ingroup hash - * @see this was taken a great answer in stackoverflow: + * @see this was isTaken a great answer in stackoverflow: * https://stackoverflow.com/a/34597785/5875572 * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ template @@ -12377,7 +12377,7 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) * * So we fake it by using a dynamic format with an explicit * field size set to the length of the given span. - * This trick is taken from: + * This trick is isTaken from: * https://stackoverflow.com/a/18368910/5875572 */ /* this is the actual format we'll use for scanning */ @@ -14624,7 +14624,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { -//! taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +//! 
isTaken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -15704,7 +15704,7 @@ template using cspanrs = spanrs; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** A non-owning span which always retains the capacity of the original - * range it was taken from (though it may loose its original size). + * range it was isTaken from (though it may loose its original size). * The resizing methods resize(), ltrim(), rtrim() as well * as the subselection methods subspan(), range(), first() and last() can be * used at will without loosing the original capacity; the full capacity span diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 3690d7fd1c..34f4fe8f5a 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -17,6 +17,7 @@ set(SIMENG_SOURCES branchpredictors/AlwaysNotTakenPredictor.cc branchpredictors/GenericPredictor.cc branchpredictors/PerceptronPredictor.cc + branchpredictors/TagePredictor.cc config/ModelConfig.cc config/SimInfo.cc kernel/Linux.cc diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 45832347ce..9360ae65a8 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -222,6 +222,8 @@ void CoreInstance::createCore() { predictor_ = std::make_unique(); } else if (predictorType == "Perceptron") { predictor_ = std::make_unique(); + } else if (predictorType == "Tage") { + predictor_ = std::make_unique(); } // Extract the port arrangement from the config file diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index e3b697433e..1bf93c451f 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -106,7 +106,7 @@ std::tuple 
Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as taken, it will require a + // Instruction isn't a branch; if predicted as isTaken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index c71b581a60..5eb1091c6b 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -101,7 +101,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as taken, it will require a + // Instruction isn't a branch; if predicted as isTaken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc new file mode 100644 index 0000000000..c0074ac9c2 --- /dev/null +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -0,0 +1,286 @@ +#include "simeng/branchpredictors/TagePredictor.hh" + +#include + +namespace simeng { + +TagePredictor::TagePredictor(ryml::ConstNodeRef config) + : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), + satCntBits_( + config["Branch-Predictor"]["Saturating-Count-Bits"].as()), + globalHistoryLength_( + config["Branch-Predictor"]["Global-History-Length"].as()), + rasSize_(config["Branch-Predictor"]["RAS-entries"].as()), + globalHistory_(1 << (numTageTables_ + 1)) { + // Calculate the saturation counter boundary between weakly taken and + // not-taken. `(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state + // value + uint8_t weaklyTaken = 1 << (satCntBits_ - 1); + uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] + .as() == "Always-Taken") + ? 
weaklyTaken + : (weaklyTaken - 1); + // Create branch prediction structures + btb_ = + std::vector>(1 << btbBits_, {satCntVal, 0}); + + // Set up Tagged tables + for (uint32_t i = 0; i < numTageTables_; i++) { + std::vector newTable; + for (uint32_t j = 0; j < (1 << tageTableBits_); j++) { + TageEntry newEntry = {2, 0, 1, 0}; + newTable.push_back(newEntry); + } + tageTables_.push_back(newTable); + } +} + +TagePredictor::~TagePredictor() { + btb_.clear(); + ras_.clear(); + rasHistory_.clear(); + ftq_.clear(); +} + +BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, + int64_t knownOffset) { +// std::cout << "Predicting" << std::endl; + BranchPrediction prediction; + BranchPrediction altPrediction; + uint8_t predTable; + std::vector indices; + std::vector tags; + getTaggedPrediction(address, &prediction, &altPrediction, &predTable, + &indices, &tags); + + if (knownOffset != 0) prediction.target = address + knownOffset; + + // Amend prediction based on branch type + if (type == BranchType::Unconditional) { + prediction.isTaken = true; + predTable = 0; + } else if (type == BranchType::Return) { + prediction.isTaken = true; + // Return branches can use the RAS if an entry is available + if (ras_.size() > 0) { + prediction.target = ras_.back(); + // Record top of RAS used for target prediction + rasHistory_[address] = ras_.back(); + ras_.pop_back(); + } + predTable = 0; + } else if (type == BranchType::SubroutineCall) { + prediction.isTaken = true; + // Subroutine call branches must push their associated return address to RAS + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(address + 4); + // Record that this address is a branch-and-link instruction + rasHistory_[address] = 0; + predTable = 0; + } else if (type == BranchType::Conditional || + type == BranchType::LoopClosing) { + if (!prediction.isTaken) prediction.target = address + 4; + } + + // Store the hashed index for correct hashing in update() + ftqEntry newEntry = 
{predTable, indices, tags, prediction, altPrediction}; + ftq_.push_back(newEntry); + + // Speculatively update the global history + globalHistory_.addHistory(prediction.isTaken); + return prediction; +} + +void TagePredictor::update(uint64_t address, bool isTaken, + uint64_t targetAddress, + simeng::BranchType type, uint64_t instructionId) { +// std::cout << "Updating" << std::endl; + // Make sure that this function is called in program order; and then update + // the lastUpdatedInstructionId variable + assert(instructionId >= lastUpdatedInstructionId && + (lastUpdatedInstructionId = instructionId) >= 0 && + "Update not called on branch instructions in program order"); + + updateBtb(address, isTaken, targetAddress); + + updateTaggedTables(address, isTaken, targetAddress); + + // Update global history if prediction was incorrect + if (ftq_.front().prediction.isTaken != isTaken) { + // Bit-flip the global history bit corresponding to this prediction + // We know how many predictions there have since been by the size of the FTQ + globalHistory_.updateHistory(isTaken, ftq_.size()); + } + + // Pop ftq entry from ftq + ftq_.pop_front(); +} + +void TagePredictor::flush(uint64_t address) { +// std::cout << "Flush" << std::endl; + // If address interacted with RAS, rewind entry + auto it = rasHistory_.find(address); + if (it != rasHistory_.end()) { + uint64_t target = it->second; + if (target != 0) { + // If history entry belongs to a return instruction, push target back onto + // stack + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(target); + } else { + // If history entry belongs to a branch-and-link instruction, pop target + // off of stack + if (ras_.size()) { + ras_.pop_back(); + } + } + rasHistory_.erase(it); + } + + assert((ftq_.size() > 0) && + "Cannot flush instruction from Branch Predictor " + "when the ftq is empty"); + ftq_.pop_back(); + + // Roll back global history + globalHistory_.rollBack(); + +} + +BranchPrediction 
TagePredictor::getBtbPrediction(uint64_t address) { +// std::cout << "Getting BTB" << std::endl; + // Get prediction from BTB + uint64_t index = (address >> 2) & ((1 << btbBits_) - 1); + bool direction = (btb_[index].first >= (1 << (satCntBits_ - 1))); + uint64_t target = btb_[index].second; + return {direction, target}; +} + +void TagePredictor::getTaggedPrediction(uint64_t address, + BranchPrediction* prediction, + BranchPrediction* altPrediction, + uint8_t* predTable, + std::vector* indices, + std::vector* tags) { +// std::cout << "Getting Prediction" << std::endl; + // Get a basic prediction from the btb + BranchPrediction basePrediction = getBtbPrediction(address); + prediction->isTaken = basePrediction.isTaken; + prediction->target = basePrediction.target; + *predTable = 0; + + // Check each of the tagged predictor tables for an entry matching this + // branch. If found, update the best prediction. The greater the table + // number, the longer global history it has access to. Therefore, the + // greater the table number, the better the prediction. + for (uint8_t table = 0; table < numTageTables_; table++) { +// std::cout << "Checking table " << (table + 1) << std::endl; + uint64_t index = getTaggedIndex(address, table); + indices->push_back(index); + uint64_t tag = getTag(address, table); + tags->push_back(tag); + if (tageTables_[table][index].tag == tag) { +// std::cout << "Tag match -- " << std::endl; + altPrediction->isTaken = prediction->isTaken; + altPrediction->target = prediction->target; + + prediction->isTaken = (tageTables_[table][index].satCnt >= 2); + prediction->target = tageTables_[table][index].target; + *predTable = table; + } + } +} + +uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { +// std::cout << "getting Index" << std::endl; + // Hash function here is pretty arbitrary. 
+ uint64_t h1 = (address >> 2); + uint64_t h2 = globalHistory_.getFolded(1 << (table + 1), + (1 << tageTableBits_) - 1); +// std::cout << "Index: h1=" << h1 << " h2=" << h2 << " final=" +// << ((h1 ^ h2) & ((1 << tageTableBits_) - 1)) << std::endl; + return (h1 ^ h2) & ((1 << tageTableBits_) - 1); +} + +uint64_t TagePredictor::getTag(uint64_t address, uint8_t table) { +// std::cout << "getting Tag" << std::endl; + // Hash function here is pretty arbitrary. + uint64_t h1 = address; + uint64_t h2 = globalHistory_.getFolded((1 << table), + ((1 << tagLength_) - 1)); +// std::cout << "Tag: h1=" << h1 << " h2=" << h2 << " final=" +// << ((h1 ^ h2) & ((1 << tagLength_) - 1)) << std::endl; + return (h1 ^ h2) & ((1 << tagLength_) - 1); +} + + +void TagePredictor::updateBtb(uint64_t address, bool isTaken, + uint64_t targetAddress) { + // Calculate 2-bit saturating counter value + uint8_t satCntVal = btb_[((address >> 2) & ((1 << btbBits_) - 1))].first; + // Only alter value if it would transition to a valid state + if (!((satCntVal == (1 << satCntBits_) - 1) && isTaken) && + !(satCntVal == 0 && !isTaken)) { + satCntVal += isTaken ? 
1 : -1; + } + + // Update BTB entry + btb_[((address >> 2) & ((1 << btbBits_) - 1))].first = satCntVal; + if (isTaken) { + btb_[((address >> 2) & ((1 << btbBits_) - 1))].second = targetAddress; + } +} + + +void TagePredictor::updateTaggedTables(uint64_t address, bool isTaken, + uint64_t target) { + // Get stored information from the ftq + uint8_t predTable = ftq_.front().predTable; + std::vector indices = ftq_.front().indices; + std::vector tags = ftq_.front().tags; + BranchPrediction pred = ftq_.front().prediction; + BranchPrediction altPred = ftq_.front().altPrediction; + + + // Update the prediction counter + uint64_t predIndex = indices[predTable]; + if (isTaken && (tageTables_[predTable][predIndex].satCnt < 3)) { + (tageTables_[predTable][predIndex].satCnt)++; + } else if (!isTaken && (tageTables_[predTable][predIndex].satCnt > 0)) { + (tageTables_[predTable][predIndex].satCnt)--; + } + + // Allocate new entry if prediction wrong and possible -- Check higher order + // tagged predictor tables to see if there is a non-useful entry that can + // be replaced + if (isTaken != pred.isTaken || (isTaken && (target != pred.target))) { + bool allocated = false; + for (uint8_t table = predTable + 1; table < numTageTables_; table++) { + if (!allocated && (tageTables_[table][indices[table]].u <= 1)) { + tageTables_[table][indices[table]] = {((isTaken) ? 
(uint8_t)2 : + (uint8_t)1), + tags[table], (uint8_t)2, target}; + allocated = true; + } + } + } + + // Update the usefulness counters if prediction differs from alt-prediction + if (pred.isTaken != altPred.isTaken || + (pred.isTaken && (pred.target != altPred.target))) { + bool wasUseful = (pred.isTaken == isTaken); + uint8_t currentU = tageTables_[predTable][indices[predTable]].u; + if (wasUseful && currentU < 3) { + (tageTables_[predTable][indices[predTable]].u)++; + } if (!wasUseful && currentU > 0) { + (tageTables_[predTable][indices[predTable]].u)--; + } + + } +} + +} // namespace simeng \ No newline at end of file diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 6d6152ced4..2795036c37 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -504,7 +504,7 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation("Perceptron", "Type")); expectations_["Branch-Predictor"]["Type"].setValueSet( - std::vector{"Generic", "Perceptron"}); + std::vector{"Generic", "Perceptron", "Tage"}); expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation(8, "BTB-Tag-Bits")); @@ -530,8 +530,9 @@ void ModelConfig::setExpectations(bool isDefault) { // Ensure the key "Branch-Predictor:Type" exists before querying the // associated YAML node if (configTree_["Branch-Predictor"].has_child(ryml::to_csubstr("Type"))) { - if (configTree_["Branch-Predictor"]["Type"].as() == - "Generic") { + if ((configTree_["Branch-Predictor"]["Type"].as() == + "Generic") || (configTree_["Branch-Predictor"]["Type"] + .as() == "Tage")) { expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation( 2, "Saturating-Count-Bits")); diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 92b8e9393a..00279300b8 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ b/test/unit/aarch64/InstructionTest.cc @@ 
-493,7 +493,7 @@ TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch taken) is handled correctly +// Test that a correct prediction (branch isTaken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -510,7 +510,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is taken is handled correctly + // Test a correct prediction where branch is isTaken is handled correctly pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -522,7 +522,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not taken) is handled correctly +// Test that a correct prediction (branch not isTaken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_notTaken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -539,7 +539,7 @@ TEST_F(AArch64InstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't taken is handled correctly + // Test a correct prediction where a branch isn't isTaken is handled correctly pred = {false, 80 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -580,7 +580,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); } -// Test that an incorrect prediction (wrong taken) is handled correctly +// Test that an incorrect prediction (wrong isTaken) is handled correctly 
TEST_F(AArch64InstructionTest, incorrectPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -597,7 +597,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - taken is wrong + // Test an incorrect prediction is handled correctly - isTaken is wrong pred = {true, 100 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index 6103cd4f5c..c40b503a6c 100644 --- a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -467,7 +467,7 @@ TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch taken) is handled correctly +// Test that a correct prediction (branch isTaken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -484,7 +484,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is taken is handled correctly + // Test a correct prediction where branch is isTaken is handled correctly pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -497,7 +497,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not taken) is handled correctly +// Test that a correct prediction (branch not isTaken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_notTaken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, 
*bgeuMetadata.get()); @@ -514,7 +514,7 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't taken is handled correctly + // Test a correct prediction where a branch isn't isTaken is handled correctly // imm operand 0x28 has 4 added implicitly by dissassembler pred = {false, 400 + 4}; insn.setBranchPrediction(pred); @@ -559,7 +559,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 400 - 86); } -// Test that an incorrect prediction (wrong taken) is handled correctly +// Test that an incorrect prediction (wrong isTaken) is handled correctly TEST_F(RiscVInstructionTest, incorrectPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -576,7 +576,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - taken is wrong + // Test an incorrect prediction is handled correctly - isTaken is wrong // imm operand 0x28 has 4 added implicitly by dissassembler pred = {true, 400 - 86}; insn.setBranchPrediction(pred); From d2a651bd57154dc52393ca28d76b5058283badc2 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 5 Nov 2024 11:43:39 +0000 Subject: [PATCH 41/69] Rebasing to dev --- .../AlwaysNotTakenPredictor.hh | 30 +++ .../branchpredictors/BranchPredictor.hh | 97 +++++++++ .../branchpredictors/GenericPredictor.hh | 89 ++++++++ .../branchpredictors/PerceptronPredictor.hh | 105 +++++++++ .../AlwaysNotTakenPredictor.cc | 14 ++ src/lib/branchpredictors/GenericPredictor.cc | 156 ++++++++++++++ .../branchpredictors/PerceptronPredictor.cc | 201 ++++++++++++++++++ 7 files changed, 692 insertions(+) create mode 100644 src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh 
create mode 100644 src/include/simeng/branchpredictors/BranchPredictor.hh create mode 100644 src/include/simeng/branchpredictors/GenericPredictor.hh create mode 100644 src/include/simeng/branchpredictors/PerceptronPredictor.hh create mode 100644 src/lib/branchpredictors/AlwaysNotTakenPredictor.cc create mode 100644 src/lib/branchpredictors/GenericPredictor.cc create mode 100644 src/lib/branchpredictors/PerceptronPredictor.cc diff --git a/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh new file mode 100644 index 0000000000..6d88ae70fc --- /dev/null +++ b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh @@ -0,0 +1,30 @@ +#pragma once + +#include "simeng/branchpredictors/BranchPredictor.hh" + +namespace simeng { + +/** An "Always Not Taken" branch predictor; predicts all branches as not + * taken. */ +class AlwaysNotTakenPredictor : public BranchPredictor { + public: + /** Generate a branch prediction for the specified instruction address; will + * always predict not taken. */ + BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. */ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provide flush logic for branch prediction scheme. As there's no flush + * logic for an always not taken predictor, this does nothing.
*/ + void flush(uint64_t address) override; + + private: +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh new file mode 100644 index 0000000000..7779fe0703 --- /dev/null +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -0,0 +1,97 @@ +#pragma once + +#include +#include + +#include "simeng/Instruction.hh" +#include "simeng/branchpredictors/BranchPrediction.hh" +#include "simeng/pipeline/PipelineBuffer.hh" + +namespace simeng { + +/** An abstract branch predictor interface. */ +class BranchPredictor { + public: + virtual ~BranchPredictor(){}; + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset. Returns a branch direction and + * branch target address. */ + virtual BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) = 0; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. */ + virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) = 0; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ + virtual void flush(uint64_t address) = 0; + + /** + * Overloaded function for flushing branch instructions from a + * PipelineBuffer. Accepts a reference to a PipelineBuffer of microOps. 
+   * Iterates over the entries of the PipelineBuffer and, if they are a
+   * branch instruction, flushes them.
+   */
+  void flushBranchesInBufferFromSelf(
+      pipeline::PipelineBuffer<std::shared_ptr<Instruction>>& buffer) {
+    for (size_t slot = 0; slot < buffer.getWidth(); slot++) {
+      // The tail and head slots are bound to separate references. Assigning
+      // to one shared reference would not re-bind it; it would copy the head
+      // slot over the tail slot and so modify the buffer.
+      const auto& tailUop = buffer.getTailSlots()[slot];
+      if (tailUop != nullptr && tailUop->isBranch()) {
+        flush(tailUop->getInstructionAddress());
+      }
+
+      const auto& headUop = buffer.getHeadSlots()[slot];
+      if (headUop != nullptr && headUop->isBranch()) {
+        flush(headUop->getInstructionAddress());
+      }
+    }
+  }
+
+  /**
+   * Overloaded function for flushing branch instructions from a
+   * PipelineBuffer. Accepts a reference to a PipelineBuffer macroOps.
+   * Iterates over the entries of the PipelineBuffer and, if they are a
+   * branch instruction, flushes them. The buffer itself is left unmodified.
+   */
+  void flushBranchesInBufferFromSelf(
+      pipeline::PipelineBuffer<std::vector<std::shared_ptr<Instruction>>>&
+          buffer) {
+    for (size_t slot = 0; slot < buffer.getWidth(); slot++) {
+      // As above: separate references, so that the head macro-op is never
+      // copied over the tail macro-op.
+      const auto& tailMacroOp = buffer.getTailSlots()[slot];
+      for (size_t uop = 0; uop < tailMacroOp.size(); uop++) {
+        if (tailMacroOp[uop]->isBranch()) {
+          flush(tailMacroOp[uop]->getInstructionAddress());
+        }
+      }
+
+      const auto& headMacroOp = buffer.getHeadSlots()[slot];
+      for (size_t uop = 0; uop < headMacroOp.size(); uop++) {
+        if (headMacroOp[uop]->isBranch()) {
+          flush(headMacroOp[uop]->getInstructionAddress());
+        }
+      }
+    }
+  }
+
+  /** lastUpdatedInstructionId_ is used only in debug mode. Clang throws a
+   * warning (which becomes an error with our cmake flags) for unused
+   * variables. If the [[maybe_unused]] attribute is added to avoid this,
+   * then gcc throws a warning (which becomes an error) because it ignores
+   * this attribute. Therefore, to avoid the above catch 22, this variable is
+   * hidden behind an ifdef such that it is declared only in debug mode; when
+   * it is used. */
+#ifndef NDEBUG
+  /** The Id of the last instruction that update was called on -- used to
+   * ensure that update is called in program order.
*/ + uint64_t lastUpdatedInstructionId_ = 0; +#endif +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/GenericPredictor.hh b/src/include/simeng/branchpredictors/GenericPredictor.hh new file mode 100644 index 0000000000..8474d757e2 --- /dev/null +++ b/src/include/simeng/branchpredictors/GenericPredictor.hh @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include + +#include "simeng/branchpredictors/BranchPredictor.hh" +#include "simeng/config/SimInfo.hh" + +namespace simeng { + +/** A generic branch predictor implementing well known/text book branch + * predictor logic. The following predictors have been included: + * + * - Static predictor based on pre-allocated branch type. + * + * - A Branch Target Buffer (BTB) with a local and global indexing scheme and a + * 2-bit saturating counter. + * + * - A Return Address Stack (RAS) is also in use. + */ + +class GenericPredictor : public BranchPredictor { + public: + /** Initialise predictor models. */ + GenericPredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); + ~GenericPredictor(); + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset. Returns a branch direction and + * branch target address. */ + BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. */ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. 
Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ + void flush(uint64_t address) override; + + private: + /** The bitlength of the BTB index; BTB will have 2^bits entries. */ + uint8_t btbBits_; + + /** A 2^bits length vector of pairs containing a satCntBits_-bit saturating + * counter and a branch target. */ + std::vector> btb_; + + /** Fetch Target Queue containing the direction prediction and previous global + * history state of branches that are currently unresolved */ + std::deque> ftq_; + + /** The number of bits used to form the saturating counter in a BTB entry. */ + uint8_t satCntBits_; + + /** An n-bit history of previous branch directions where n is equal to + * globalHistoryLength_. Each bit represents a branch taken (1) or not + * taken (0), with the most recent branch being the least-significant-bit */ + uint64_t globalHistory_ = 0; + + /** The number of previous branch directions recorded globally. */ + uint16_t globalHistoryLength_; + + /** A bit mask for truncating the global history to the correct size. + * Stored as a member variable to avoid duplicative calculation */ + uint64_t globalHistoryMask_; + + /** A return address stack. */ + std::deque ras_; + + /** RAS history with instruction address as the keys. A non-zero value + * represents the target prediction for a return instruction and a 0 entry for + * a branch-and-link instruction. */ + std::map rasHistory_; + + /** The size of the RAS. 
*/ + uint16_t rasSize_; +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/PerceptronPredictor.hh b/src/include/simeng/branchpredictors/PerceptronPredictor.hh new file mode 100644 index 0000000000..c5d3c87f19 --- /dev/null +++ b/src/include/simeng/branchpredictors/PerceptronPredictor.hh @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include +#include + +#include "simeng/branchpredictors/BranchPredictor.hh" +#include "simeng/config/SimInfo.hh" + +namespace simeng { + +/** A Perceptron branch predictor implementing the branch predictor described in + * Jimenez and Lin ("Dynamic branch prediction with perceptrons", IEEE High- + * Performance Computer Architecture Symposium Proceedings (2001), 197-206 -- + * https://www.cs.utexas.edu/~lin/papers/hpca01.pdf). + * The following predictors have been included: + * + * - Static predictor based on pre-allocated branch type. + * + * - A Branch Target Buffer (BTB) with a local and global indexing scheme and a + * perceptron. + * + * - A Return Address Stack (RAS) is also in use. + */ + +class PerceptronPredictor : public BranchPredictor { + public: + /** Initialise predictor models. */ + PerceptronPredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); + ~PerceptronPredictor(); + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset. Returns a branch direction and + * branch target address. */ + BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. 
*/ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ + void flush(uint64_t address) override; + + private: + /** Returns the dot product of a perceptron and a history vector. Used to + * determine a direction prediction */ + int64_t getDotProduct(const std::vector& perceptron, + uint64_t history); + + /** The length in bits of the BTB index; BTB will have 2^bits entries. */ + uint64_t btbBits_; + + /** A 2^bits length vector of pairs containing a perceptron with + * globalHistoryLength_ + 1 inputs, and a branch target. + * The perceptrons are used to provide a branch direction prediction by + * taking a dot product with the global history, as described + * in Jimenez and Lin */ + std::vector, uint64_t>> btb_; + + /** Fetch Target Queue containing the dot product of the perceptron and the + * global history; and the global history, both at the time of prediction, + * for each of the branch instructions that are currently unresolved. The dot + * product represents the confidence of the perceptron's direction + * prediction and is needed for a correct update when the branch + * instruction is resolved. */ + std::deque> ftq_; + + /** An n-bit history of previous branch directions where n is equal to + * globalHistoryLength_. Each bit represents a branch taken (1) or not + * taken (0), with the most recent branch being the least-significant-bit */ + uint64_t globalHistory_ = 0; + + /** The number of previous branch directions recorded globally. 
*/ + uint64_t globalHistoryLength_; + + /** A bit mask for truncating the global history to the correct size. + * Stored as a member variable to avoid duplicative calculation */ + uint64_t globalHistoryMask_; + + /** The magnitude of the dot product of the perceptron and the global history, + * below which the perceptron's weight must be updated */ + uint64_t trainingThreshold_; + + /** A return address stack. */ + std::deque ras_; + + /** RAS history with instruction address as the keys. A non-zero value + * represents the target prediction for a return instruction and a 0 entry for + * a branch-and-link instruction. */ + std::map rasHistory_; + + /** The size of the RAS. */ + uint64_t rasSize_; +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc new file mode 100644 index 0000000000..6fe109fe7f --- /dev/null +++ b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc @@ -0,0 +1,14 @@ +#include "simeng/branchpredictors/AlwaysNotTakenPredictor.hh" + +namespace simeng { +BranchPrediction AlwaysNotTakenPredictor::predict( + [[maybe_unused]] uint64_t address, BranchType type, int64_t knownOffset) { + return {false, 0}; +} + +void AlwaysNotTakenPredictor::update(uint64_t address, bool taken, + uint64_t targetAddress, BranchType type, + uint64_t instructionId) {} + +void AlwaysNotTakenPredictor::flush(uint64_t address) {} +} // namespace simeng \ No newline at end of file diff --git a/src/lib/branchpredictors/GenericPredictor.cc b/src/lib/branchpredictors/GenericPredictor.cc new file mode 100644 index 0000000000..cd41f20cac --- /dev/null +++ b/src/lib/branchpredictors/GenericPredictor.cc @@ -0,0 +1,156 @@ +#include "simeng/branchpredictors/GenericPredictor.hh" + +#include + +namespace simeng { + +GenericPredictor::GenericPredictor(ryml::ConstNodeRef config) + : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), + satCntBits_( + 
config["Branch-Predictor"]["Saturating-Count-Bits"].as()), + globalHistoryLength_( + config["Branch-Predictor"]["Global-History-Length"].as()), + rasSize_(config["Branch-Predictor"]["RAS-entries"].as()) { + // Calculate the saturation counter boundary between weakly taken and + // not-taken. `(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state + // value + uint8_t weaklyTaken = 1 << (satCntBits_ - 1); + uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] + .as() == "Always-Taken") + ? weaklyTaken + : (weaklyTaken - 1); + // Create branch prediction structures + btb_ = std::vector>(1ull << btbBits_, + {satCntVal, 0}); + + // Generate a bitmask that is used to ensure only the relevant number of + // bits are stored in the global history. This is two times the + // globalHistoryLength_ to allow rolling back of the speculatively updated + // global history in the event of a misprediction. + globalHistoryMask_ = (1ull << (globalHistoryLength_ * 2)) - 1; +} + +GenericPredictor::~GenericPredictor() { + btb_.clear(); + ras_.clear(); + rasHistory_.clear(); + ftq_.clear(); +} + +BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, + int64_t knownOffset) { + // Get index via an XOR hash between the global history and the instruction + // address. This hash is then ANDed to keep it within bounds of the btb. + // The address is shifted to remove the two least-significant bits as these + // are always 0 in an ISA with 4-byte aligned instructions. + uint64_t hashedIndex = + ((address >> 2) ^ globalHistory_) & ((1ull << btbBits_) - 1); + + // Get prediction from BTB + bool direction = btb_[hashedIndex].first >= (1ull << (satCntBits_ - 1)); + uint64_t target = + (knownOffset != 0) ? 
address + knownOffset : btb_[hashedIndex].second; + BranchPrediction prediction = {direction, target}; + + // Amend prediction based on branch type + if (type == BranchType::Unconditional) { + prediction.isTaken = true; + } else if (type == BranchType::Return) { + prediction.isTaken = true; + // Return branches can use the RAS if an entry is available + if (ras_.size() > 0) { + prediction.target = ras_.back(); + // Record top of RAS used for target prediction + rasHistory_[address] = ras_.back(); + ras_.pop_back(); + } + } else if (type == BranchType::SubroutineCall) { + prediction.isTaken = true; + // Subroutine call branches must push their associated return address to RAS + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(address + 4); + // Record that this address is a branch-and-link instruction + rasHistory_[address] = 0; + } else if (type == BranchType::Conditional) { + if (!prediction.isTaken) prediction.target = address + 4; + } + + // Store the hashed index for correct hashing in update() + ftq_.emplace_back(prediction.isTaken, hashedIndex); + + // Speculatively update the global history + globalHistory_ = + ((globalHistory_ << 1) | prediction.isTaken) & globalHistoryMask_; + + return prediction; +} + +void GenericPredictor::update(uint64_t address, bool isTaken, + uint64_t targetAddress, BranchType type, + uint64_t instructionId) { + // Make sure that this function is called in program order; and then update + // the lastUpdatedInstructionId variable + assert(instructionId >= lastUpdatedInstructionId_ && + (lastUpdatedInstructionId_ = instructionId) >= 0 && + "Update not called on branch instructions in program order"); + + // Get previous prediction and index calculated from the FTQ + bool prevPrediction = ftq_.front().first; + uint64_t hashedIndex = ftq_.front().second; + ftq_.pop_front(); + + // Calculate 2-bit saturating counter value + uint8_t satCntVal = btb_[hashedIndex].first; + // Only alter value if it would transition to 
a valid state + if (!((satCntVal == (1 << satCntBits_) - 1) && isTaken) && + !(satCntVal == 0 && !isTaken)) { + satCntVal += isTaken ? 1 : -1; + } + + // Update BTB entry + btb_[hashedIndex].first = satCntVal; + if (isTaken) { + btb_[hashedIndex].second = targetAddress; + } + + // Update global history if prediction was incorrect + if (prevPrediction != isTaken) { + // Bit-flip the global history bit corresponding to this prediction + // We know how many predictions there have since been by the size of the FTQ + globalHistory_ ^= (1ull << (ftq_.size())); + } +} + +void GenericPredictor::flush(uint64_t address) { + // If address interacted with RAS, rewind entry + auto it = rasHistory_.find(address); + if (it != rasHistory_.end()) { + uint64_t target = it->second; + if (target != 0) { + // If history entry belongs to a return instruction, push target back onto + // stack + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(target); + } else { + // If history entry belongs to a branch-and-link instruction, pop target + // off of stack + if (ras_.size()) { + ras_.pop_back(); + } + } + rasHistory_.erase(it); + } + + assert((ftq_.size() > 0) && + "Cannot flush instruction from Branch Predictor " + "when the ftq is empty"); + ftq_.pop_back(); + + // Roll back global history + globalHistory_ >>= 1; +} +} // namespace simeng \ No newline at end of file diff --git a/src/lib/branchpredictors/PerceptronPredictor.cc b/src/lib/branchpredictors/PerceptronPredictor.cc new file mode 100644 index 0000000000..09151e37dc --- /dev/null +++ b/src/lib/branchpredictors/PerceptronPredictor.cc @@ -0,0 +1,201 @@ +#include "simeng/branchpredictors/PerceptronPredictor.hh" + +namespace simeng { + +PerceptronPredictor::PerceptronPredictor(ryml::ConstNodeRef config) + : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), + globalHistoryLength_( + config["Branch-Predictor"]["Global-History-Length"].as()), + rasSize_(config["Branch-Predictor"]["RAS-entries"].as()) { + 
// Build BTB based on config options + uint32_t btbSize = (1ul << btbBits_); + btb_.resize(btbSize); + + // Initialise perceptron values with 0 for the global history weights, and 1 + // for the bias weight; and initialise the target with 0 (i.e., unknown) + for (uint32_t i = 0; i < btbSize; i++) { + btb_[i].first.assign(globalHistoryLength_, 0); + btb_[i].first.push_back(1); + btb_[i].second = 0; + } + + // Set up training threshold according to empirically determined formula + trainingThreshold_ = (uint64_t)((1.93 * globalHistoryLength_) + 14); + + // Generate a bitmask that is used to ensure only the relevant number of + // bits are stored in the global history. This is two times the + // globalHistoryLength_ to allow rolling back of the speculatively updated + // global history in the event of a misprediction. + globalHistoryMask_ = (1ull << (globalHistoryLength_ * 2)) - 1; +} + +PerceptronPredictor::~PerceptronPredictor() { + ras_.clear(); + rasHistory_.clear(); + ftq_.clear(); +} + +BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, + int64_t knownOffset) { + // Get the hashed index for the prediction table. XOR the global history with + // the non-zero bits of the address, and then keep only the btbBits_ bits of + // the output to keep it in bounds of the prediction table. + // The address is shifted to remove the two least-significant bits as these + // are always 0 in an ISA with 4-byte aligned instructions. + uint64_t hashedIndex = + ((address >> 2) ^ globalHistory_) & ((1ull << btbBits_) - 1); + + // Retrieve the perceptron from the BTB + std::vector perceptron = btb_[hashedIndex].first; + + // Get dot product of perceptron and history + int64_t Pout = getDotProduct(perceptron, globalHistory_); + + // Determine direction prediction based on its sign + bool direction = (Pout >= 0); + + // If there is a known offset then calculate target accordingly, otherwise + // retrieve the target prediction from the btb. 
+ uint64_t target = + (knownOffset != 0) ? address + knownOffset : btb_[hashedIndex].second; + + BranchPrediction prediction = {direction, target}; + + // Amend prediction based on branch type + if (type == BranchType::Unconditional) { + prediction.isTaken = true; + } else if (type == BranchType::Return) { + prediction.isTaken = true; + // Return branches can use the RAS if an entry is available + if (ras_.size() > 0) { + prediction.target = ras_.back(); + // Record top of RAS used for target prediction + rasHistory_[address] = ras_.back(); + ras_.pop_back(); + } + } else if (type == BranchType::SubroutineCall) { + prediction.isTaken = true; + // Subroutine call branches must push their associated return address to RAS + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(address + 4); + // Record that this address is a branch-and-link instruction + rasHistory_[address] = 0; + } else if (type == BranchType::Conditional) { + if (!prediction.isTaken) prediction.target = address + 4; + } + + // Store the Pout and global history for correct update() -- + // needs to be global history and not the hashed index as hashing loses + // information and the global history is required for updating perceptrons. 
+ ftq_.emplace_back(Pout, globalHistory_); + + // Speculatively update the global history based on the direction + // prediction being made + globalHistory_ = + ((globalHistory_ << 1) | prediction.isTaken) & globalHistoryMask_; + + return prediction; +} + +void PerceptronPredictor::update(uint64_t address, bool isTaken, + uint64_t targetAddress, BranchType type, + uint64_t instructionId) { + // Make sure that this function is called in program order; and then update + // the lastUpdatedInstructionId variable + assert(instructionId >= lastUpdatedInstructionId_ && + (lastUpdatedInstructionId_ = instructionId) >= 0 && + "Update not called on branch instructions in program order"); + + // Retrieve the previous global history and branch direction prediction from + // the front of the ftq (assumes branches are updated in program order). + int64_t prevPout = ftq_.front().first; + uint64_t prevGlobalHistory = ftq_.front().second; + ftq_.pop_front(); + + // Work out hashed index + uint64_t hashedIndex = + ((address >> 2) ^ prevGlobalHistory) & ((1ull << btbBits_) - 1); + + std::vector perceptron = btb_[hashedIndex].first; + + // Work out the most recent prediction + bool directionPrediction = (prevPout >= 0); + + // Update the perceptron if the prediction was wrong, or the dot product's + // magnitude was not greater than the training threshold + if ((directionPrediction != isTaken) || + (static_cast(std::abs(prevPout)) < trainingThreshold_)) { + int8_t t = (isTaken) ? 1 : -1; + + for (uint64_t i = 0; i < globalHistoryLength_; i++) { + int8_t xi = ((prevGlobalHistory & + (1ull << ((globalHistoryLength_ - 1) - i))) == 0) + ? 
-1 + : 1; + int8_t product_xi_t = xi * t; + // Make sure no overflow (+-127) + if (!(perceptron[i] == 127 && product_xi_t == 1) && + !(perceptron[i] == -127 && product_xi_t == -1)) { + perceptron[i] += product_xi_t; + } + } + perceptron[globalHistoryLength_] += t; + } + + btb_[hashedIndex].first = perceptron; + if (isTaken) { + btb_[hashedIndex].second = targetAddress; + } + + // Update global history if prediction was incorrect + // Bit-flip the global history bit corresponding to this prediction + // We know how many predictions there have since been by the size of the FTQ + if (directionPrediction != isTaken) globalHistory_ ^= (1ull << (ftq_.size())); +} + +void PerceptronPredictor::flush(uint64_t address) { + // If address interacted with RAS, rewind entry + auto it = rasHistory_.find(address); + if (it != rasHistory_.end()) { + uint64_t target = it->second; + if (target != 0) { + // If history entry belongs to a return instruction, push target back onto + // stack + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(target); + } else { + // If history entry belongs to a branch-and-link instruction, pop target + // off of stack + if (ras_.size()) { + ras_.pop_back(); + } + } + rasHistory_.erase(it); + } + + assert((ftq_.size() > 0) && + "Cannot flush instruction from Branch Predictor " + "when the ftq is empty"); + ftq_.pop_back(); + + // Roll back global history + globalHistory_ >>= 1; +} + +int64_t PerceptronPredictor::getDotProduct( + const std::vector& perceptron, uint64_t history) { + int64_t Pout = perceptron[globalHistoryLength_]; + for (uint64_t i = 0; i < globalHistoryLength_; i++) { + // Get branch direction for ith entry in the history + bool historyTaken = + ((history & (1ull << ((globalHistoryLength_ - 1) - i))) != 0); + Pout += historyTaken ? 
perceptron[i] : (0 - perceptron[i]); + } + return Pout; +} + +} // namespace simeng \ No newline at end of file From 111a48fe434672462e0a8e321f408603452d0dfe Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Thu, 14 Nov 2024 12:38:17 +0000 Subject: [PATCH 42/69] Rebasing --- src/lib/models/inorder/Core.cc | 8 ++++---- test/unit/pipeline/FetchUnitTest.cc | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/lib/models/inorder/Core.cc b/src/lib/models/inorder/Core.cc index 80b1281118..4df754a60d 100644 --- a/src/lib/models/inorder/Core.cc +++ b/src/lib/models/inorder/Core.cc @@ -147,10 +147,10 @@ std::map Core::getStats() const { std::ostringstream ipcStr; ipcStr << std::setprecision(2) << ipc; - return {{"cycles", std::to_string(ticks_)}, - {"retired", std::to_string(retired)}, - {"ipc", ipcStr.str()}, - {"flushes", std::to_string(flushes_)}, + return { + {"cycles", std::to_string(ticks_)}, {"retired", std::to_string(retired)}, + {"ipc", ipcStr.str()}, {"flushes", std::to_string(flushes_)}, + }; } void Core::raiseException(const std::shared_ptr& instruction) { diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 331729ba23..2c1c99b69b 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -94,7 +94,7 @@ TEST_P(PipelineFetchUnitTest, TickStalled) { EXPECT_CALL(isa, predecode(_, _, _, _)).Times(0); - EXPECT_CALL(predictor, predict(_, _, _, _)).Times(0); + EXPECT_CALL(predictor, predict(_, _, _)).Times(0); fetchUnit.tick(); @@ -279,7 +279,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); BranchPrediction pred = {true, pc + knownOff}; - EXPECT_CALL(predictor, predict(20, bType, knownOff, false)) + EXPECT_CALL(predictor, predict(20, bType, knownOff)) .WillOnce(Return(pred)); 
fetchUnit.tick(); @@ -326,7 +326,7 @@ TEST_P(PipelineFetchUnitTest, supplyFromLoopBuffer) { // Set the expectation from the predictor to be true so a loop body will // be detected - ON_CALL(predictor, predict(_, _, _, _)) + ON_CALL(predictor, predict(_, _, _)) .WillByDefault(Return(BranchPrediction({true, 0x0}))); // Set Loop Buffer state to be LoopBufferState::FILLING @@ -418,7 +418,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Set the first expectation from the predictor to be true so a loop body will // be detected - EXPECT_CALL(predictor, predict(_, _, _, false)) + EXPECT_CALL(predictor, predict(_, _, _)) .WillOnce(Return(BranchPrediction({true, 0x0}))); // Set Loop Buffer state to be LoopBufferState::FILLING @@ -430,7 +430,7 @@ TEST_P(PipelineFetchUnitTest, idleLoopBufferDueToNotTakenBoundary) { // Fetch the next block of instructions from memory and change the expected // outcome of the branch predictor fetchUnit.requestFromPC(); - EXPECT_CALL(predictor, predict(_, _, _, false)) + EXPECT_CALL(predictor, predict(_, _, _)) .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); // Attempt to fill Loop Buffer but prevent it on a not taken outcome at the From c66129402985c0d8a05fbbc3b782bf26f0c77e84 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Thu, 14 Nov 2024 12:39:09 +0000 Subject: [PATCH 43/69] Adding Tage config file --- configs/a64fx_Tage.yaml | 387 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 387 insertions(+) create mode 100644 configs/a64fx_Tage.yaml diff --git a/configs/a64fx_Tage.yaml b/configs/a64fx_Tage.yaml new file mode 100644 index 0000000000..930b2841c2 --- /dev/null +++ b/configs/a64fx_Tage.yaml @@ -0,0 +1,387 @@ +# The following resources were utilised to create the config file and naming schemes: +# https://github.com/fujitsu/A64FX + +Core: + ISA: AArch64 + Simulation-Mode: outoforder + Clock-Frequency-GHz: 1.8 + Timer-Frequency-MHz: 100 + 
Micro-Operations: True + Vector-Length: 512 +Fetch: + Fetch-Block-Size: 32 + Loop-Buffer-Size: 48 + Loop-Detection-Threshold: 4 +Process-Image: + Heap-Size: 1073741824 + Stack-Size: 1048576 +Register-Set: + GeneralPurpose-Count: 96 + FloatingPoint/SVE-Count: 128 + Predicate-Count: 48 + Conditional-Count: 128 +Pipeline-Widths: + Commit: 4 + FrontEnd: 4 + LSQ-Completion: 2 +Queue-Sizes: + ROB: 128 + Load: 40 + Store: 24 +Branch-Predictor: + Type: "Tage" + BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 + Global-History-Length: 19 + RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" +L1-Data-Memory: + Interface-Type: Fixed +L1-Instruction-Memory: + Interface-Type: Flat +LSQ-L1-Interface: + Access-Latency: 5 + Exclusive: True + Load-Bandwidth: 128 + Store-Bandwidth: 64 + Permitted-Requests-Per-Cycle: 2 + Permitted-Loads-Per-Cycle: 2 + Permitted-Stores-Per-Cycle: 1 +Ports: + 0: + Portname: FLA + Instruction-Group-Support: + - FP + - SVE + 1: + Portname: PR + Instruction-Group-Support: + - PREDICATE + 2: + Portname: EXA + Instruction-Group-Support: + - INT_SIMPLE + - INT_MUL + - STORE_DATA + 3: + Portname: FLB + Instruction-Group-Support: + - FP_SIMPLE + - FP_MUL + - SVE_SIMPLE + - SVE_MUL + 4: + Portname: EXB + Instruction-Group-Support: + - INT_SIMPLE + - INT_DIV_OR_SQRT + 5: + Portname: EAGA + Instruction-Group-Support: + - LOAD + - STORE_ADDRESS + - INT_SIMPLE_ARTH_NOSHIFT + - INT_SIMPLE_LOGICAL_NOSHIFT + - INT_SIMPLE_CMP + 6: + Portname: EAGB + Instruction-Group-Support: + - LOAD + - STORE_ADDRESS + - INT_SIMPLE_ARTH_NOSHIFT + - INT_SIMPLE_LOGICAL_NOSHIFT + - INT_SIMPLE_CMP + 7: + Portname: BR + Instruction-Group-Support: + - BRANCH +Reservation-Stations: + 0: + Size: 20 + Dispatch-Rate: 2 + Ports: + - FLA + - PR + - EXA + 1: + Size: 20 + Dispatch-Rate: 2 + Ports: + - FLB + - EXB + 2: + Size: 10 + Dispatch-Rate: 1 + Ports: + - EAGA + 3: + Size: 10 + Dispatch-Rate: 1 + Ports: + - EAGB + 4: + Size: 19 + Dispatch-Rate: 1 + Ports: + - BR +Execution-Units: + 0: + 
Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 1: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 2: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 3: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 4: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 5: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 6: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT + 7: + Pipelined: True + Blocking-Groups: + - INT_DIV_OR_SQRT + - FP_DIV_OR_SQRT + - SVE_DIV_OR_SQRT +Latencies: + 0: + Instruction-Groups: + - INT + Execution-Latency: 2 + Execution-Throughput: 2 + 1: + Instruction-Groups: + - INT_SIMPLE_ARTH_NOSHIFT + - INT_SIMPLE_LOGICAL_NOSHIFT + - INT_SIMPLE_CVT + Execution-Latency: 1 + Execution-Throughput: 1 + 2: + Instruction-Groups: + - INT_MUL + Execution-Latency: 5 + Execution-Throughput: 1 + 3: + Instruction-Groups: + - INT_DIV_OR_SQRT + Execution-Latency: 41 + Execution-Throughput: 41 + 4: + Instruction-Groups: + - SCALAR_SIMPLE + - VECTOR_SIMPLE_LOGICAL + - SVE_SIMPLE_LOGICAL + - VECTOR_SIMPLE_CMP + - SVE_SIMPLE_CMP + Execution-Latency: 4 + Execution-Throughput: 1 + 5: + Instruction-Groups: + - FP_DIV_OR_SQRT + Execution-Latency: 29 + Execution-Throughput: 29 + 6: + Instruction-Groups: + - VECTOR_SIMPLE + - SVE_SIMPLE + - SCALAR_SIMPLE_CVT + - FP_MUL + - SVE_MUL + Execution-Latency: 9 + Execution-Throughput: 1 + 7: + Instruction-Groups: + - SVE_DIV_OR_SQRT + Execution-Latency: 98 + Execution-Throughput: 98 + 8: + Instruction-Groups: + - PREDICATE + Execution-Latency: 3 + Execution-Throughput: 1 + 9: + Instruction-Groups: + - LOAD_SCALAR + - LOAD_VECTOR + - STORE_ADDRESS_SCALAR + - STORE_ADDRESS_VECTOR + Execution-Latency: 3 + 
Execution-Throughput: 1 + 10: + Instruction-Groups: + - LOAD_SVE + - STORE_ADDRESS_SVE + Execution-Latency: 6 + Execution-Throughput: 1 + # Indexed FMLA instructions split into 2 dependent µops. Latency increased to 15 to mimic such behaviour + # NOTE: Any changes to the capstone opcode list could invalidate the mapping between ARM instructions and the values below + 11: + Instruction-Opcodes: + - 1922 + - 1924 + - 1926 + - 2359 + - 2360 + - 2361 + - 2364 + - 2365 + - 2368 + - 2369 + - 2371 + - 2390 + - 2391 + - 2392 + - 2395 + - 2396 + - 2399 + - 2400 + - 2402 + - 2445 + - 2446 + - 2447 + - 2450 + - 2451 + - 2454 + - 2455 + - 2457 + - 2470 + - 2471 + - 2472 + - 2475 + - 2476 + - 2479 + - 2480 + - 2482 + - 3627 + - 3629 + - 3631 + - 3633 + - 3644 + - 3646 + - 3648 + - 3650 + - 3709 + - 3711 + - 3713 + - 3715 + - 4306 + - 4308 + - 4310 + - 4312 + - 4326 + - 4328 + - 4330 + - 4332 + - 4372 + - 4374 + - 4376 + - 4378 + - 4468 + - 4469 + - 4470 + - 4472 + - 4474 + - 4476 + - 4493 + - 4494 + - 4495 + - 4497 + - 4499 + - 4501 + - 4511 + - 4513 + - 4515 + - 4517 + - 4519 + - 4521 + - 4534 + - 4535 + - 4536 + - 4538 + - 4540 + - 4542 + - 4594 + - 4595 + - 4599 + - 4601 + - 4603 + - 4605 + - 4613 + - 4614 + - 4618 + - 4620 + - 4622 + - 4624 + - 4633 + - 4635 + - 4637 + - 4639 + - 4641 + - 4643 + - 5760 + - 5762 + - 5764 + - 5766 + - 5780 + - 5782 + - 5784 + - 5786 + - 5824 + - 5826 + - 5828 + - 5830 + Execution-Latency: 15 + Execution-Throughput: 1 +# CPU-Info mainly used to generate a replica of the special (or system) file directory +# structure +CPU-Info: + # Set Generate-Special-Dir to True to generate the special files directory, or to False to not. + # (Not generating the special files directory may require the user to copy over files manually) + Generate-Special-Dir: True + # Core-Count MUST be 1 as multi-core is not supported at this time. (A64FX true value is 48) + Core-Count: 1 + # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. 
(A64FX true value is 1) + Socket-Count: 1 + # SMT MUST be 1 as Simultaneous-Multi-Threading is not supported at this time. (A64FX true value is 1) + SMT: 1 + # Below are the values needed to generate /proc/cpuinfo + BogoMIPS: 200.00 + Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve + CPU-Implementer: "0x46" + CPU-Architecture: 8 + CPU-Variant: "0x1" + CPU-Part: "0x001" + CPU-Revision: 0 + # Package-Count is used to generate + # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} + Package-Count: 1 \ No newline at end of file From 00d198a6b34dd278bef62b2c8f383bf4df8e1bb2 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Thu, 5 Dec 2024 17:06:26 +0000 Subject: [PATCH 44/69] Changes to config to allow parameterisation --- .../simeng/branchpredictors/TagePredictor.hh | 3 +- src/lib/branchpredictors/TagePredictor.cc | 31 +++++-------------- src/lib/config/ModelConfig.cc | 10 ++++++ 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 99dcd7d290..7fd4c8afbb 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -59,13 +59,14 @@ class TagePredictor : public BranchPredictor { /** Returns a btb prediction for this branch */ BranchPrediction getBtbPrediction(uint64_t address); - /** Todo */ void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, BranchPrediction* altPrediction, uint8_t* predTable, std::vector* indices, std::vector* tags); + BranchPrediction getBtbPrediction(uint64_t address); + /** Get the index of a branch for a given address and table */ uint64_t getTaggedIndex(uint64_t address, uint8_t table); diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index c0074ac9c2..d4bb27dc2f 
100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -1,7 +1,5 @@ #include "simeng/branchpredictors/TagePredictor.hh" -#include - namespace simeng { TagePredictor::TagePredictor(ryml::ConstNodeRef config) @@ -44,7 +42,6 @@ TagePredictor::~TagePredictor() { BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, int64_t knownOffset) { -// std::cout << "Predicting" << std::endl; BranchPrediction prediction; BranchPrediction altPrediction; uint8_t predTable; @@ -96,7 +93,6 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, void TagePredictor::update(uint64_t address, bool isTaken, uint64_t targetAddress, simeng::BranchType type, uint64_t instructionId) { -// std::cout << "Updating" << std::endl; // Make sure that this function is called in program order; and then update // the lastUpdatedInstructionId variable assert(instructionId >= lastUpdatedInstructionId && @@ -119,7 +115,6 @@ void TagePredictor::update(uint64_t address, bool isTaken, } void TagePredictor::flush(uint64_t address) { -// std::cout << "Flush" << std::endl; // If address interacted with RAS, rewind entry auto it = rasHistory_.find(address); if (it != rasHistory_.end()) { @@ -151,22 +146,12 @@ void TagePredictor::flush(uint64_t address) { } -BranchPrediction TagePredictor::getBtbPrediction(uint64_t address) { -// std::cout << "Getting BTB" << std::endl; - // Get prediction from BTB - uint64_t index = (address >> 2) & ((1 << btbBits_) - 1); - bool direction = (btb_[index].first >= (1 << (satCntBits_ - 1))); - uint64_t target = btb_[index].second; - return {direction, target}; -} - void TagePredictor::getTaggedPrediction(uint64_t address, BranchPrediction* prediction, BranchPrediction* altPrediction, uint8_t* predTable, std::vector* indices, std::vector* tags) { -// std::cout << "Getting Prediction" << std::endl; // Get a basic prediction from the btb BranchPrediction basePrediction = 
getBtbPrediction(address); prediction->isTaken = basePrediction.isTaken; @@ -178,13 +163,11 @@ void TagePredictor::getTaggedPrediction(uint64_t address, // number, the longer global history it has access to. Therefore, the // greater the table number, the better the prediction. for (uint8_t table = 0; table < numTageTables_; table++) { -// std::cout << "Checking table " << (table + 1) << std::endl; uint64_t index = getTaggedIndex(address, table); indices->push_back(index); uint64_t tag = getTag(address, table); tags->push_back(tag); if (tageTables_[table][index].tag == tag) { -// std::cout << "Tag match -- " << std::endl; altPrediction->isTaken = prediction->isTaken; altPrediction->target = prediction->target; @@ -195,25 +178,27 @@ void TagePredictor::getTaggedPrediction(uint64_t address, } } +BranchPrediction TagePredictor::getBtbPrediction(uint64_t address) { + // Get prediction from BTB + uint64_t index = (address >> 2) & ((1 << btbBits_) - 1); + bool direction = (btb_[index].first >= (1 << (satCntBits_ - 1))); + uint64_t target = btb_[index].second; + return {direction, target}; +} + uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { -// std::cout << "getting Index" << std::endl; // Hash function here is pretty arbitrary. uint64_t h1 = (address >> 2); uint64_t h2 = globalHistory_.getFolded(1 << (table + 1), (1 << tageTableBits_) - 1); -// std::cout << "Index: h1=" << h1 << " h2=" << h2 << " final=" -// << ((h1 ^ h2) & ((1 << tageTableBits_) - 1)) << std::endl; return (h1 ^ h2) & ((1 << tageTableBits_) - 1); } uint64_t TagePredictor::getTag(uint64_t address, uint8_t table) { -// std::cout << "getting Tag" << std::endl; // Hash function here is pretty arbitrary. 
uint64_t h1 = address; uint64_t h2 = globalHistory_.getFolded((1 << table), ((1 << tagLength_) - 1)); -// std::cout << "Tag: h1=" << h1 << " h2=" << h2 << " final=" -// << ((h1 ^ h2) & ((1 << tagLength_) - 1)) << std::endl; return (h1 ^ h2) & ((1 << tagLength_) - 1); } diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 2795036c37..cf226cb390 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -546,6 +546,16 @@ void ModelConfig::setExpectations(bool isDefault) { .setValueSet( std::vector{"Always-Taken", "Always-Not-Taken"}); } + if ((configTree_["Branch-Predictor"]["Type"].as() + == "Tage")) { + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation( + 12, "Tage-Table-Bits")); + expectations_["Branch-Predictor"]["Tage-Table-Bits"] + .setValueBounds(1, UINT64_MAX); + + + } } else { std::cerr << "[SimEng:ModelConfig] Attempted to access config key " "\"Branch-Predictor:Type\" but it doesn't exist. " From 9f144e11e52b0a38fc9a7c3518e0fd41f4e94c1f Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:31:25 +0000 Subject: [PATCH 45/69] Making TAGE paramterisable --- configs/a64fx_Tage.yaml | 3 + .../simeng/branchpredictors/TagePredictor.hh | 71 ++++++++++++++----- src/lib/branchpredictors/TagePredictor.cc | 8 ++- src/lib/config/ModelConfig.cc | 13 +++- 4 files changed, 76 insertions(+), 19 deletions(-) diff --git a/configs/a64fx_Tage.yaml b/configs/a64fx_Tage.yaml index 930b2841c2..3c88cc0c9c 100644 --- a/configs/a64fx_Tage.yaml +++ b/configs/a64fx_Tage.yaml @@ -35,6 +35,9 @@ Branch-Predictor: Global-History-Length: 19 RAS-entries: 8 Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 
7fd4c8afbb..6a6167ab6f 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -12,6 +12,8 @@ namespace simeng { +/** A data structure to store all of the information needed for a single entry + * in a tagged table. */ struct TageEntry { uint8_t satCnt; uint64_t tag; @@ -19,6 +21,8 @@ struct TageEntry { uint64_t target; }; +/** A data structure to store all of the information needed for a single entry + * in the Fetch Target Queue. */ struct ftqEntry { uint8_t predTable; std::vector indices; @@ -27,7 +31,24 @@ struct ftqEntry { BranchPrediction altPrediction; }; -/** ToDo -- Explain TAGE */ +/** + * A TAGE branch predictor of the type described by Seznec and Michaud: + * https://inria.hal.science/hal-03408381/document. A brief summary of the + * prediction mechanism is described below. + * + * This predictor uses a series of prediction tables (a user-defined number + * thereof), each of which uses a progressively larger global history to index + * it. The default prediction table does not use any global history. + * + * To access a prediction table, an XOR hash of the branch's address and the + * global history of the relevant length is used to index the table. Then, a + * tag is determined by a hash of the address and the context of the branch is + * used to confirm that the entry belongs to the present branch. + * + * A prediction is made on the basis of the prediction table using the longest + * global history that has an entry corresponding to the present branch + * (matching tag). + * */ class TagePredictor : public BranchPredictor { public: @@ -36,10 +57,10 @@ class TagePredictor : public BranchPredictor { ~TagePredictor(); /** Generate a branch prediction for the supplied instruction address, a - * branch type, and a known branch offset; defaults to 0 meaning offset is not - * known. Returns a branch direction and branch target address. */ + * branch type, and a known branch offset. 
Returns a branch direction and + * branch target address. */ BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset = 0) override; + int64_t knownOffset) override; /** Updates appropriate predictor model objects based on the address, type and * outcome of the branch instruction. Update must be called on @@ -56,39 +77,55 @@ class TagePredictor : public BranchPredictor { void flush(uint64_t address) override; private: - /** Returns a btb prediction for this branch */ + /** Returns a prediction for a branch at this address from the non-tagged BTB + * that is used for default predictions. */ BranchPrediction getBtbPrediction(uint64_t address); + /** provides a prediction, alternative prediction, the table number that + * provided the prediction, and the indices and tags of the prediction and + * alternative prediction. This prediction info is determined from the + * tagged tables for a branch with the provided address. */ void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, BranchPrediction* altPrediction, uint8_t* predTable, std::vector* indices, std::vector* tags); - BranchPrediction getBtbPrediction(uint64_t address); - - /** Get the index of a branch for a given address and table */ + /** Returns the index of a branch in a tagged table for a given address and + * table. */ uint64_t getTaggedIndex(uint64_t address, uint8_t table); - /** Return a hash of the address and the global history that is then trimmed - * to the length of the tags. The tag varies depending on - * the table that is being accessed */ + /** Returns a hash of the address and the global history that is then trimmed + * to the appropriate tag length. The tag varies depending on the table + * that is being accessed. */ uint64_t getTag(uint64_t address, uint8_t table); + /** Updates the default, untagged prediction table on the basis of the + * outcome of a branch. 
*/ void updateBtb(uint64_t address, bool isTaken, uint64_t target); + /** Updates the tagged tables on the basis of the outcome of a branch. */ void updateTaggedTables(uint64_t address, bool isTaken, uint64_t target); - /** The bitlength of the BTB index; BTB will have 2^bits entries. */ + /** The bitlength of the BTB (i.e., default prediction table) index; BTB + * will have 2^bits entries. */ uint8_t btbBits_; /** A 2^bits length vector of pairs containing a satCntBits_-bit saturating - * counter and a branch target. */ + * counter and a branch target. This is the untagged, default prediction + * table. */ std::vector> btb_; - uint64_t tageTableBits_ = 12; - uint8_t numTageTables_ = 6; + /** The bitlength of the Tagged tables' indices. + * Each tagged table with have 2^bits entries. */ + uint8_t tageTableBits_; + + /** The number of tagged tables in the TAGE scheme. + * In addition to the tagged tables, there will be a single untagged table + * (the BTB) from which default predictions will be made. */ + uint8_t numTageTables_; + /** Data structure to store the tagged tables in. */ std::vector> tageTables_; /** Fetch Target Queue containing the direction prediction and previous global @@ -117,7 +154,9 @@ class TagePredictor : public BranchPredictor { * taken (0), with the most recent branch being the least-significant-bit */ BranchHistory globalHistory_; - uint8_t tagLength_ = 8; + /** The size of the tags used in the tagged tables, where the units of + * size are bits. 
*/ + uint8_t tagLength_; // This variable is used only in debug mode -- therefore hide behind ifdef #ifndef NDEBUG diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index d4bb27dc2f..9d54cb63d4 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -4,12 +4,18 @@ namespace simeng { TagePredictor::TagePredictor(ryml::ConstNodeRef config) : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), + tageTableBits_( + config["Branch-Predictor"]["Tage-Table-Bits"].as()), + numTageTables_( + config["Branch-Predictor"]["Num-Tage-Tables"].as()), satCntBits_( config["Branch-Predictor"]["Saturating-Count-Bits"].as()), globalHistoryLength_( config["Branch-Predictor"]["Global-History-Length"].as()), rasSize_(config["Branch-Predictor"]["RAS-entries"].as()), - globalHistory_(1 << (numTageTables_ + 1)) { + globalHistory_(1 << (numTageTables_ + 1)), + tagLength_(config["Branch-Predictor"]["Tag-Length"].as()) + { // Calculate the saturation counter boundary between weakly taken and // not-taken. 
`(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state // value diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index cf226cb390..1708b3e1f0 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -549,12 +549,21 @@ void ModelConfig::setExpectations(bool isDefault) { if ((configTree_["Branch-Predictor"]["Type"].as() == "Tage")) { expectations_["Branch-Predictor"].addChild( - ExpectationNode::createExpectation( + ExpectationNode::createExpectation( 12, "Tage-Table-Bits")); expectations_["Branch-Predictor"]["Tage-Table-Bits"] - .setValueBounds(1, UINT64_MAX); + .setValueBounds(1, UINT8_MAX); + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation( + 6, "Num-Tage-Tables")); + expectations_["Branch-Predictor"]["Num-Tage-Tables"] + .setValueBounds(1, UINT8_MAX); + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation(8, "Tag-Length")); + expectations_["Branch-Predictor"]["Tag-Length"] + .setValueBounds(1, UINT8_MAX); } } else { std::cerr << "[SimEng:ModelConfig] Attempted to access config key " From 6f73f4e16c56d05e13847ae36b66c5c61f7e1b87 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:23:07 +0000 Subject: [PATCH 46/69] Specifying size of constant 1 throughout --- src/lib/branchpredictors/TagePredictor.cc | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index 9d54cb63d4..dcbae3df31 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -19,19 +19,20 @@ TagePredictor::TagePredictor(ryml::ConstNodeRef config) // Calculate the saturation counter boundary between weakly taken and // not-taken. 
`(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state // value - uint8_t weaklyTaken = 1 << (satCntBits_ - 1); + uint8_t weaklyTaken = (uint8_t)1 << (satCntBits_ - 1); uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] .as() == "Always-Taken") ? weaklyTaken : (weaklyTaken - 1); // Create branch prediction structures btb_ = - std::vector>(1 << btbBits_, {satCntVal, 0}); + std::vector>( + (uint8_t)1 << btbBits_, {satCntVal, 0}); // Set up Tagged tables for (uint32_t i = 0; i < numTageTables_; i++) { std::vector newTable; - for (uint32_t j = 0; j < (1 << tageTableBits_); j++) { + for (uint32_t j = 0; j < (1ul << tageTableBits_); j++) { TageEntry newEntry = {2, 0, 1, 0}; newTable.push_back(newEntry); } @@ -203,26 +204,26 @@ uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { uint64_t TagePredictor::getTag(uint64_t address, uint8_t table) { // Hash function here is pretty arbitrary. uint64_t h1 = address; - uint64_t h2 = globalHistory_.getFolded((1 << table), - ((1 << tagLength_) - 1)); - return (h1 ^ h2) & ((1 << tagLength_) - 1); + uint64_t h2 = globalHistory_.getFolded((1ull << table), + ((1ull << tagLength_) - 1)); + return (h1 ^ h2) & ((1ull << tagLength_) - 1); } void TagePredictor::updateBtb(uint64_t address, bool isTaken, uint64_t targetAddress) { // Calculate 2-bit saturating counter value - uint8_t satCntVal = btb_[((address >> 2) & ((1 << btbBits_) - 1))].first; + uint8_t satCntVal = btb_[((address >> 2) & ((1ull << btbBits_) - 1))].first; // Only alter value if it would transition to a valid state - if (!((satCntVal == (1 << satCntBits_) - 1) && isTaken) && + if (!((satCntVal == (1ull << satCntBits_) - 1) && isTaken) && !(satCntVal == 0 && !isTaken)) { satCntVal += isTaken ? 
1 : -1; } // Update BTB entry - btb_[((address >> 2) & ((1 << btbBits_) - 1))].first = satCntVal; + btb_[((address >> 2) & ((1ull << btbBits_) - 1))].first = satCntVal; if (isTaken) { - btb_[((address >> 2) & ((1 << btbBits_) - 1))].second = targetAddress; + btb_[((address >> 2) & ((1ull << btbBits_) - 1))].second = targetAddress; } } From 03d807d265d22169f9582cc833de2ddd5bb6b4df Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:39:34 +0000 Subject: [PATCH 47/69] Updating default A64FX config branch predictor --- configs/a64fx.yaml | 7 +- configs/a64fx_Tage.yaml | 390 ---------------------------------------- 2 files changed, 6 insertions(+), 391 deletions(-) delete mode 100644 configs/a64fx_Tage.yaml diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index 36d09a42c9..267b01671f 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -29,10 +29,15 @@ Queue-Sizes: Load: 40 Store: 24 Branch-Predictor: - Type: "Perceptron" + Type: "Tage" BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 Global-History-Length: 19 RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/a64fx_Tage.yaml b/configs/a64fx_Tage.yaml deleted file mode 100644 index 3c88cc0c9c..0000000000 --- a/configs/a64fx_Tage.yaml +++ /dev/null @@ -1,390 +0,0 @@ -# The following resources where utilised to create the config file and naming schemes: -# https://github.com/fujitsu/A64FX - -Core: - ISA: AArch64 - Simulation-Mode: outoforder - Clock-Frequency-GHz: 1.8 - Timer-Frequency-MHz: 100 - Micro-Operations: True - Vector-Length: 512 -Fetch: - Fetch-Block-Size: 32 - Loop-Buffer-Size: 48 - Loop-Detection-Threshold: 4 -Process-Image: - Heap-Size: 1073741824 - Stack-Size: 1048576 -Register-Set: - GeneralPurpose-Count: 96 - FloatingPoint/SVE-Count: 128 - Predicate-Count: 48 - Conditional-Count: 128 
-Pipeline-Widths: - Commit: 4 - FrontEnd: 4 - LSQ-Completion: 2 -Queue-Sizes: - ROB: 128 - Load: 40 - Store: 24 -Branch-Predictor: - Type: "Tage" - BTB-Tag-Bits: 11 - Saturating-Count-Bits: 2 - Global-History-Length: 19 - RAS-entries: 8 - Fallback-Static-Predictor: "Always-Taken" - Tage-Table-Bits: 12 - Num-Tage-Tables: 6 - Tag-Length: 8 -L1-Data-Memory: - Interface-Type: Fixed -L1-Instruction-Memory: - Interface-Type: Flat -LSQ-L1-Interface: - Access-Latency: 5 - Exclusive: True - Load-Bandwidth: 128 - Store-Bandwidth: 64 - Permitted-Requests-Per-Cycle: 2 - Permitted-Loads-Per-Cycle: 2 - Permitted-Stores-Per-Cycle: 1 -Ports: - 0: - Portname: FLA - Instruction-Group-Support: - - FP - - SVE - 1: - Portname: PR - Instruction-Group-Support: - - PREDICATE - 2: - Portname: EXA - Instruction-Group-Support: - - INT_SIMPLE - - INT_MUL - - STORE_DATA - 3: - Portname: FLB - Instruction-Group-Support: - - FP_SIMPLE - - FP_MUL - - SVE_SIMPLE - - SVE_MUL - 4: - Portname: EXB - Instruction-Group-Support: - - INT_SIMPLE - - INT_DIV_OR_SQRT - 5: - Portname: EAGA - Instruction-Group-Support: - - LOAD - - STORE_ADDRESS - - INT_SIMPLE_ARTH_NOSHIFT - - INT_SIMPLE_LOGICAL_NOSHIFT - - INT_SIMPLE_CMP - 6: - Portname: EAGB - Instruction-Group-Support: - - LOAD - - STORE_ADDRESS - - INT_SIMPLE_ARTH_NOSHIFT - - INT_SIMPLE_LOGICAL_NOSHIFT - - INT_SIMPLE_CMP - 7: - Portname: BR - Instruction-Group-Support: - - BRANCH -Reservation-Stations: - 0: - Size: 20 - Dispatch-Rate: 2 - Ports: - - FLA - - PR - - EXA - 1: - Size: 20 - Dispatch-Rate: 2 - Ports: - - FLB - - EXB - 2: - Size: 10 - Dispatch-Rate: 1 - Ports: - - EAGA - 3: - Size: 10 - Dispatch-Rate: 1 - Ports: - - EAGB - 4: - Size: 19 - Dispatch-Rate: 1 - Ports: - - BR -Execution-Units: - 0: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 1: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 2: - Pipelined: True - Blocking-Groups: - - 
INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 3: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 4: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 5: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 6: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT - 7: - Pipelined: True - Blocking-Groups: - - INT_DIV_OR_SQRT - - FP_DIV_OR_SQRT - - SVE_DIV_OR_SQRT -Latencies: - 0: - Instruction-Groups: - - INT - Execution-Latency: 2 - Execution-Throughput: 2 - 1: - Instruction-Groups: - - INT_SIMPLE_ARTH_NOSHIFT - - INT_SIMPLE_LOGICAL_NOSHIFT - - INT_SIMPLE_CVT - Execution-Latency: 1 - Execution-Throughput: 1 - 2: - Instruction-Groups: - - INT_MUL - Execution-Latency: 5 - Execution-Throughput: 1 - 3: - Instruction-Groups: - - INT_DIV_OR_SQRT - Execution-Latency: 41 - Execution-Throughput: 41 - 4: - Instruction-Groups: - - SCALAR_SIMPLE - - VECTOR_SIMPLE_LOGICAL - - SVE_SIMPLE_LOGICAL - - VECTOR_SIMPLE_CMP - - SVE_SIMPLE_CMP - Execution-Latency: 4 - Execution-Throughput: 1 - 5: - Instruction-Groups: - - FP_DIV_OR_SQRT - Execution-Latency: 29 - Execution-Throughput: 29 - 6: - Instruction-Groups: - - VECTOR_SIMPLE - - SVE_SIMPLE - - SCALAR_SIMPLE_CVT - - FP_MUL - - SVE_MUL - Execution-Latency: 9 - Execution-Throughput: 1 - 7: - Instruction-Groups: - - SVE_DIV_OR_SQRT - Execution-Latency: 98 - Execution-Throughput: 98 - 8: - Instruction-Groups: - - PREDICATE - Execution-Latency: 3 - Execution-Throughput: 1 - 9: - Instruction-Groups: - - LOAD_SCALAR - - LOAD_VECTOR - - STORE_ADDRESS_SCALAR - - STORE_ADDRESS_VECTOR - Execution-Latency: 3 - Execution-Throughput: 1 - 10: - Instruction-Groups: - - LOAD_SVE - - STORE_ADDRESS_SVE - Execution-Latency: 6 - Execution-Throughput: 1 - # Indexed FMLA instructions split into 2 dependent µops. 
Latency increased to 15 to mimic such behaviour - # NOTE: Any changes to the capstone opcode list could invalidate the mapping between ARM instructions and the values below - 11: - Instruction-Opcodes: - - 1922 - - 1924 - - 1926 - - 2359 - - 2360 - - 2361 - - 2364 - - 2365 - - 2368 - - 2369 - - 2371 - - 2390 - - 2391 - - 2392 - - 2395 - - 2396 - - 2399 - - 2400 - - 2402 - - 2445 - - 2446 - - 2447 - - 2450 - - 2451 - - 2454 - - 2455 - - 2457 - - 2470 - - 2471 - - 2472 - - 2475 - - 2476 - - 2479 - - 2480 - - 2482 - - 3627 - - 3629 - - 3631 - - 3633 - - 3644 - - 3646 - - 3648 - - 3650 - - 3709 - - 3711 - - 3713 - - 3715 - - 4306 - - 4308 - - 4310 - - 4312 - - 4326 - - 4328 - - 4330 - - 4332 - - 4372 - - 4374 - - 4376 - - 4378 - - 4468 - - 4469 - - 4470 - - 4472 - - 4474 - - 4476 - - 4493 - - 4494 - - 4495 - - 4497 - - 4499 - - 4501 - - 4511 - - 4513 - - 4515 - - 4517 - - 4519 - - 4521 - - 4534 - - 4535 - - 4536 - - 4538 - - 4540 - - 4542 - - 4594 - - 4595 - - 4599 - - 4601 - - 4603 - - 4605 - - 4613 - - 4614 - - 4618 - - 4620 - - 4622 - - 4624 - - 4633 - - 4635 - - 4637 - - 4639 - - 4641 - - 4643 - - 5760 - - 5762 - - 5764 - - 5766 - - 5780 - - 5782 - - 5784 - - 5786 - - 5824 - - 5826 - - 5828 - - 5830 - Execution-Latency: 15 - Execution-Throughput: 1 -# CPU-Info mainly used to generate a replica of the special (or system) file directory -# structure -CPU-Info: - # Set Generate-Special-Dir to True to generate the special files directory, or to False to not. - # (Not generating the special files directory may require the user to copy over files manually) - Generate-Special-Dir: True - # Core-Count MUST be 1 as multi-core is not supported at this time. (A64FX true value is 48) - Core-Count: 1 - # Socket-Count MUST be 1 as multi-socket simulations are not supported at this time. (A64FX true value is 1) - Socket-Count: 1 - # SMT MUST be 1 as Simultanious-Multi-Threading is not supported at this time. 
(A64FX true value is 1) - SMT: 1 - # Below are the values needed to generate /proc/cpuinfo - BogoMIPS: 200.00 - Features: fp asimd evtstrm sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm fcma dcpop sve - CPU-Implementer: "0x46" - CPU-Architecture: 8 - CPU-Variant: "0x1" - CPU-Part: "0x001" - CPU-Revision: 0 - # Package-Count is used to generate - # /sys/devices/system/cpu/cpu{0..Core-Count}/topology/{physical_package_id, core_id} - Package-Count: 1 \ No newline at end of file From 9afa83c600c45b6281e59e31688cf02c1fa8b0bf Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 12:46:02 +0000 Subject: [PATCH 48/69] Cleaning up BranchHistory.hh comments --- .../simeng/branchpredictors/BranchHistory.hh | 35 +++++++------------ 1 file changed, 13 insertions(+), 22 deletions(-) diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh index 01e49b8cdb..04d4d7e08c 100644 --- a/src/include/simeng/branchpredictors/BranchHistory.hh +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -1,7 +1,5 @@ #pragma once -#include - namespace simeng { /** A class for storing a branch history. Needed for cases where a branch * history of more than 64 bits is required. This class makes it easier to @@ -9,7 +7,7 @@ namespace simeng { * sophisticated branch predictors. * * The bits of the branch history are stored in a vector of uint64_t values, - * and their access/manipulation is facilitated by the public functions.*/ + * and their access/manipulation is facilitated by the public functions. */ class BranchHistory { public: @@ -21,23 +19,20 @@ class BranchHistory { } ~BranchHistory() {}; - /** Returns the numBits most recent bits of the branch history. Maximum + /** Returns the 'numBits' most recent bits of the branch history. Maximum * number of bits returnable is 64 to allow it to be provided in a 64-bit * integer. 
*/ uint64_t getHistory(uint8_t numBits) { -// std::cout << "getHistory" << std::endl; assert(numBits <= 64 && "Cannot get more than 64 bits without rolling"); assert(numBits <= size_ && "Cannot get more bits of branch history than " "the size of the history"); return (history_[0] & ((1 << numBits) - 1)); } - /** returns the global history folded over on itself to get a bitmap of the - * size specified by numBits. The global history is folded by taking an - * XOR hash with the overflowing bits to get an output of 'length' bits. - * */ + /** Returns 'numBits' of the global history folded over on itself to get a + * value of size 'length'. The global history is folded by taking an + * XOR hash with the overflowing bits to get an output of 'length' bits. */ uint64_t getFolded(uint8_t numBits, uint8_t length) { -// std::cout << "getFolded" << std::endl; assert(numBits <= size_ && "Cannot get more bits of branch history than " "the size of the history"); uint64_t output = 0; @@ -46,7 +41,6 @@ class BranchHistory { uint64_t endIndex = numBits - 1; while (startIndex <= numBits) { -// std::cout << "in while loop :(" << std::endl; output ^= ((history_[startIndex / 64] >> startIndex) & ((1 << (numBits - startIndex)) - 1)); @@ -64,9 +58,8 @@ class BranchHistory { return output; } - /** Adds a branch outcome to the global history */ + /** Adds a branch outcome ('isTaken') to the global history */ void addHistory(bool isTaken) { -// std::cout << "addHistory" << std::endl; for (int8_t i = size_ / 64; i >= 0; i--) { history_[i] <<= 1; if (i == 0) { @@ -78,12 +71,11 @@ class BranchHistory { } /** Updates the state of a branch that has already been added to the global - * history at 'position', where position is 0-indexed and starts from the - * least-significant bit. I.e., to update the most recently added branch - * outcome, position would be 0. + * history at 'position', where 'position' is 0-indexed and starts from the + * most recent history. 
I.e., to update the most recently added branch + * outcome, 'position' would be 0. * */ void updateHistory(bool isTaken, uint64_t position) { -// std::cout << "updateHistory" << std::endl; if (position < size_) { uint8_t vectIndex = position / 64; uint8_t bitIndex = position % 64; @@ -91,9 +83,8 @@ class BranchHistory { } } - /** removes the most recently added branch from the history */ + /** Removes the most recently added branch from the history */ void rollBack() { -// std::cout << "rollBack" << std::endl; for (uint8_t i = 0; i <= (size_ / 64); i++) { history_[i] >>= 1; if (i < (size_ / 64)) { @@ -106,11 +97,11 @@ class BranchHistory { /** The number of bits of branch history stored in this branch history */ uint64_t size_; - /** A vector containing this bits of the branch history. The bits are + /** A vector containing the bits of the branch history. The bits are * arranged such that the most recent branches are stored in uint64_t at * index 0 of the vector, then the next most recent at index 1 and so forth. - * Within each uint64_t, the most recent branches are recorded int he - * least-significant bits */ + * Within each uint64_t, the most recent branches are recorded in the + * least-significant bits. 
*/ std::vector history_; }; From 79a5c7f2184af135cf880710253419ce6b38f802 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:44:25 +0000 Subject: [PATCH 49/69] Adding to documentation --- .../developer/components/branchPred.rst | 19 +++++++++++++++++-- .../simeng/branchpredictors/TagePredictor.hh | 2 +- src/lib/branchpredictors/TagePredictor.cc | 7 ++++--- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index f92a50f125..5b5f69a0c0 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -21,7 +21,6 @@ The state of the branch predictor when ``predict`` is called on a branch is stor Generic Predictor ----------------- - The algorithm(s) held within a ``BranchPredictor`` class instance can be model-specific, however, SimEng provides a ``GenericPredictor`` which contains the following logic. Global History @@ -53,4 +52,20 @@ Branch Target Buffer (BTB) If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) - Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file + Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. 
+ +TAGE Predictor +-------------------- +The ``TagePredictor`` is a TAGE predictor of the type described in https://inria.hal.science/hal-03408381/document. Unlike ``GenericPredictor`` and ``PerceptronPredictor``, this predictor uses a series of prediction tables, each of which uses an increasing global history size. E.g., the default prediction table will be indexed by the address itself, then the following tables will use global histories of length 2, 4, 8, 16, .... + +Tagged prediction tables + The prediction returned from this branch predictor will be that determined by the table with the largest global history that has an entry corresponding to the given branch. To determine whether or not a table entry corresponds to the present branch, a hash is made from the branch's address and the global history. Each table entry has a usefulness counter which is updated when the prediction differs from the next-best prediction. On an incorrect prediction, a non-useful entry in a table with more global history is replaced, if possible. + +Default prediction table + In addition to the tagged tables, there is a non-tagged default prediction table that is used as a fall-back in the event that none of the tagged tables have an entry corresponding to a given branch. This table is much like the BTB in the ``GenericPredictor``, except that the index is determined from the truncated address only (i.e., it does not depend on the global history at all). + +Global History + To accommodate larger numbers of tagged tables, global histories of greater than 64 bits are needed. Therefore, ``TagePredictor`` incorporates a new ``BranchHistory`` structure that allows global histories of unlimited size to be kept and accessed.
+ +Return Address Stack (RAS) + Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 6a6167ab6f..2afaa0f7d7 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -105,7 +105,7 @@ class TagePredictor : public BranchPredictor { void updateBtb(uint64_t address, bool isTaken, uint64_t target); /** Updates the tagged tables on the basis of the outcome of a branch. */ - void updateTaggedTables(uint64_t address, bool isTaken, uint64_t target); + void updateTaggedTables(bool isTaken, uint64_t target); /** The bitlength of the BTB (i.e., default prediction table) index; BTB * will have 2^bits entries. */ diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index dcbae3df31..3f25873f48 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -108,7 +108,7 @@ void TagePredictor::update(uint64_t address, bool isTaken, updateBtb(address, isTaken, targetAddress); - updateTaggedTables(address, isTaken, targetAddress); + updateTaggedTables(isTaken, targetAddress); // Update global history if prediction was incorrect if (ftq_.front().prediction.isTaken != isTaken) { @@ -215,6 +215,7 @@ void TagePredictor::updateBtb(uint64_t address, bool isTaken, // Calculate 2-bit saturating counter value uint8_t satCntVal = btb_[((address >> 2) & ((1ull << btbBits_) - 1))].first; // Only alter value if it would transition to a valid state + // (i.e., avoid overflow) if (!((satCntVal == (1ull << satCntBits_) - 1) && isTaken) && !(satCntVal == 0 && !isTaken)) { satCntVal += isTaken ? 
1 : -1; @@ -228,9 +229,9 @@ void TagePredictor::updateBtb(uint64_t address, bool isTaken, } -void TagePredictor::updateTaggedTables(uint64_t address, bool isTaken, +void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { - // Get stored information from the ftq + // Get stored information from the FTQ uint8_t predTable = ftq_.front().predTable; std::vector indices = ftq_.front().indices; std::vector tags = ftq_.front().tags; From e11742d81d7379bcd666a1e80bc98022e6263699 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:57:04 +0000 Subject: [PATCH 50/69] Adding to documentation --- docs/sphinx/user/configuring_simeng.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index a021369ea6..d73c88c0c4 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -149,13 +149,13 @@ The Branch-Prediction section contains those options to parameterise the branch The current options include: Type - The type of branch predictor that is used, the options are ``Generic``, and ``Perceptron``. Both types of predictor use a branch target buffer with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` is a saturating counter, and in ``Perceptron`` it is a perceptron. + The type of branch predictor that is used, the options are ``Generic``, ``Perceptron``, and ``Tage``. Each of these types of predictor uses prediction tables with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` and ``Tage`` is a saturating counter, and in ``Perceptron`` it is a perceptron. ``Tage`` also uses a series of further, tagged prediction tables to provide predictions informed by greater branch histories.
BTB-Tag-Bits The number of bits used to index the entries in the Branch Target Buffer (BTB). The number of entries in the BTB is obtained from the calculation: 1 << ``bits``. For example, a ``bits`` value of 12 would result in a BTB with 4096 entries. Saturating-Count-Bits - Only needed for a ``Generic`` predictor. The number of bits used in the saturating counter value. + Only needed for ``Generic`` and ``Tage`` predictors. The number of bits used in the saturating counter value. Global-History-Length The number of bits used to record the global history of branch directions. Each bit represents one branch direction. For ``PerceptronPredictor``, this dictates the size of the perceptrons (with each perceptron having Global-History-Length + 1 weights). @@ -164,7 +164,16 @@ RAS-entries The number of entries in the Return Address Stack (RAS). Fallback-Static-Predictor - Only needed for a ``Generic`` predictor. The static predictor used when no dynamic prediction is available. The options are either ``"Always-Taken"`` or ``"Always-Not-Taken"``. + Only needed for ``Generic`` and ``Tage`` predictors. The static predictor used when no dynamic prediction is available. The options are either ``"Always-Taken"`` or ``"Always-Not-Taken"``. + +Tage-Table-Bits + Only needed for a ``Tage`` predictor. The number of bits used to index entries in the tagged tables. The number of entries in each of the tagged tables is obtained from the calculation: 1 << ``bits``. For example, a ``bits`` value of 12 would result in tagged tables with 4096 entries. + +Num-Tage-Tables + Only needed for a ``Tage`` predictor. The number of tagged tables used by the predictor, in addition to a default prediction table (i.e., the BTB). Therefore, a value of 3 for ``Num-Tage-Tables`` would result in four total prediction tables: one BTB and three tagged tables. If no tagged tables are desired, it is recommended to use the ``GenericPredictor`` instead. + +Tag-Length + Only needed for a ``Tage`` predictor.
The number of bits used to tag the entries of the tagged tables. .. _l1dcnf: From 729de42326e102514a37ec3f08496c462f4b687a Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 15:32:23 +0000 Subject: [PATCH 51/69] Adding tests --- test/unit/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 2826cd0030..bc3d752f52 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -35,6 +35,7 @@ set(TEST_SOURCES RegisterValueTest.cc PerceptronPredictorTest.cc SpecialFileDirGenTest.cc + TagePredictorTest.cc ) add_executable(unittests ${TEST_SOURCES}) From 922ba4b853ece3de7d4aab3ba3fecd3da593e3d7 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 15:48:36 +0000 Subject: [PATCH 52/69] Actually adding the test file --- test/unit/TagePredictorTest.cc | 297 +++++++++++++++++++++++++++++++++ 1 file changed, 297 insertions(+) create mode 100644 test/unit/TagePredictorTest.cc diff --git a/test/unit/TagePredictorTest.cc b/test/unit/TagePredictorTest.cc new file mode 100644 index 0000000000..ada448cfc5 --- /dev/null +++ b/test/unit/TagePredictorTest.cc @@ -0,0 +1,297 @@ +#include "MockInstruction.hh" +#include "gtest/gtest.h" +#include "simeng/branchpredictors/TagePredictor.hh" + +namespace simeng { + +class TagePredictorTest : public testing::Test { + public: + TagePredictorTest() : uop(new MockInstruction), uopPtr(uop) { + uop->setInstructionAddress(0); + } + + protected: + MockInstruction* uop; + std::shared_ptr uopPtr; +}; + +// Tests that TagePredictor will predict the correct direction on a miss +TEST_F(TagePredictorTest, Miss) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + "
Tage-Table-Bits: 12," + " Num-Tage-Tables: 6," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + auto prediction = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + prediction = predictor.predict(8, BranchType::Unconditional, 0); + EXPECT_TRUE(prediction.isTaken); +} + +// Tests that TagePredictor will predict branch-and-link return pairs correctly +TEST_F(TagePredictorTest, RAS) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 6," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 48); + prediction = predictor.predict(56, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 64); + prediction = predictor.predict(72, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 80); + + prediction = predictor.predict(84, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 76); + prediction = predictor.predict(68, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 60); + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + 
EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 12); +} + +// Tests that TagePredictor will predict a previously encountered +// branch correctly, when no address aliasing has occurred +TEST_F(TagePredictorTest, Hit) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 6," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 4); + + auto prediction = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 16); +} + +// Tests that TagePredictor will predict correctly for two different +// behaviours of the same branch but in different states of the program +TEST_F(TagePredictorTest, GlobalIndexing) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 5," + " Num-Tage-Tables: 1," + " Tag-Length: 8" + " }" + "}"); + auto predictor = 
simeng::TagePredictor(); + // Spool up first global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 4); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 5); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 6); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 7); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 8); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 9); + // Ensure default behaviour for first encounter + auto prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0); + // Set entry in BTB + predictor.update(0x7C, false, 0x80, BranchType::Conditional, 10); + + // Spool up second global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 11); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 12); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 13); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 14); + predictor.predict(0, 
BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 15); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 16); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 17); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 18); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 19); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 20); + // Re-encounter but with different global history + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); + // Set entry in BTB + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 21); + + // Recreate first global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 22); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 23); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 24); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 25); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 26); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 27); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 28); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 29); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4,
BranchType::Conditional, 30); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 31); + // Get prediction + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0xBA); + // Set entry in BTB + predictor.update(0x7C, true, 0x80, BranchType::Conditional, 32); + + // Recreate second global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 33); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 34); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 35); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 36); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 37); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 38); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 39); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 40); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 41); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 42); + // Get prediction + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 43); +} + +// Test Flush of RAS functionality +TEST_F(TagePredictorTest, flush) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " 
Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 1," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + // Add some entries to the RAS + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 48); + + // Start getting entries from RAS + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 28); + + // Flush address + predictor.flush(36); + + // Continue getting entries from RAS + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(16, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 12); +} +} // namespace simeng From dd3053a2028fa0cbc07c31d3253a0d3eaaad2ced Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:51:19 +0000 Subject: [PATCH 53/69] Adjusting comments --- .../developer/arch/supported/aarch64.rst | 4 ++-- .../developer/components/branchPred.rst | 8 ++++---- .../developer/components/coreinstance.rst | 2 +- .../components/pipeline/components.rst | 2 +- .../developer/components/pipeline/units.rst | 10 +++++----- .../arch/aarch64/helpers/conditional.hh | 4 ++-- src/include/simeng/config/yaml/ryml.hh | 20 +++++++++---------- 
src/lib/arch/aarch64/Instruction.cc | 2 +- src/lib/arch/riscv/Instruction.cc | 2 +- src/lib/branchpredictors/GenericPredictor.cc | 2 -- src/lib/branchpredictors/TagePredictor.cc | 8 +++----- test/unit/MockBranchPredictor.hh | 1 - test/unit/aarch64/InstructionTest.cc | 12 +++++------ test/unit/riscv/InstructionTest.cc | 12 +++++------ 14 files changed, 42 insertions(+), 47 deletions(-) diff --git a/docs/sphinx/developer/arch/supported/aarch64.rst b/docs/sphinx/developer/arch/supported/aarch64.rst index 6df0028e48..092264e991 100644 --- a/docs/sphinx/developer/arch/supported/aarch64.rst +++ b/docs/sphinx/developer/arch/supported/aarch64.rst @@ -55,12 +55,12 @@ Additional information The ``FP`` primary identifier is a placeholder to denote both the ``SCALAR`` and ``VECTOR`` primary identifiers such that, amongst the other combinations, ``FP_SIMPLE_ARTH`` expands to be ``SCALAR_SIMPLE_ARTH`` and ``VECTOR_SIMPLE_ARTH``. In some cases it was unnecessary and inconvenient to separate ``SCALAR`` and ``VECTOR`` operations within configuration options, therefore, this instruction group option was provided to solve the issue. -When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is isTaken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. 
+When setting the latencies for instruction groups, within the :ref:`Latencies ` section of the configurable options, the inheritance between instruction groups is taken into account (e.g. the ``VECTOR`` group latency assignment would be inherited by all ``VECTOR_*`` groups). If multiple entries could assign a latency value to an instruction group, the option with the least levels of inheritance to the instruction group takes priority. As an example, take the groups ``INT_SIMPLE`` and ``INT_SIMPLE_ARTH``. ``INT_SIMPLE_ARTH_NOSHIFT`` inherits from both of these groups but because ``INT_SIMPLE_ARTH`` has one less level of inheritance to traverse, ``INT_SIMPLE_ARTH_NOSHIFT`` inherits ``INT_SIMPLE_ARTH`` latency values. Instruction Splitting ********************* -Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is isTaken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: +Instruction splitting is performed within the ``decode`` function in ``MicroDecoder.cc``. A macro-op is taken into the ``decode`` function and one or more micro-ops, taking the form of SimEng ``Instruction`` objects, are returned. The following instruction splitting is supported: - Load pair for X/W/S/D/Q registers. diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index 5b5f69a0c0..bd18271014 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -29,13 +29,13 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. 
- If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. Static Prediction - Based on the chosen static prediction method of "always isTaken" or "always not isTaken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of isTaken or not-isTaken respectively. + Based on the chosen static prediction method of "always taken" or "always not taken", the n-bit saturating counter value in the initial entries of the BTB structure are filled with the weakest variant of taken or not-taken respectively. Perceptron Predictor -------------------- @@ -47,9 +47,9 @@ Global History Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. - The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not isTaken if this is negative, or isTaken otherwise. 
The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. + The direction prediction is obtained from the perceptron by taking its dot-product with the global history. The prediction is not taken if this is negative, or taken otherwise. The perceptron is updated when its prediction is wrong or when the magnitude of the dot-product is below a pre-determined threshold (i.e., the confidence of the prediction is low). To update, each ith weight of the perceptron is incremented if the actual outcome of the branch is the same as the ith bit of ``globalHistory_``, and decremented otherwise. - If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be isTaken. If the supplied branch type is ``Conditional`` and the predicted direction is not isTaken, then the predicted target is overridden to be the next sequential instruction. + If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. 
diff --git a/docs/sphinx/developer/components/coreinstance.rst b/docs/sphinx/developer/components/coreinstance.rst index 89b6247db4..8b9e99a449 100644 --- a/docs/sphinx/developer/components/coreinstance.rst +++ b/docs/sphinx/developer/components/coreinstance.rst @@ -3,7 +3,7 @@ Core Instance The ``CoreInstance`` component supplies the functionality for instantiating all simulation objects and linking them together. -The standard process isTaken to create an instance of the modelled core is as follows: +The standard process taken to create an instance of the modelled core is as follows: Process the config file Either the passed configuration file path, or default configuration string, is used to generate the model configuration class. All subsequent parameterised instantiations of simulation objects utilise this configuration class. diff --git a/docs/sphinx/developer/components/pipeline/components.rst b/docs/sphinx/developer/components/pipeline/components.rst index f74d5e892e..ab62a6b919 100644 --- a/docs/sphinx/developer/components/pipeline/components.rst +++ b/docs/sphinx/developer/components/pipeline/components.rst @@ -69,7 +69,7 @@ Once a completion slot is available, the load will be executed, the results broa Stores ****** -As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is isTaken from the ``storeQueue_`` entry. +As with loads, stores are considered pending when initially added to the LSQ. Whilst like load operations the generation of addresses to be accessed must occur before commitment, an additional operation of supplying the data to be stored must also occur. 
The ``supplyStoreData`` function facilitates this by placing the data to be stored within the ``storeQueue_`` entry of the associated store. Once the store is committed, the data is taken from the ``storeQueue_`` entry. The generation of store instruction write requests are carried out after its commitment. The reasoning for this design decision is as followed. With SimEng supporting speculative execution, processed store instruction may come from an incorrectly speculated branch direction and will inevitably be removed from the pipeline. Therefore, it is important to ensure any write requests are valid, concerning speculative execution, as the performance cost of reversing a completed write request is high. diff --git a/docs/sphinx/developer/components/pipeline/units.rst b/docs/sphinx/developer/components/pipeline/units.rst index 922b24f5a6..52358f4658 100644 --- a/docs/sphinx/developer/components/pipeline/units.rst +++ b/docs/sphinx/developer/components/pipeline/units.rst @@ -23,7 +23,7 @@ Behaviour The fetch unit fetches memory in discrete boundary-aligned blocks, according to the current program counter (PC); this is to prevent the fetched block overlapping an inaccessible or unmapped memory region that may result in the request incorrectly responding with a fault despite the validity of the initial region. -Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a isTaken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. 
+Each cycle, it will process the most recently fetched memory block by passing it to the supplied ``Architecture`` instance for pre-decoding into macro-ops. Once pre-decoded, the head of the vector of micro-ops, or macro-op, is passed to the supplied branch predictor. If the instruction is predicted to be a taken branch, then the PC will be updated to the predicted target address and the cycle will end. If this is not the case, the PC is incremented by the number of bytes consumed to produce the pre-decoded macro-op. The remaining bytes in the block are once again passed to the architecture for pre-decoding. This standard process of pre-decoding, predicting, and updating the PC continues until one of the following occurs: @@ -32,7 +32,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues The maximum number of fetched macro-ops is reached The current block is saved and processing resumes in the next cycle. - A branch is predicted as isTaken + A branch is predicted as taken A block of memory from the new address may be requested, and processing will resume once the data is available. The fetched memory block is exhausted @@ -43,7 +43,7 @@ This standard process of pre-decoding, predicting, and updating the PC continues Loop Buffer *********** -Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is isTaken and increases the throughput of the fetch unit. +Within the fetch unit is a loop buffer that can store a configurable number of Macro-Ops. The loop buffer can be pulled from instead of memory if a loop is detected. This avoids the need to re-request data from memory if a branch is taken and increases the throughput of the fetch unit. Each entry of the loop buffer is the encoding of the Macro-Op. 
Therefore, when supplying an instruction from the loop buffer, the pre-decoding step must still be performed. This was required to avoid any issues with multiple instantiations of the same instruction editing each others class members. @@ -59,7 +59,7 @@ FILLING The branch representing the loop has been found and the buffer is being filled until it is seen again. SUPPLYING - The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is isTaken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. + The supply of instructions from the fetch unit has been handed over to the loop buffer. The stream of instructions is taken from the loop buffer in order and resets to the top of the buffer once it reaches the end of the loop body. The detection of a loop and the branch which represents it comes from the ROB. More information can be found :ref:`here `. @@ -81,7 +81,7 @@ Behaviour Each cycle, the decode unit will read macro-ops from the input buffer, and split them into a stream of ``Instruction`` objects or micro-ops. These ``Instruction`` objects are passed into an internal buffer. -Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a isTaken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. +Once all macro-ops in the input buffer have been passed into the internal ``Instruction`` buffer or the ``Instruction`` buffer size exceeds the size of the output buffer, ``Instruction`` objects are checked for any trivially identifiable branch mispredictions (i.e., a non-branch predicted as a taken branch), and if discovered, the branch predictor is informed and a pipeline flush requested. 
The cycle ends when all ``Instruction`` objects in the internal buffer have been processed, or a misprediction is identified and all remaining ``Instruction`` objects are flushed. diff --git a/src/include/simeng/arch/aarch64/helpers/conditional.hh b/src/include/simeng/arch/aarch64/helpers/conditional.hh index 2b3ea1b9c3..e541eb276a 100644 --- a/src/include/simeng/arch/aarch64/helpers/conditional.hh +++ b/src/include/simeng/arch/aarch64/helpers/conditional.hh @@ -56,7 +56,7 @@ uint8_t ccmp_reg(srcValContainer& sourceValues, /** Helper function for instructions with the format `cb rn, #imm`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch isTaken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple condBranch_cmpToZero( srcValContainer& sourceValues, @@ -91,7 +91,7 @@ T cs_4ops(srcValContainer& sourceValues, /** Helper function for instructions with the format `tb rn, #imm, * label`. * T represents the type of sourceValues (e.g. for xn, T = uint64_t). - * Returns tuple of type [bool branch isTaken, uint64_t address]. */ + * Returns tuple of type [bool branch taken, uint64_t address]. */ template std::tuple tbnz_tbz( srcValContainer& sourceValues, diff --git a/src/include/simeng/config/yaml/ryml.hh b/src/include/simeng/config/yaml/ryml.hh index c35a4925f9..bed8f4620b 100644 --- a/src/include/simeng/config/yaml/ryml.hh +++ b/src/include/simeng/config/yaml/ryml.hh @@ -229,7 +229,7 @@ #define C4_VERSION_CAT(major, minor, patch) ((major)*10000 + (minor)*100 + (patch)) -/** A preprocessor foreach. Spectacular trick isTaken from: +/** A preprocessor foreach. Spectacular trick taken from: * http://stackoverflow.com/a/1872506/5875572 * The first argument is for a macro receiving a single argument, * which will be called with every subsequent argument. 
There is @@ -1449,7 +1449,7 @@ using std::index_sequence_for; /** C++11 implementation of integer sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template struct integer_sequence { @@ -1461,7 +1461,7 @@ struct integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence = integer_sequence; @@ -1544,19 +1544,19 @@ struct __make_integer_sequence /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_integer_sequence = typename __detail::__make_integer_sequence<_Tp, _Np>::type; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using make_index_sequence = make_integer_sequence; /** C++11 implementation of index sequence * @see https://en.cppreference.com/w/cpp/utility/integer_sequence - * @see 
isTaken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ + * @see taken from clang: http://llvm.org/viewvc/llvm-project/libcxx/trunk/include/utility?revision=211563&view=markup#l687 */ template using index_sequence_for = make_index_sequence; #endif @@ -4795,7 +4795,7 @@ namespace detail { /** @internal * @ingroup hash - * @see this was isTaken a great answer in stackoverflow: + * @see this was taken a great answer in stackoverflow: * https://stackoverflow.com/a/34597785/5875572 * @see http://aras-p.info/blog/2016/08/02/Hash-Functions-all-the-way-down/ */ template @@ -12377,7 +12377,7 @@ inline size_t scan_one(csubstr str, const char *type_fmt, T *v) * * So we fake it by using a dynamic format with an explicit * field size set to the length of the given span. - * This trick is isTaken from: + * This trick is taken from: * https://stackoverflow.com/a/18368910/5875572 */ /* this is the actual format we'll use for scanning */ @@ -14624,7 +14624,7 @@ C4_ALWAYS_INLINE DumpResults format_dump_resume(DumperFn &&dumpfn, substr buf, c namespace c4 { -//! isTaken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum +//! taken from http://stackoverflow.com/questions/15586163/c11-type-trait-to-differentiate-between-enum-class-and-regular-enum template using is_scoped_enum = std::integral_constant::value && !std::is_convertible::value>; @@ -15704,7 +15704,7 @@ template using cspanrs = spanrs; //----------------------------------------------------------------------------- //----------------------------------------------------------------------------- /** A non-owning span which always retains the capacity of the original - * range it was isTaken from (though it may loose its original size). + * range it was taken from (though it may loose its original size). 
* The resizing methods resize(), ltrim(), rtrim() as well * as the subselection methods subspan(), range(), first() and last() can be * used at will without loosing the original capacity; the full capacity span diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index 1bf93c451f..e3b697433e 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -106,7 +106,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as isTaken, it will require a + // Instruction isn't a branch; if predicted as taken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 5eb1091c6b..c71b581a60 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -101,7 +101,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { "Early branch misprediction check shouldn't be called after execution"); if (!isBranch()) { - // Instruction isn't a branch; if predicted as isTaken, it will require a + // Instruction isn't a branch; if predicted as taken, it will require a // flush return {prediction_.isTaken, instructionAddress_ + 4}; } diff --git a/src/lib/branchpredictors/GenericPredictor.cc b/src/lib/branchpredictors/GenericPredictor.cc index cd41f20cac..3a6091f85a 100644 --- a/src/lib/branchpredictors/GenericPredictor.cc +++ b/src/lib/branchpredictors/GenericPredictor.cc @@ -1,7 +1,5 @@ #include "simeng/branchpredictors/GenericPredictor.hh" -#include - namespace simeng { GenericPredictor::GenericPredictor(ryml::ConstNodeRef config) diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index 3f25873f48..bdc7c43241 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ 
b/src/lib/branchpredictors/TagePredictor.cc @@ -22,12 +22,10 @@ TagePredictor::TagePredictor(ryml::ConstNodeRef config) uint8_t weaklyTaken = (uint8_t)1 << (satCntBits_ - 1); uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] .as() == "Always-Taken") - ? weaklyTaken - : (weaklyTaken - 1); + ? weaklyTaken : (weaklyTaken - 1); // Create branch prediction structures - btb_ = - std::vector>( - (uint8_t)1 << btbBits_, {satCntVal, 0}); + btb_ = std::vector>( + (uint8_t)1 << btbBits_, {satCntVal, 0}); // Set up Tagged tables for (uint32_t i = 0; i < numTageTables_; i++) { diff --git a/test/unit/MockBranchPredictor.hh b/test/unit/MockBranchPredictor.hh index 413db7ef98..2727e6db51 100644 --- a/test/unit/MockBranchPredictor.hh +++ b/test/unit/MockBranchPredictor.hh @@ -14,7 +14,6 @@ class MockBranchPredictor : public BranchPredictor { void(uint64_t address, bool taken, uint64_t targetAddress, BranchType type, uint64_t instructionId)); MOCK_METHOD1(flush, void(uint64_t address)); - MOCK_METHOD2(addToFTQ, void(uint64_t address, bool taken)); }; } // namespace simeng diff --git a/test/unit/aarch64/InstructionTest.cc b/test/unit/aarch64/InstructionTest.cc index 00279300b8..92b8e9393a 100644 --- a/test/unit/aarch64/InstructionTest.cc +++ b/test/unit/aarch64/InstructionTest.cc @@ -493,7 +493,7 @@ TEST_F(AArch64InstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch isTaken) is handled correctly +// Test that a correct prediction (branch taken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -510,7 +510,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is isTaken is handled correctly + // Test a correct 
prediction where branch is taken is handled correctly pred = {true, 80 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -522,7 +522,7 @@ TEST_F(AArch64InstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not isTaken) is handled correctly +// Test that a correct prediction (branch not taken) is handled correctly TEST_F(AArch64InstructionTest, correctPred_notTaken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -539,7 +539,7 @@ TEST_F(AArch64InstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't isTaken is handled correctly + // Test a correct prediction where a branch isn't taken is handled correctly pred = {false, 80 + 4}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -580,7 +580,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 100 + 0x28); } -// Test that an incorrect prediction (wrong isTaken) is handled correctly +// Test that an incorrect prediction (wrong taken) is handled correctly TEST_F(AArch64InstructionTest, incorrectPred_taken) { // insn is `cbz x2, #0x28` Instruction insn = Instruction(arch, *cbzMetadata.get(), MicroOpInfo()); @@ -597,7 +597,7 @@ TEST_F(AArch64InstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - isTaken is wrong + // Test an incorrect prediction is handled correctly - taken is wrong pred = {true, 100 + 0x28}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); diff --git a/test/unit/riscv/InstructionTest.cc b/test/unit/riscv/InstructionTest.cc index c40b503a6c..6103cd4f5c 100644 --- 
a/test/unit/riscv/InstructionTest.cc +++ b/test/unit/riscv/InstructionTest.cc @@ -467,7 +467,7 @@ TEST_F(RiscVInstructionTest, earlyBranchMisprediction) { EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); } -// Test that a correct prediction (branch isTaken) is handled correctly +// Test that a correct prediction (branch taken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -484,7 +484,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where branch is isTaken is handled correctly + // Test a correct prediction where branch is taken is handled correctly pred = {true, 400 - 86}; insn.setBranchPrediction(pred); matchingPred = (insn.getBranchPrediction() == pred); @@ -497,7 +497,7 @@ TEST_F(RiscVInstructionTest, correctPred_taken) { EXPECT_EQ(insn.getBranchAddress(), pred.target); } -// Test that a correct prediction (branch not isTaken) is handled correctly +// Test that a correct prediction (branch not taken) is handled correctly TEST_F(RiscVInstructionTest, correctPred_notTaken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -514,7 +514,7 @@ TEST_F(RiscVInstructionTest, correctPred_notTaken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test a correct prediction where a branch isn't isTaken is handled correctly + // Test a correct prediction where a branch isn't taken is handled correctly // imm operand 0x28 has 4 added implicitly by dissassembler pred = {false, 400 + 4}; insn.setBranchPrediction(pred); @@ -559,7 +559,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_target) { EXPECT_EQ(insn.getBranchAddress(), 400 - 86); } -// Test that an incorrect prediction (wrong isTaken) is handled correctly +// Test that an incorrect prediction (wrong 
taken) is handled correctly TEST_F(RiscVInstructionTest, incorrectPred_taken) { // insn is `bgeu a5, a4, -86` Instruction insn = Instruction(arch, *bgeuMetadata.get()); @@ -576,7 +576,7 @@ TEST_F(RiscVInstructionTest, incorrectPred_taken) { std::tuple tup = {false, 0}; EXPECT_EQ(insn.checkEarlyBranchMisprediction(), tup); - // Test an incorrect prediction is handled correctly - isTaken is wrong + // Test an incorrect prediction is handled correctly - taken is wrong // imm operand 0x28 has 4 added implicitly by dissassembler pred = {true, 400 - 86}; insn.setBranchPrediction(pred); From 4e1717d5b3f5437a53bb5201104f90b17a7d07bd Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:53:42 +0000 Subject: [PATCH 54/69] clang format --- src/include/simeng/Instruction.hh | 2 +- src/include/simeng/arch/ArchInfo.hh | 2 +- src/include/simeng/arch/Architecture.hh | 4 +- .../simeng/branchpredictors/BranchHistory.hh | 8 ++-- .../branchpredictors/BranchPredictor.hh | 2 +- .../simeng/branchpredictors/TagePredictor.hh | 13 +++--- src/include/simeng/config/ExpectationNode.hh | 4 +- src/include/simeng/pipeline/PortAllocator.hh | 2 +- src/lib/branchpredictors/TagePredictor.cc | 41 ++++++++----------- src/lib/config/ModelConfig.cc | 19 +++++---- src/lib/models/inorder/Core.cc | 6 ++- test/integration/ConfigTest.cc | 8 +--- test/regression/aarch64/instructions/neon.cc | 2 +- test/unit/aarch64/AuxiliaryFunctionsTest.cc | 27 ++++++------ test/unit/pipeline/FetchUnitTest.cc | 3 +- 15 files changed, 70 insertions(+), 73 deletions(-) diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index b5a4e33e3b..5004942539 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -29,7 +29,7 @@ struct ExecutionInfo { * Each supported ISA should provide a derived implementation of this class. 
*/ class Instruction { public: - virtual ~Instruction(){}; + virtual ~Instruction() {}; /** Retrieve the source registers this instruction reads. */ virtual const span getSourceRegisters() const = 0; diff --git a/src/include/simeng/arch/ArchInfo.hh b/src/include/simeng/arch/ArchInfo.hh index e029699c07..eed7055cf7 100644 --- a/src/include/simeng/arch/ArchInfo.hh +++ b/src/include/simeng/arch/ArchInfo.hh @@ -12,7 +12,7 @@ namespace arch { /** A class to hold and generate architecture specific configuration options. */ class ArchInfo { public: - virtual ~ArchInfo(){}; + virtual ~ArchInfo() {}; /** Get the set of system register enums currently supported. */ virtual const std::vector& getSysRegEnums() const = 0; diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index aa293d6f5f..b4e6ac6001 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -30,7 +30,7 @@ struct ExceptionResult { * cycle until complete. */ class ExceptionHandler { public: - virtual ~ExceptionHandler(){}; + virtual ~ExceptionHandler() {}; /** Tick the exception handler to progress handling of the exception. Should * return `false` if the exception requires further handling, or `true` once * complete. */ @@ -46,7 +46,7 @@ class Architecture { public: Architecture(kernel::Linux& kernel) : linux_(kernel) {} - virtual ~Architecture(){}; + virtual ~Architecture() {}; /** Attempt to pre-decode from `bytesAvailable` bytes of instruction memory. * Writes into the supplied macro-op vector, and returns the number of bytes diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh index 04d4d7e08c..2106676669 100644 --- a/src/include/simeng/branchpredictors/BranchHistory.hh +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -24,7 +24,8 @@ class BranchHistory { * integer. 
*/ uint64_t getHistory(uint8_t numBits) { assert(numBits <= 64 && "Cannot get more than 64 bits without rolling"); - assert(numBits <= size_ && "Cannot get more bits of branch history than " + assert(numBits <= size_ && + "Cannot get more bits of branch history than " "the size of the history"); return (history_[0] & ((1 << numBits) - 1)); } @@ -33,7 +34,8 @@ class BranchHistory { * value of size 'length'. The global history is folded by taking an * XOR hash with the overflowing bits to get an output of 'length' bits. */ uint64_t getFolded(uint8_t numBits, uint8_t length) { - assert(numBits <= size_ && "Cannot get more bits of branch history than " + assert(numBits <= size_ && + "Cannot get more bits of branch history than " "the size of the history"); uint64_t output = 0; @@ -105,4 +107,4 @@ class BranchHistory { std::vector history_; }; -} \ No newline at end of file +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh index 7779fe0703..d1cf1eeec3 100644 --- a/src/include/simeng/branchpredictors/BranchPredictor.hh +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -12,7 +12,7 @@ namespace simeng { /** An abstract branch predictor interface. */ class BranchPredictor { public: - virtual ~BranchPredictor(){}; + virtual ~BranchPredictor() {}; /** Generate a branch prediction for the supplied instruction address, a * branch type, and a known branch offset. 
Returns a branch direction and diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 2afaa0f7d7..1b52f44173 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -1,13 +1,13 @@ #pragma once +#include #include #include #include #include -#include -#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/branchpredictors/BranchHistory.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/config/SimInfo.hh" namespace simeng { @@ -17,7 +17,7 @@ namespace simeng { struct TageEntry { uint8_t satCnt; uint64_t tag; - uint8_t u; // usefulness counter + uint8_t u; // usefulness counter uint64_t target; }; @@ -86,8 +86,7 @@ class TagePredictor : public BranchPredictor { * alternative prediction. This prediction info is determined from the * tagged tables for a branch with the provided address. */ void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, - BranchPrediction* altPrediction, - uint8_t* predTable, + BranchPrediction* altPrediction, uint8_t* predTable, std::vector* indices, std::vector* tags); @@ -96,8 +95,8 @@ class TagePredictor : public BranchPredictor { uint64_t getTaggedIndex(uint64_t address, uint8_t table); /** Returns a hash of the address and the global history that is then trimmed - * to the appropriate tag length. The tag varies depending on the table - * that is being accessed. */ + * to the appropriate tag length. The tag varies depending on the table + * that is being accessed. 
*/ uint64_t getTag(uint64_t address, uint8_t table); /** Updates the default, untagged prediction table on the basis of the diff --git a/src/include/simeng/config/ExpectationNode.hh b/src/include/simeng/config/ExpectationNode.hh index 187d3ed37a..cbf59d5750 100644 --- a/src/include/simeng/config/ExpectationNode.hh +++ b/src/include/simeng/config/ExpectationNode.hh @@ -134,9 +134,9 @@ class ExpectationNode { /** Default constructor. Used primarily to provide a root node for populated * ExpectationNode instances to be added to. */ - ExpectationNode(){}; + ExpectationNode() {}; - ~ExpectationNode(){}; + ~ExpectationNode() {}; /** A getter function to retrieve the key of a node. */ std::string getKey() const { return nodeKey_; } diff --git a/src/include/simeng/pipeline/PortAllocator.hh b/src/include/simeng/pipeline/PortAllocator.hh index 78e3a0c5c9..bd566702ae 100644 --- a/src/include/simeng/pipeline/PortAllocator.hh +++ b/src/include/simeng/pipeline/PortAllocator.hh @@ -16,7 +16,7 @@ const uint8_t OPTIONAL = 1; /** An abstract execution port allocator interface. */ class PortAllocator { public: - virtual ~PortAllocator(){}; + virtual ~PortAllocator() {}; /** Allocate a port for the specified instruction group; returns the allocated * port. */ diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index bdc7c43241..dcfe1a5050 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -14,18 +14,18 @@ TagePredictor::TagePredictor(ryml::ConstNodeRef config) config["Branch-Predictor"]["Global-History-Length"].as()), rasSize_(config["Branch-Predictor"]["RAS-entries"].as()), globalHistory_(1 << (numTageTables_ + 1)), - tagLength_(config["Branch-Predictor"]["Tag-Length"].as()) - { + tagLength_(config["Branch-Predictor"]["Tag-Length"].as()) { // Calculate the saturation counter boundary between weakly taken and // not-taken. 
`(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state // value uint8_t weaklyTaken = (uint8_t)1 << (satCntBits_ - 1); uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] .as() == "Always-Taken") - ? weaklyTaken : (weaklyTaken - 1); + ? weaklyTaken + : (weaklyTaken - 1); // Create branch prediction structures - btb_ = std::vector>( - (uint8_t)1 << btbBits_, {satCntVal, 0}); + btb_ = std::vector>((uint8_t)1 << btbBits_, + {satCntVal, 0}); // Set up Tagged tables for (uint32_t i = 0; i < numTageTables_; i++) { @@ -96,8 +96,8 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, } void TagePredictor::update(uint64_t address, bool isTaken, - uint64_t targetAddress, - simeng::BranchType type, uint64_t instructionId) { + uint64_t targetAddress, simeng::BranchType type, + uint64_t instructionId) { // Make sure that this function is called in program order; and then update // the lastUpdatedInstructionId variable assert(instructionId >= lastUpdatedInstructionId && @@ -148,7 +148,6 @@ void TagePredictor::flush(uint64_t address) { // Roll back global history globalHistory_.rollBack(); - } void TagePredictor::getTaggedPrediction(uint64_t address, @@ -194,20 +193,19 @@ BranchPrediction TagePredictor::getBtbPrediction(uint64_t address) { uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { // Hash function here is pretty arbitrary. uint64_t h1 = (address >> 2); - uint64_t h2 = globalHistory_.getFolded(1 << (table + 1), - (1 << tageTableBits_) - 1); + uint64_t h2 = + globalHistory_.getFolded(1 << (table + 1), (1 << tageTableBits_) - 1); return (h1 ^ h2) & ((1 << tageTableBits_) - 1); } uint64_t TagePredictor::getTag(uint64_t address, uint8_t table) { // Hash function here is pretty arbitrary. 
uint64_t h1 = address; - uint64_t h2 = globalHistory_.getFolded((1ull << table), - ((1ull << tagLength_) - 1)); + uint64_t h2 = + globalHistory_.getFolded((1ull << table), ((1ull << tagLength_) - 1)); return (h1 ^ h2) & ((1ull << tagLength_) - 1); } - void TagePredictor::updateBtb(uint64_t address, bool isTaken, uint64_t targetAddress) { // Calculate 2-bit saturating counter value @@ -226,9 +224,7 @@ void TagePredictor::updateBtb(uint64_t address, bool isTaken, } } - -void TagePredictor::updateTaggedTables(bool isTaken, - uint64_t target) { +void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { // Get stored information from the FTQ uint8_t predTable = ftq_.front().predTable; std::vector indices = ftq_.front().indices; @@ -236,7 +232,6 @@ void TagePredictor::updateTaggedTables(bool isTaken, BranchPrediction pred = ftq_.front().prediction; BranchPrediction altPred = ftq_.front().altPrediction; - // Update the prediction counter uint64_t predIndex = indices[predTable]; if (isTaken && (tageTables_[predTable][predIndex].satCnt < 3)) { @@ -252,9 +247,9 @@ void TagePredictor::updateTaggedTables(bool isTaken, bool allocated = false; for (uint8_t table = predTable + 1; table < numTageTables_; table++) { if (!allocated && (tageTables_[table][indices[table]].u <= 1)) { - tageTables_[table][indices[table]] = {((isTaken) ? (uint8_t)2 : - (uint8_t)1), - tags[table], (uint8_t)2, target}; + tageTables_[table][indices[table]] = { + ((isTaken) ? 
(uint8_t)2 : (uint8_t)1), tags[table], (uint8_t)2, + target}; allocated = true; } } @@ -267,11 +262,11 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint8_t currentU = tageTables_[predTable][indices[predTable]].u; if (wasUseful && currentU < 3) { (tageTables_[predTable][indices[predTable]].u)++; - } if (!wasUseful && currentU > 0) { + } + if (!wasUseful && currentU > 0) { (tageTables_[predTable][indices[predTable]].u)--; } - } } -} // namespace simeng \ No newline at end of file +} // namespace simeng \ No newline at end of file diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 1708b3e1f0..d6f2732b45 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -531,8 +531,9 @@ void ModelConfig::setExpectations(bool isDefault) { // associated YAML node if (configTree_["Branch-Predictor"].has_child(ryml::to_csubstr("Type"))) { if ((configTree_["Branch-Predictor"]["Type"].as() == - "Generic") || (configTree_["Branch-Predictor"]["Type"] - .as() == "Tage")) { + "Generic") || + (configTree_["Branch-Predictor"]["Type"].as() == + "Tage")) { expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation( 2, "Saturating-Count-Bits")); @@ -546,17 +547,17 @@ void ModelConfig::setExpectations(bool isDefault) { .setValueSet( std::vector{"Always-Taken", "Always-Not-Taken"}); } - if ((configTree_["Branch-Predictor"]["Type"].as() - == "Tage")) { + if ((configTree_["Branch-Predictor"]["Type"].as() == + "Tage")) { expectations_["Branch-Predictor"].addChild( - ExpectationNode::createExpectation( - 12, "Tage-Table-Bits")); + ExpectationNode::createExpectation(12, + "Tage-Table-Bits")); expectations_["Branch-Predictor"]["Tage-Table-Bits"] - .setValueBounds(1, UINT8_MAX); + .setValueBounds(1, UINT8_MAX); expectations_["Branch-Predictor"].addChild( - ExpectationNode::createExpectation( - 6, "Num-Tage-Tables")); + ExpectationNode::createExpectation(6, + "Num-Tage-Tables")); 
expectations_["Branch-Predictor"]["Num-Tage-Tables"] .setValueBounds(1, UINT8_MAX); diff --git a/src/lib/models/inorder/Core.cc b/src/lib/models/inorder/Core.cc index 4df754a60d..53ea8ec61c 100644 --- a/src/lib/models/inorder/Core.cc +++ b/src/lib/models/inorder/Core.cc @@ -148,8 +148,10 @@ std::map Core::getStats() const { ipcStr << std::setprecision(2) << ipc; return { - {"cycles", std::to_string(ticks_)}, {"retired", std::to_string(retired)}, - {"ipc", ipcStr.str()}, {"flushes", std::to_string(flushes_)}, + {"cycles", std::to_string(ticks_)}, + {"retired", std::to_string(retired)}, + {"ipc", ipcStr.str()}, + {"flushes", std::to_string(flushes_)}, }; } diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc index 12c295d2d4..bd2c64382f 100644 --- a/test/integration/ConfigTest.cc +++ b/test/integration/ConfigTest.cc @@ -300,9 +300,7 @@ TEST(ConfigTest, invalidTypeOnSetBounds) { simeng::config::ExpectationNode::createExpectation("DEFAULT", "CHILD")); ASSERT_DEATH( - { - expectations["HEAD"]["CHILD"].setValueSet({0, 1, 2}); - }, + { expectations["HEAD"]["CHILD"].setValueSet({0, 1, 2}); }, "The data type of the passed vector used in setValueSet\\() " "does not match that held within the ExpectationNode with key " "HEAD:CHILD. 
Passed vector elements are of type 32-bit integer and the " @@ -320,9 +318,7 @@ TEST(ConfigTest, alreadyDefinedBounds) { simeng::config::ExpectationNode::createExpectation(0, "CHILD")); expectations["HEAD"]["CHILD"].setValueBounds(0, 10); ASSERT_DEATH( - { - expectations["HEAD"]["CHILD"].setValueSet({1, 2, 3}); - }, + { expectations["HEAD"]["CHILD"].setValueSet({1, 2, 3}); }, "Invalid call of setValueSet\\() for the ExpectationNode with key " "HEAD:CHILD as value bounds have already been defined."); } diff --git a/test/regression/aarch64/instructions/neon.cc b/test/regression/aarch64/instructions/neon.cc index a4731f388f..64efb68110 100644 --- a/test/regression/aarch64/instructions/neon.cc +++ b/test/regression/aarch64/instructions/neon.cc @@ -2546,7 +2546,7 @@ TEST_P(InstNeon, mvni) { ~((32u << 8) | 255)}); } -TEST_P(InstNeon, not ) { +TEST_P(InstNeon, not) { initialHeapData_.resize(128); uint8_t* heap = reinterpret_cast(initialHeapData_.data()); heap[0] = 0b11111111; diff --git a/test/unit/aarch64/AuxiliaryFunctionsTest.cc b/test/unit/aarch64/AuxiliaryFunctionsTest.cc index dd18b16a31..c7b823f5c7 100644 --- a/test/unit/aarch64/AuxiliaryFunctionsTest.cc +++ b/test/unit/aarch64/AuxiliaryFunctionsTest.cc @@ -71,10 +71,11 @@ TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { { bitfieldManipulate(0, 0, 16, 0, false); }, "Attempted to use a rotate amount of 16 in bitfieldManipulate which is " "greater than or equal to the data type size of 16b in use"); - ASSERT_DEATH({ bitfieldManipulate(0, 0, 0, 16, false); }, - "Attempted to use a source bit position value of 16 in " - "bitfieldManipulate which is greater than or equal to the data " - "type size of 16b in use"); + ASSERT_DEATH( + { bitfieldManipulate(0, 0, 0, 16, false); }, + "Attempted to use a source bit position value of 16 in " + "bitfieldManipulate which is greater than or equal to the data " + "type size of 16b in use"); // uint32 EXPECT_EQ(bitfieldManipulate(0x0000FFFF, 0xFFFF0000, 0, 0, false), @@ 
-104,10 +105,11 @@ TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { { bitfieldManipulate(0, 0, 32, 0, false); }, "Attempted to use a rotate amount of 32 in bitfieldManipulate which is " "greater than or equal to the data type size of 32b in use"); - ASSERT_DEATH({ bitfieldManipulate(0, 0, 0, 32, false); }, - "Attempted to use a source bit position value of 32 in " - "bitfieldManipulate which is greater than or equal to the data " - "type size of 32b in use"); + ASSERT_DEATH( + { bitfieldManipulate(0, 0, 0, 32, false); }, + "Attempted to use a source bit position value of 32 in " + "bitfieldManipulate which is greater than or equal to the data " + "type size of 32b in use"); // uint64 EXPECT_EQ(bitfieldManipulate(0x00000000FFFFFFFF, 0xFFFFFFFF00000000, @@ -147,10 +149,11 @@ TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { { bitfieldManipulate(0, 0, 64, 0, false); }, "Attempted to use a rotate amount of 64 in bitfieldManipulate which is " "greater than or equal to the data type size of 64b in use"); - ASSERT_DEATH({ bitfieldManipulate(0, 0, 0, 64, false); }, - "Attempted to use a source bit position value of 64 in " - "bitfieldManipulate which is greater than or equal to the data " - "type size of 64b in use"); + ASSERT_DEATH( + { bitfieldManipulate(0, 0, 0, 64, false); }, + "Attempted to use a source bit position value of 64 in " + "bitfieldManipulate which is greater than or equal to the data " + "type size of 64b in use"); } /** `conditionHolds` Tests */ diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 2c1c99b69b..90870fb5e2 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ b/test/unit/pipeline/FetchUnitTest.cc @@ -279,8 +279,7 @@ TEST_P(PipelineFetchUnitTest, fetchTakenBranchMidBlock) { EXPECT_CALL(*uop, getBranchType()).WillOnce(Return(bType)); EXPECT_CALL(*uop, getKnownOffset()).WillOnce(Return(knownOff)); BranchPrediction pred = {true, pc + knownOff}; - EXPECT_CALL(predictor, predict(20, bType, 
knownOff)) - .WillOnce(Return(pred)); + EXPECT_CALL(predictor, predict(20, bType, knownOff)).WillOnce(Return(pred)); fetchUnit.tick(); // Ensure on next tick, predecode is not called From bff5fa86e3f65fe921cd70be97987d313f5e4c99 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 9 Dec 2024 17:29:08 +0000 Subject: [PATCH 55/69] Finessing --- src/lib/branchpredictors/TagePredictor.cc | 43 +++++++++++++---------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index dcfe1a5050..2a5dd98a0a 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -23,11 +23,12 @@ TagePredictor::TagePredictor(ryml::ConstNodeRef config) .as() == "Always-Taken") ? weaklyTaken : (weaklyTaken - 1); - // Create branch prediction structures - btb_ = std::vector>((uint8_t)1 << btbBits_, + + // Set up non-tagged default prediction table + btb_ = std::vector>(1ul << btbBits_, {satCntVal, 0}); - // Set up Tagged tables + // Set up tagged prediction tables for (uint32_t i = 0; i < numTageTables_; i++) { std::vector newTable; for (uint32_t j = 0; j < (1ul << tageTableBits_); j++) { @@ -55,6 +56,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, getTaggedPrediction(address, &prediction, &altPrediction, &predTable, &indices, &tags); + // If known offset then overwrite predicted target with this if (knownOffset != 0) prediction.target = address + knownOffset; // Amend prediction based on branch type @@ -86,7 +88,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, if (!prediction.isTaken) prediction.target = address + 4; } - // Store the hashed index for correct hashing in update() + // Store prediction data so that update() has the info it needs ftqEntry newEntry = {predTable, indices, tags, prediction, altPrediction}; ftq_.push_back(newEntry); @@ 
-110,12 +112,11 @@ void TagePredictor::update(uint64_t address, bool isTaken, // Update global history if prediction was incorrect if (ftq_.front().prediction.isTaken != isTaken) { - // Bit-flip the global history bit corresponding to this prediction // We know how many predictions there have since been by the size of the FTQ globalHistory_.updateHistory(isTaken, ftq_.size()); } - // Pop ftq entry from ftq + // Pop used ftq entry from ftq ftq_.pop_front(); } @@ -167,10 +168,14 @@ void TagePredictor::getTaggedPrediction(uint64_t address, // number, the longer global history it has access to. Therefore, the // greater the table number, the better the prediction. for (uint8_t table = 0; table < numTageTables_; table++) { + // Determine the index and tag for this table, as they vary depending on + // the length of global history uint64_t index = getTaggedIndex(address, table); indices->push_back(index); uint64_t tag = getTag(address, table); tags->push_back(tag); + + // If tag matches, then use this prediction if (tageTables_[table][index].tag == tag) { altPrediction->isTaken = prediction->isTaken; altPrediction->target = prediction->target; @@ -184,22 +189,23 @@ void TagePredictor::getTaggedPrediction(uint64_t address, BranchPrediction TagePredictor::getBtbPrediction(uint64_t address) { // Get prediction from BTB - uint64_t index = (address >> 2) & ((1 << btbBits_) - 1); + uint64_t index = (address >> 2) & ((1ull << btbBits_) - 1); bool direction = (btb_[index].first >= (1 << (satCntBits_ - 1))); uint64_t target = btb_[index].second; return {direction, target}; } uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { - // Hash function here is pretty arbitrary. + // Get the XOR of the address (sans two least-significant bits) and the + // global history (folded onto itself to make it of the correct size). 
uint64_t h1 = (address >> 2); - uint64_t h2 = - globalHistory_.getFolded(1 << (table + 1), (1 << tageTableBits_) - 1); + uint64_t h2 = globalHistory_.getFolded(1ull << (table + 1), tageTableBits_); + // Then truncat the XOR to make it fit thed esired size of an index return (h1 ^ h2) & ((1 << tageTableBits_) - 1); } uint64_t TagePredictor::getTag(uint64_t address, uint8_t table) { - // Hash function here is pretty arbitrary. + // Hash function here is pretty arbitrary uint64_t h1 = address; uint64_t h2 = globalHistory_.getFolded((1ull << table), ((1ull << tagLength_) - 1)); @@ -240,17 +246,17 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { (tageTables_[predTable][predIndex].satCnt)--; } - // Allocate new entry if prediction wrong and possible -- Check higher order - // tagged predictor tables to see if there is a non-useful entry that can - // be replaced + // Allocate new entry if prediction was wrong and space for a new entry is + // available + // -- Check higher order tagged predictor tables to see if there is a + // non-useful entry that can be replaced if (isTaken != pred.isTaken || (isTaken && (target != pred.target))) { - bool allocated = false; for (uint8_t table = predTable + 1; table < numTageTables_; table++) { - if (!allocated && (tageTables_[table][indices[table]].u <= 1)) { + if (tageTables_[table][indices[table]].u <= 1) { tageTables_[table][indices[table]] = { - ((isTaken) ? (uint8_t)2 : (uint8_t)1), tags[table], (uint8_t)2, + (isTaken ? 
(uint8_t)2 : (uint8_t)1), tags[table], (uint8_t)2, target}; - allocated = true; + break; } } } @@ -260,6 +266,7 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { (pred.isTaken && (pred.target != altPred.target))) { bool wasUseful = (pred.isTaken == isTaken); uint8_t currentU = tageTables_[predTable][indices[predTable]].u; + // Make sure that update is possible if (wasUseful && currentU < 3) { (tageTables_[predTable][indices[predTable]].u)++; } From 22756e6fd7ec4397ce8c9ca05c67591bf632fcad Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:35:00 +0000 Subject: [PATCH 56/69] Adding include to BranchHistory.hh --- .../simeng/branchpredictors/BranchHistory.hh | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh index 2106676669..70f8fe9116 100644 --- a/src/include/simeng/branchpredictors/BranchHistory.hh +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -1,5 +1,7 @@ #pragma once +#include + namespace simeng { /** A class for storing a branch history. Needed for cases where a branch * history of more than 64 bits is required. 
This class makes it easier to @@ -27,7 +29,7 @@ class BranchHistory { assert(numBits <= size_ && "Cannot get more bits of branch history than " "the size of the history"); - return (history_[0] & ((1 << numBits) - 1)); + return (history_[0] & ((1ull << numBits) - 1)); } /** Returns 'numBits' of the global history folded over on itself to get a @@ -44,7 +46,7 @@ class BranchHistory { while (startIndex <= numBits) { output ^= ((history_[startIndex / 64] >> startIndex) & - ((1 << (numBits - startIndex)) - 1)); + ((1ull << (numBits - startIndex)) - 1)); // Check to see if a second uint64_t value will need to be accessed if ((startIndex / 64) == (endIndex / 64)) { @@ -67,7 +69,7 @@ class BranchHistory { if (i == 0) { history_[i] |= ((isTaken) ? 1 : 0); } else { - history_[i] |= (((history_[i - 1] & ((uint64_t)1 << 63)) > 0) ? 1 : 0); + history_[i] |= (((history_[i - 1] & (1ull << 63)) > 0) ? 1 : 0); } } } @@ -81,7 +83,10 @@ class BranchHistory { if (position < size_) { uint8_t vectIndex = position / 64; uint8_t bitIndex = position % 64; - history_[vectIndex] ^= ((uint64_t)1 << bitIndex); + bool currentlyTaken = ((history_[vectIndex] & (1ull << bitIndex)) != 0); + if (currentlyTaken != isTaken) { + history_[vectIndex] ^= (1ull << bitIndex); + } } } @@ -90,7 +95,7 @@ class BranchHistory { for (uint8_t i = 0; i <= (size_ / 64); i++) { history_[i] >>= 1; if (i < (size_ / 64)) { - history_[i] |= (((history_[i + 1] & 1) > 0) ? ((uint64_t)1 << 63) : 0); + history_[i] |= (((history_[i + 1] & 1) > 0) ? 
(1ull << 63) : 0); } } } From eba5447b122c8ff0eed18d850c1304fbff8874f8 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:51:23 +0000 Subject: [PATCH 57/69] Turning around Finn's comments --- src/include/simeng/branchpredictors/BranchHistory.hh | 11 +++++------ src/include/simeng/branchpredictors/TagePredictor.hh | 3 ++- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh index 70f8fe9116..ab52b99808 100644 --- a/src/include/simeng/branchpredictors/BranchHistory.hh +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -1,6 +1,7 @@ #pragma once #include +#include namespace simeng { /** A class for storing a branch history. Needed for cases where a branch @@ -14,11 +15,9 @@ namespace simeng { class BranchHistory { public: BranchHistory(uint64_t size) : size_(size) { - history_ = {0}; - for (uint8_t i = 0; i < (size_ / 64); i++) { - history_.push_back(0); - } + history_ = std::make_unique(size_); } + ~BranchHistory() {}; /** Returns the 'numBits' most recent bits of the branch history. Maximum @@ -104,12 +103,12 @@ class BranchHistory { /** The number of bits of branch history stored in this branch history */ uint64_t size_; - /** A vector containing the bits of the branch history. The bits are + /** An array containing the bits of the branch history. The bits are * arranged such that the most recent branches are stored in uint64_t at * index 0 of the vector, then the next most recent at index 1 and so forth. * Within each uint64_t, the most recent branches are recorded in the * least-significant bits. 
*/ - std::vector history_; + std::unique_ptr history_; }; } // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 1b52f44173..74fe0204e6 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -54,6 +54,7 @@ class TagePredictor : public BranchPredictor { public: /** Initialise predictor models. */ TagePredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); + ~TagePredictor(); /** Generate a branch prediction for the supplied instruction address, a @@ -145,7 +146,7 @@ class TagePredictor : public BranchPredictor { * a branch-and-link instruction. */ std::map rasHistory_; - /** The size of the RAS. */ + /** The size of the RAS. I.e., the maximum capacity of the RAS. */ uint16_t rasSize_; /** An n-bit history of previous branch directions where n is equal to From 768db53c080d8208f65b37f649bd897e06e9907a Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Sat, 14 Dec 2024 12:16:26 +0000 Subject: [PATCH 58/69] Capitalising a comment --- src/include/simeng/branchpredictors/TagePredictor.hh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 74fe0204e6..87c4762cad 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -82,7 +82,7 @@ class TagePredictor : public BranchPredictor { * that is used for default predictions. */ BranchPrediction getBtbPrediction(uint64_t address); - /** provides a prediction, alternative prediction, the table number that + /** Provides a prediction, alternative prediction, the table number that * provided the prediction, and the indices and tags of the prediction and * alternative prediction. 
This prediction info is determined from the * tagged tables for a branch with the provided address. */ From 14789a67b53dc923a8e17d957d3f0fe77010efce Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:55:23 +0000 Subject: [PATCH 59/69] Turning vectors for indices and tags in the ftq into shared_ptrs of a fixed but dynamically chosen size --- .../simeng/branchpredictors/TagePredictor.hh | 9 ++--- src/lib/branchpredictors/TagePredictor.cc | 35 ++++++++++--------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 87c4762cad..29e92e3b02 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -5,6 +5,7 @@ #include #include #include +#include #include "simeng/branchpredictors/BranchHistory.hh" #include "simeng/branchpredictors/BranchPredictor.hh" @@ -25,8 +26,8 @@ struct TageEntry { * in the Fetch Target Queue. */ struct ftqEntry { uint8_t predTable; - std::vector indices; - std::vector tags; + std::shared_ptr indices; + std::shared_ptr tags; BranchPrediction prediction; BranchPrediction altPrediction; }; @@ -88,8 +89,8 @@ class TagePredictor : public BranchPredictor { * tagged tables for a branch with the provided address. */ void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, BranchPrediction* altPrediction, uint8_t* predTable, - std::vector* indices, - std::vector* tags); + std::shared_ptr indices, + std::shared_ptr tags); /** Returns the index of a branch in a tagged table for a given address and * table. 
*/ diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index 2a5dd98a0a..1d3baaf086 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -51,10 +51,10 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, BranchPrediction prediction; BranchPrediction altPrediction; uint8_t predTable; - std::vector indices; - std::vector tags; + std::shared_ptr indices(new uint64_t[numTageTables_]); + std::shared_ptr tags(new uint64_t[numTageTables_]); getTaggedPrediction(address, &prediction, &altPrediction, &predTable, - &indices, &tags); + indices, tags); // If known offset then overwrite predicted target with this if (knownOffset != 0) prediction.target = address + knownOffset; @@ -89,7 +89,8 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, } // Store prediction data so that update() has the info it needs - ftqEntry newEntry = {predTable, indices, tags, prediction, altPrediction}; + ftqEntry newEntry = {predTable, indices, tags, + prediction, altPrediction}; ftq_.push_back(newEntry); // Speculatively update the global history @@ -155,8 +156,8 @@ void TagePredictor::getTaggedPrediction(uint64_t address, BranchPrediction* prediction, BranchPrediction* altPrediction, uint8_t* predTable, - std::vector* indices, - std::vector* tags) { + std::shared_ptr indices, + std::shared_ptr tags) { // Get a basic prediction from the btb BranchPrediction basePrediction = getBtbPrediction(address); prediction->isTaken = basePrediction.isTaken; @@ -171,9 +172,9 @@ void TagePredictor::getTaggedPrediction(uint64_t address, // Determine the index and tag for this table, as they vary depending on // the length of global history uint64_t index = getTaggedIndex(address, table); - indices->push_back(index); + indices.get()[table] = index; uint64_t tag = getTag(address, table); - tags->push_back(tag); + tags.get()[table] = tag; // If tag matches, then use 
this prediction if (tageTables_[table][index].tag == tag) { @@ -233,13 +234,13 @@ void TagePredictor::updateBtb(uint64_t address, bool isTaken, void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { // Get stored information from the FTQ uint8_t predTable = ftq_.front().predTable; - std::vector indices = ftq_.front().indices; - std::vector tags = ftq_.front().tags; + std::shared_ptr indices = ftq_.front().indices; + std::shared_ptr tags = ftq_.front().tags; BranchPrediction pred = ftq_.front().prediction; BranchPrediction altPred = ftq_.front().altPrediction; // Update the prediction counter - uint64_t predIndex = indices[predTable]; + uint64_t predIndex = indices.get()[predTable]; if (isTaken && (tageTables_[predTable][predIndex].satCnt < 3)) { (tageTables_[predTable][predIndex].satCnt)++; } else if (!isTaken && (tageTables_[predTable][predIndex].satCnt > 0)) { @@ -252,9 +253,9 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { // non-useful entry that can be replaced if (isTaken != pred.isTaken || (isTaken && (target != pred.target))) { for (uint8_t table = predTable + 1; table < numTageTables_; table++) { - if (tageTables_[table][indices[table]].u <= 1) { - tageTables_[table][indices[table]] = { - (isTaken ? (uint8_t)2 : (uint8_t)1), tags[table], (uint8_t)2, + if (tageTables_[table][indices.get()[table]].u <= 1) { + tageTables_[table][indices.get()[table]] = { + (isTaken ? 
(uint8_t)2 : (uint8_t)1), tags.get()[table], (uint8_t)2, target}; break; } @@ -265,13 +266,13 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { if (pred.isTaken != altPred.isTaken || (pred.isTaken && (pred.target != altPred.target))) { bool wasUseful = (pred.isTaken == isTaken); - uint8_t currentU = tageTables_[predTable][indices[predTable]].u; + uint8_t currentU = tageTables_[predTable][indices.get()[predTable]].u; // Make sure that update is possible if (wasUseful && currentU < 3) { - (tageTables_[predTable][indices[predTable]].u)++; + (tageTables_[predTable][indices.get()[predTable]].u)++; } if (!wasUseful && currentU > 0) { - (tageTables_[predTable][indices[predTable]].u)--; + (tageTables_[predTable][indices.get()[predTable]].u)--; } } } From 7dcbb16a87c64ae7cfd89edbd396520dfc4300a7 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:56:59 +0000 Subject: [PATCH 60/69] predTable from uint8_t to int8_t --- src/include/simeng/branchpredictors/TagePredictor.hh | 4 ++-- src/lib/branchpredictors/TagePredictor.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 29e92e3b02..96d6cc28e0 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -25,7 +25,7 @@ struct TageEntry { /** A data structure to store all of the information needed for a single entry * in the Fetch Target Queue. */ struct ftqEntry { - uint8_t predTable; + int8_t predTable; std::shared_ptr indices; std::shared_ptr tags; BranchPrediction prediction; @@ -88,7 +88,7 @@ class TagePredictor : public BranchPredictor { * alternative prediction. This prediction info is determined from the * tagged tables for a branch with the provided address. 
*/ void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, - BranchPrediction* altPrediction, uint8_t* predTable, + BranchPrediction* altPrediction, int8_t* predTable, std::shared_ptr indices, std::shared_ptr tags); diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index 1d3baaf086..a15488fc63 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -50,7 +50,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, int64_t knownOffset) { BranchPrediction prediction; BranchPrediction altPrediction; - uint8_t predTable; + int8_t predTable; std::shared_ptr indices(new uint64_t[numTageTables_]); std::shared_ptr tags(new uint64_t[numTageTables_]); getTaggedPrediction(address, &prediction, &altPrediction, &predTable, @@ -155,7 +155,7 @@ void TagePredictor::flush(uint64_t address) { void TagePredictor::getTaggedPrediction(uint64_t address, BranchPrediction* prediction, BranchPrediction* altPrediction, - uint8_t* predTable, + int8_t* predTable, std::shared_ptr indices, std::shared_ptr tags) { // Get a basic prediction from the btb From c6dc5b5dd613c4c372882ce2d0d05862d1a1c510 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:00:23 +0000 Subject: [PATCH 61/69] updating how predTable is handled so that btb is -1, rather than 0 (i.e., is now different from the first tagged table) --- src/lib/branchpredictors/TagePredictor.cc | 31 +++++++++++++---------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index a15488fc63..846f992650 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -62,7 +62,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, // Amend prediction based on branch type if (type == 
BranchType::Unconditional) { prediction.isTaken = true; - predTable = 0; + predTable = -1; } else if (type == BranchType::Return) { prediction.isTaken = true; // Return branches can use the RAS if an entry is available @@ -72,7 +72,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, rasHistory_[address] = ras_.back(); ras_.pop_back(); } - predTable = 0; + predTable = -1; } else if (type == BranchType::SubroutineCall) { prediction.isTaken = true; // Subroutine call branches must push their associated return address to RAS @@ -82,7 +82,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, ras_.push_back(address + 4); // Record that this address is a branch-and-link instruction rasHistory_[address] = 0; - predTable = 0; + predTable = -1; } else if (type == BranchType::Conditional || type == BranchType::LoopClosing) { if (!prediction.isTaken) prediction.target = address + 4; @@ -162,7 +162,7 @@ void TagePredictor::getTaggedPrediction(uint64_t address, BranchPrediction basePrediction = getBtbPrediction(address); prediction->isTaken = basePrediction.isTaken; prediction->target = basePrediction.target; - *predTable = 0; + *predTable = -1; // Check each of the tagged predictor tables for an entry matching this // branch. If found, update the best prediction. 
The greater the table @@ -233,18 +233,20 @@ void TagePredictor::updateBtb(uint64_t address, bool isTaken, void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { // Get stored information from the FTQ - uint8_t predTable = ftq_.front().predTable; + int8_t predTable = ftq_.front().predTable; std::shared_ptr indices = ftq_.front().indices; std::shared_ptr tags = ftq_.front().tags; BranchPrediction pred = ftq_.front().prediction; BranchPrediction altPred = ftq_.front().altPrediction; - // Update the prediction counter - uint64_t predIndex = indices.get()[predTable]; - if (isTaken && (tageTables_[predTable][predIndex].satCnt < 3)) { - (tageTables_[predTable][predIndex].satCnt)++; - } else if (!isTaken && (tageTables_[predTable][predIndex].satCnt > 0)) { - (tageTables_[predTable][predIndex].satCnt)--; + // Update the prediction counter if tagged prediction table was used + if (predTable != -1) { + uint64_t predIndex = indices.get()[predTable]; + if (isTaken && (tageTables_[predTable][predIndex].satCnt < 3)) { + (tageTables_[predTable][predIndex].satCnt)++; + } else if (!isTaken && (tageTables_[predTable][predIndex].satCnt > 0)) { + (tageTables_[predTable][predIndex].satCnt)--; + } } // Allocate new entry if prediction was wrong and space for a new entry is @@ -262,9 +264,10 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { } } - // Update the usefulness counters if prediction differs from alt-prediction - if (pred.isTaken != altPred.isTaken || - (pred.isTaken && (pred.target != altPred.target))) { + // Update the usefulness counters if prediction is from a tagged prediction + // table and differs from alt-prediction + if ((predTable != -1) && (pred.isTaken != altPred.isTaken || + (pred.isTaken && (pred.target != altPred.target)))) { bool wasUseful = (pred.isTaken == isTaken); uint8_t currentU = tageTables_[predTable][indices.get()[predTable]].u; // Make sure that update is possible From f9da6021bba681b1602f332fe305098824573d66 
Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:11:39 +0000 Subject: [PATCH 62/69] Correcting tests after optimisation --- src/lib/branchpredictors/TagePredictor.cc | 2 +- test/unit/TagePredictorTest.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index 846f992650..33848cf4bf 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -168,7 +168,7 @@ void TagePredictor::getTaggedPrediction(uint64_t address, // branch. If found, update the best prediction. The greater the table // number, the longer global history it has access to. Therefore, the // greater the table number, the better the prediction. - for (uint8_t table = 0; table < numTageTables_; table++) { + for (int8_t table = 0; table < numTageTables_; table++) { // Determine the index and tag for this table, as they vary depending on // the length of global history uint64_t index = getTaggedIndex(address, table); diff --git a/test/unit/TagePredictorTest.cc b/test/unit/TagePredictorTest.cc index ada448cfc5..ef4d982514 100644 --- a/test/unit/TagePredictorTest.cc +++ b/test/unit/TagePredictorTest.cc @@ -215,8 +215,8 @@ TEST_F(TagePredictorTest, GlobalIndexing) { predictor.update(0, true, 4, BranchType::Conditional, 31); // Get prediction prediction = predictor.predict(0x7C, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.isTaken); - EXPECT_EQ(prediction.target, 0xBA); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); // Set entry in BTB predictor.update(0x7C, true, 0x80, BranchType::Conditional, 32); From 0ecdd6b40c1ffc0e5014f0da40bc8d0a4c88f470 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:13:53 +0000 Subject: [PATCH 63/69] TAGE->Tage in the documentation --- 
docs/sphinx/user/configuring_simeng.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index d73c88c0c4..320df77531 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -149,7 +149,7 @@ The Branch-Prediction section contains those options to parameterise the branch The current options include: Type - The type of branch predictor that is used, the options are ``Generic``, ``Perceptron``, and ``Tage``. Each of these types of predictor use prediction tables with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` and ``TAGE`` is a saturating counter, and in ``Perceptron`` it is a perceptron. ``TAGE`` also uses a series of further, tagged prediction tables to provide predictions informed by greater branch histories. + The type of branch predictor that is used, the options are ``Generic``, ``Perceptron``, and ``Tage``. Each of these types of predictor use prediction tables with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` and ``Tage`` is a saturating counter, and in ``Perceptron`` it is a perceptron. ``Tage`` also uses a series of further, tagged prediction tables to provide predictions informed by greater branch histories. BTB-Tag-Bits The number of bits used to index the entries in the Branch Target Buffer (BTB). The number of entries in the BTB is obtained from the calculation: 1 << ``bits``. For example, a ``bits`` value of 12 would result in a BTB with 4096 entries. 
From 57f3575875f9739d89e8ed4a2e407e44e749d7f7 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:14:51 +0000 Subject: [PATCH 64/69] Adding Tage to TX2 config file --- configs/tx2.yaml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/configs/tx2.yaml b/configs/tx2.yaml index a5e28807f9..1c9068af1e 100644 --- a/configs/tx2.yaml +++ b/configs/tx2.yaml @@ -27,10 +27,15 @@ Queue-Sizes: Load: 64 Store: 36 Branch-Predictor: - Type: "Perceptron" + Type: "Tage" BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 Global-History-Length: 19 - RAS-entries: 5 + RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: From 674672f28ca77a462803ec79b6774da6c7e6469e Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:17:04 +0000 Subject: [PATCH 65/69] Correcting typos in comments --- src/lib/branchpredictors/TagePredictor.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index 33848cf4bf..b7b7f82a9b 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -32,7 +32,7 @@ TagePredictor::TagePredictor(ryml::ConstNodeRef config) for (uint32_t i = 0; i < numTageTables_; i++) { std::vector newTable; for (uint32_t j = 0; j < (1ul << tageTableBits_); j++) { - TageEntry newEntry = {2, 0, 1, 0}; + TageEntry newEntry = {satCntVal, 0, 1, 0}; newTable.push_back(newEntry); } tageTables_.push_back(newTable); @@ -201,7 +201,7 @@ uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { // global history (folded onto itself to make it of the correct size). 
uint64_t h1 = (address >> 2); uint64_t h2 = globalHistory_.getFolded(1ull << (table + 1), tageTableBits_); - // Then truncat the XOR to make it fit thed esired size of an index + // Then truncate the XOR to make it fit the desired size of an index return (h1 ^ h2) & ((1 << tageTableBits_) - 1); } From b23429f087cc1596bd9df84c06da9d334539efe3 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:28:03 +0000 Subject: [PATCH 66/69] Adding Tage to a64fx_SME.yaml --- configs/a64fx_SME.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index 7b1442cc32..c1a35a228f 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -31,10 +31,15 @@ Queue-Sizes: Load: 40 Store: 24 Branch-Predictor: - Type: "Perceptron" + Type: "Tage" BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 Global-History-Length: 19 RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: From ac0d2cf9aedbc1d9314571a5547d533a7cbb4ac6 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:26:47 +0000 Subject: [PATCH 67/69] Adjusting comments --- configs/tx2.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/tx2.yaml b/configs/tx2.yaml index 1c9068af1e..36b83fd13e 100644 --- a/configs/tx2.yaml +++ b/configs/tx2.yaml @@ -30,7 +30,7 @@ Branch-Predictor: Type: "Tage" BTB-Tag-Bits: 11 Saturating-Count-Bits: 2 - Global-History-Length: 19 + Global-History-Length: 12 RAS-entries: 8 Fallback-Static-Predictor: "Always-Taken" Tage-Table-Bits: 12 From f9ecd29d9016d0ae0a050f02e834a315f15acd5b Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Tue, 17 Dec 2024 23:02:12 +0000 Subject: [PATCH 68/69] Clang format --- 
src/include/simeng/branchpredictors/TagePredictor.hh | 2 +- src/lib/branchpredictors/TagePredictor.cc | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index 96d6cc28e0..dedbc2f407 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -4,8 +4,8 @@ #include #include #include -#include #include +#include #include "simeng/branchpredictors/BranchHistory.hh" #include "simeng/branchpredictors/BranchPredictor.hh" diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc index b7b7f82a9b..f54b9a6e2e 100644 --- a/src/lib/branchpredictors/TagePredictor.cc +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -53,8 +53,8 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, int8_t predTable; std::shared_ptr indices(new uint64_t[numTageTables_]); std::shared_ptr tags(new uint64_t[numTageTables_]); - getTaggedPrediction(address, &prediction, &altPrediction, &predTable, - indices, tags); + getTaggedPrediction(address, &prediction, &altPrediction, &predTable, indices, + tags); // If known offset then overwrite predicted target with this if (knownOffset != 0) prediction.target = address + knownOffset; @@ -89,8 +89,7 @@ BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, } // Store prediction data so that update() has the info it needs - ftqEntry newEntry = {predTable, indices, tags, - prediction, altPrediction}; + ftqEntry newEntry = {predTable, indices, tags, prediction, altPrediction}; ftq_.push_back(newEntry); // Speculatively update the global history @@ -266,8 +265,9 @@ void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { // Update the usefulness counters if prediction is from a tagged prediction // table and differs from alt-prediction - if ((predTable != -1) && 
(pred.isTaken != altPred.isTaken || - (pred.isTaken && (pred.target != altPred.target)))) { + if ((predTable != -1) && + (pred.isTaken != altPred.isTaken || + (pred.isTaken && (pred.target != altPred.target)))) { bool wasUseful = (pred.isTaken == isTaken); uint8_t currentU = tageTables_[predTable][indices.get()[predTable]].u; // Make sure that update is possible From d87cc5dccfa685930363d2dcaa342f182e27b334 Mon Sep 17 00:00:00 2001 From: Alex Cockrean <84676155+ABenC377@users.noreply.github.com> Date: Mon, 30 Dec 2024 13:14:12 +0000 Subject: [PATCH 69/69] Updating comments and docs in response to PR comments --- docs/sphinx/user/configuring_simeng.rst | 2 +- src/include/simeng/branchpredictors/BranchHistory.hh | 2 +- src/include/simeng/branchpredictors/TagePredictor.hh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index 66322b5674..245c17809a 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -183,7 +183,7 @@ Num-Tage-Tables Only needed for a ``Tage`` predictor. The number of tagged tables used by the predictor, in addition to a default prediction table (i.e., the BTB). Therefore, a value of 3 for ``Num-Tage-Tables`` would result in four total prediction tables: one BTB and three tagged tables. If no tagged tables are desired, it is recommended to use the ``GenericPredictor`` instead. Tage-Length - Only needed for a ``Tage`` predictor. The number of bits used to tage the entries of the tagged tables. + Only needed for a ``Tage`` predictor. The number of bits used to tag the entries of the tagged tables. .. 
_l1dcnf: diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh index ab52b99808..105caa314f 100644 --- a/src/include/simeng/branchpredictors/BranchHistory.hh +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -9,7 +9,7 @@ namespace simeng { * access and manipulate large branch histories, as are needed in * sophisticated branch predictors. * - * The bits of the branch history are stored in a vector of uint64_t values, + * The bits of the branch history are stored in an array of uint64_t values, * and their access/manipulation is facilitated by the public functions. */ class BranchHistory { diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh index dedbc2f407..dabb07c000 100644 --- a/src/include/simeng/branchpredictors/TagePredictor.hh +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -118,7 +118,7 @@ class TagePredictor : public BranchPredictor { std::vector> btb_; /** The bitlength of the Tagged tables' indices. - * Each tagged table with have 2^bits entries. */ + * Each tagged table will have 2^bits entries. */ uint8_t tageTableBits_; /** The number of tagged tables in the TAGE scheme.