diff --git a/configs/a64fx.yaml b/configs/a64fx.yaml index c3285a22b7..919b52cfe6 100644 --- a/configs/a64fx.yaml +++ b/configs/a64fx.yaml @@ -31,10 +31,15 @@ Queue-Sizes: Port-Allocator: Type: A64FX Branch-Predictor: - Type: "Perceptron" + Type: "Tage" BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 Global-History-Length: 19 RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/a64fx_SME.yaml b/configs/a64fx_SME.yaml index 7fe7086d5e..6b9661a6a2 100644 --- a/configs/a64fx_SME.yaml +++ b/configs/a64fx_SME.yaml @@ -34,10 +34,15 @@ Queue-Sizes: Port-Allocator: Type: A64FX Branch-Predictor: - Type: "Perceptron" + Type: "Tage" BTB-Tag-Bits: 11 + Saturating-Count-Bits: 2 Global-History-Length: 19 RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/configs/tx2.yaml b/configs/tx2.yaml index 45a8bb498b..ce07063680 100644 --- a/configs/tx2.yaml +++ b/configs/tx2.yaml @@ -29,10 +29,15 @@ Queue-Sizes: Port-Allocator: Type: Balanced Branch-Predictor: - Type: "Perceptron" + Type: "Tage" BTB-Tag-Bits: 11 - Global-History-Length: 19 - RAS-entries: 5 + Saturating-Count-Bits: 2 + Global-History-Length: 12 + RAS-entries: 8 + Fallback-Static-Predictor: "Always-Taken" + Tage-Table-Bits: 12 + Num-Tage-Tables: 6 + Tag-Length: 8 L1-Data-Memory: Interface-Type: Fixed L1-Instruction-Memory: diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index 6a03c85129..bd18271014 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -21,7 +21,6 @@ The state of the branch predictor when ``predict`` is called on a branch is stor Generic Predictor ----------------- - The algorithm(s) held within a 
``BranchPredictor`` class instance can be model-specific, however, SimEng provides a ``GenericPredictor`` which contains the following logic. Global History @@ -53,4 +52,20 @@ Branch Target Buffer (BTB) If the supplied branch type is ``Unconditional``, then the predicted direction is overridden to be taken. If the supplied branch type is ``Conditional`` and the predicted direction is not taken, then the predicted target is overridden to be the next sequential instruction. Return Address Stack (RAS) - Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. \ No newline at end of file + Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. + +TAGE Predictor +-------------------- +The ``TagePredictor`` is a TAGE predictor of the type described in https://inria.hal.science/hal-03408381/document. Unlike ``GenericPredictor`` and ``PerceptronPredictor``, this predictor uses a series of prediction tables, each of which uses an increasing global history size. E.g., the default prediction table will be indexed by the address itself, then the following tables will use global histories of length 2, 4, 8, 16, .... + +Tagged prediction tables + The prediction returned from this branch predictor will be that determined by the table with the largest global history that has an entry corresponding to the given branch. To determine whether or not a table entry corresponds to the present branch or not, a hash is made from the branch's address and the global history. 
Each table entry has a usefulness counter which is updated when the prediction differs from the next-best prediction. On an incorrect prediction, a non-useful entry in a table that uses a longer global history is replaced, if one is available. + +Default prediction table + In addition to the tagged tables, there is a non-tagged default prediction table that is used as a fall-back in the event that none of the tagged tables have an entry corresponding to a given branch. This table is much like the BTB in the ``GenericPredictor``, except that the index is determined from the truncated address only (i.e., it does not depend on the global history at all). + +Global History + To accommodate larger numbers of tagged tables, global histories of greater than 64 bits are needed. Therefore, ``TagePredictor`` incorporates a new ``BranchHistory`` structure that allows global histories of unlimited size to be kept and accessed. + +Return Address Stack (RAS) + Identified through the supplied branch type, Return instructions pop values off of the RAS to get their branch target whilst Branch-and-Link instructions push values onto the RAS, for later use by the Branch-and-Link instruction's corresponding Return instruction. diff --git a/docs/sphinx/user/configuring_simeng.rst b/docs/sphinx/user/configuring_simeng.rst index 9a49893375..245c17809a 100644 --- a/docs/sphinx/user/configuring_simeng.rst +++ b/docs/sphinx/user/configuring_simeng.rst @@ -159,13 +159,13 @@ The Branch-Prediction section contains those options to parameterise the branch The current options include: Type - The type of branch predictor that is used, the options are ``Generic``, and ``Perceptron``. Both types of predictor use a branch target buffer with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` is a saturating counter, and in ``Perceptron`` it is a perceptron. 
+ The type of branch predictor that is used, the options are ``Generic``, ``Perceptron``, and ``Tage``. Each of these types of predictor use prediction tables with each entry containing a direction prediction mechanism and a target address. The direction predictor used in ``Generic`` and ``Tage`` is a saturating counter, and in ``Perceptron`` it is a perceptron. ``Tage`` also uses a series of further, tagged prediction tables to provide predictions informed by greater branch histories. BTB-Tag-Bits The number of bits used to index the entries in the Branch Target Buffer (BTB). The number of entries in the BTB is obtained from the calculation: 1 << ``bits``. For example, a ``bits`` value of 12 would result in a BTB with 4096 entries. Saturating-Count-Bits - Only needed for a ``Generic`` predictor. The number of bits used in the saturating counter value. + Only needed for ``Generic`` and ``Tage`` predictors. The number of bits used in the saturating counter value. Global-History-Length The number of bits used to record the global history of branch directions. Each bit represents one branch direction. For ``PerceptronPredictor``, this dictates the size of the perceptrons (with each perceptron having Global-History-Length + 1 weights). @@ -174,7 +174,16 @@ RAS-entries The number of entries in the Return Address Stack (RAS). Fallback-Static-Predictor - Only needed for a ``Generic`` predictor. The static predictor used when no dynamic prediction is available. The options are either ``"Always-Taken"`` or ``"Always-Not-Taken"``. + Only needed for ``Generic`` and ``Tage`` predictors. The static predictor used when no dynamic prediction is available. The options are either ``"Always-Taken"`` or ``"Always-Not-Taken"``. + +Tage-Table-Bits + Only needed for a ``Tage`` predictor. The number of bits used to index entries in the tagged tables. The number of entries in each of the tagged tables is obtained from the calculation: 1 << ``bits``. 
For example, a ``bits`` value of 12 would result in tagged tables with 4096 entries. + +Num-Tage-Tables + Only needed for a ``Tage`` predictor. The number of tagged tables used by the predictor, in addition to a default prediction table (i.e., the BTB). Therefore, a value of 3 for ``Num-Tage-Tables`` would result in four total prediction tables: one BTB and three tagged tables. If no tagged tables are desired, it is recommended to use the ``GenericPredictor`` instead. + +Tag-Length + Only needed for a ``Tage`` predictor. The number of bits used to tag the entries of the tagged tables. .. _l1dcnf: diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index 2cc739f3f9..15547222e9 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -11,6 +11,7 @@ #include "simeng/branchpredictors/AlwaysNotTakenPredictor.hh" #include "simeng/branchpredictors/GenericPredictor.hh" #include "simeng/branchpredictors/PerceptronPredictor.hh" +#include "simeng/branchpredictors/TagePredictor.hh" #include "simeng/config/SimInfo.hh" #include "simeng/kernel/Linux.hh" #include "simeng/memory/FixedLatencyMemoryInterface.hh" diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index b5a4e33e3b..5004942539 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -29,7 +29,7 @@ struct ExecutionInfo { * Each supported ISA should provide a derived implementation of this class. */ class Instruction { public: - virtual ~Instruction(){}; + virtual ~Instruction() {}; /** Retrieve the source registers this instruction reads. 
*/ virtual const span getSourceRegisters() const = 0; diff --git a/src/include/simeng/arch/ArchInfo.hh b/src/include/simeng/arch/ArchInfo.hh index e029699c07..eed7055cf7 100644 --- a/src/include/simeng/arch/ArchInfo.hh +++ b/src/include/simeng/arch/ArchInfo.hh @@ -12,7 +12,7 @@ namespace arch { /** A class to hold and generate architecture specific configuration options. */ class ArchInfo { public: - virtual ~ArchInfo(){}; + virtual ~ArchInfo() {}; /** Get the set of system register enums currently supported. */ virtual const std::vector& getSysRegEnums() const = 0; diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index aa293d6f5f..b4e6ac6001 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -30,7 +30,7 @@ struct ExceptionResult { * cycle until complete. */ class ExceptionHandler { public: - virtual ~ExceptionHandler(){}; + virtual ~ExceptionHandler() {}; /** Tick the exception handler to progress handling of the exception. Should * return `false` if the exception requires further handling, or `true` once * complete. */ @@ -46,7 +46,7 @@ class Architecture { public: Architecture(kernel::Linux& kernel) : linux_(kernel) {} - virtual ~Architecture(){}; + virtual ~Architecture() {}; /** Attempt to pre-decode from `bytesAvailable` bytes of instruction memory. 
* Writes into the supplied macro-op vector, and returns the number of bytes diff --git a/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh index 382a495420..6d88ae70fc 100644 --- a/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh +++ b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh @@ -27,4 +27,4 @@ class AlwaysNotTakenPredictor : public BranchPredictor { private: }; -} // namespace simeng +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/BranchHistory.hh b/src/include/simeng/branchpredictors/BranchHistory.hh new file mode 100644 index 0000000000..105caa314f --- /dev/null +++ b/src/include/simeng/branchpredictors/BranchHistory.hh @@ -0,0 +1,114 @@ +#pragma once + +#include +#include + +namespace simeng { +/** A class for storing a branch history. Needed for cases where a branch + * history of more than 64 bits is required. This class makes it easier to + * access and manipulate large branch histories, as are needed in + * sophisticated branch predictors. + * + * The bits of the branch history are stored in an array of uint64_t values, + * and their access/manipulation is facilitated by the public functions. */ + +class BranchHistory { + public: + BranchHistory(uint64_t size) : size_(size) { + history_ = std::make_unique(size_); + } + + ~BranchHistory() {}; + + /** Returns the 'numBits' most recent bits of the branch history. Maximum + * number of bits returnable is 64 to allow it to be provided in a 64-bit + * integer. */ + uint64_t getHistory(uint8_t numBits) { + assert(numBits <= 64 && "Cannot get more than 64 bits without rolling"); + assert(numBits <= size_ && + "Cannot get more bits of branch history than " + "the size of the history"); + return (history_[0] & ((1ull << numBits) - 1)); + } + + /** Returns 'numBits' of the global history folded over on itself to get a + * value of size 'length'. 
The global history is folded by taking an + * XOR hash with the overflowing bits to get an output of 'length' bits. */ + uint64_t getFolded(uint8_t numBits, uint8_t length) { + assert(numBits <= size_ && + "Cannot get more bits of branch history than " + "the size of the history"); + uint64_t output = 0; + + uint64_t startIndex = 0; + uint64_t endIndex = numBits - 1; + + while (startIndex <= numBits) { + output ^= ((history_[startIndex / 64] >> startIndex) & + ((1ull << (numBits - startIndex)) - 1)); + + // Check to see if a second uint64_t value will need to be accessed + if ((startIndex / 64) == (endIndex / 64)) { + uint8_t leftOverBits = endIndex % 64; + output ^= (history_[endIndex / 64] << (numBits - leftOverBits)); + } + startIndex += length; + endIndex += length; + } + + // Trim the output to the desired size + output &= (1 << length) - 1; + return output; + } + + /** Adds a branch outcome ('isTaken') to the global history */ + void addHistory(bool isTaken) { + for (int8_t i = size_ / 64; i >= 0; i--) { + history_[i] <<= 1; + if (i == 0) { + history_[i] |= ((isTaken) ? 1 : 0); + } else { + history_[i] |= (((history_[i - 1] & (1ull << 63)) > 0) ? 1 : 0); + } + } + } + + /** Updates the state of a branch that has already been added to the global + * history at 'position', where 'position' is 0-indexed and starts from the + * most recent history. I.e., to update the most recently added branch + * outcome, 'position' would be 0. 
+ * */ + void updateHistory(bool isTaken, uint64_t position) { + if (position < size_) { + uint8_t vectIndex = position / 64; + uint8_t bitIndex = position % 64; + bool currentlyTaken = ((history_[vectIndex] & (1ull << bitIndex)) != 0); + if (currentlyTaken != isTaken) { + history_[vectIndex] ^= (1ull << bitIndex); + } + } + } + + /** Removes the most recently added branch from the history */ + void rollBack() { + for (uint8_t i = 0; i <= (size_ / 64); i++) { + history_[i] >>= 1; + if (i < (size_ / 64)) { + history_[i] |= (((history_[i + 1] & 1) > 0) ? (1ull << 63) : 0); + } + } + } + + private: + /** The number of bits of branch history stored in this branch history */ + uint64_t size_; + + /** An array containing the bits of the branch history. The bits are + * arranged such that the most recent branches are stored in uint64_t at + * index 0 of the vector, then the next most recent at index 1 and so forth. + * Within each uint64_t, the most recent branches are recorded in the + * least-significant bits. */ + std::unique_ptr history_; +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh index 7779fe0703..d1cf1eeec3 100644 --- a/src/include/simeng/branchpredictors/BranchPredictor.hh +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -12,7 +12,7 @@ namespace simeng { /** An abstract branch predictor interface. */ class BranchPredictor { public: - virtual ~BranchPredictor(){}; + virtual ~BranchPredictor() {}; /** Generate a branch prediction for the supplied instruction address, a * branch type, and a known branch offset. 
Returns a branch direction and diff --git a/src/include/simeng/branchpredictors/GenericPredictor.hh b/src/include/simeng/branchpredictors/GenericPredictor.hh index ae1aff6d05..8474d757e2 100644 --- a/src/include/simeng/branchpredictors/GenericPredictor.hh +++ b/src/include/simeng/branchpredictors/GenericPredictor.hh @@ -86,4 +86,4 @@ class GenericPredictor : public BranchPredictor { uint16_t rasSize_; }; -} // namespace simeng +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/PerceptronPredictor.hh b/src/include/simeng/branchpredictors/PerceptronPredictor.hh index d9e05bca52..c5d3c87f19 100644 --- a/src/include/simeng/branchpredictors/PerceptronPredictor.hh +++ b/src/include/simeng/branchpredictors/PerceptronPredictor.hh @@ -102,4 +102,4 @@ class PerceptronPredictor : public BranchPredictor { uint64_t rasSize_; }; -} // namespace simeng +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/TagePredictor.hh b/src/include/simeng/branchpredictors/TagePredictor.hh new file mode 100644 index 0000000000..dabb07c000 --- /dev/null +++ b/src/include/simeng/branchpredictors/TagePredictor.hh @@ -0,0 +1,170 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "simeng/branchpredictors/BranchHistory.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" +#include "simeng/config/SimInfo.hh" + +namespace simeng { + +/** A data structure to store all of the information needed for a single entry + * in a tagged table. */ +struct TageEntry { + uint8_t satCnt; + uint64_t tag; + uint8_t u; // usefulness counter + uint64_t target; +}; + +/** A data structure to store all of the information needed for a single entry + * in the Fetch Target Queue. 
*/ +struct ftqEntry { + int8_t predTable; + std::shared_ptr indices; + std::shared_ptr tags; + BranchPrediction prediction; + BranchPrediction altPrediction; +}; + +/** + * A TAGE branch predictor of the type described by Seznec and Michaud: + * https://inria.hal.science/hal-03408381/document. A brief summary of the + * prediction mechanism is described below. + * + * This predictor uses a series of prediction tables (a user-defined number + * thereof), each of which uses a progressively larger global history to index + * it. The default prediction table does not use any global history. + * + * To access a prediction table, an XOR hash of the branch's address and the + * global history of the relevant length is used to index the table. Then, a + * tag is determined by a hash of the address and the context of the branch is + * used to confirm that the entry belongs to the present branch. + * + * A prediction is made on the basis of the prediction table using the longest + * global history that has an entry corresponding to the present branch + * (matching tag). + * */ + +class TagePredictor : public BranchPredictor { + public: + /** Initialise predictor models. */ + TagePredictor(ryml::ConstNodeRef config = config::SimInfo::getConfig()); + + ~TagePredictor(); + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset. Returns a branch direction and + * branch target address. */ + BranchPrediction predict(uint64_t address, BranchType type, + int64_t knownOffset) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. 
*/ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ + void flush(uint64_t address) override; + + private: + /** Returns a prediction for a branch at this address from the non-tagged BTB + * that is used for default predictions. */ + BranchPrediction getBtbPrediction(uint64_t address); + + /** Provides a prediction, alternative prediction, the table number that + * provided the prediction, and the indices and tags of the prediction and + * alternative prediction. This prediction info is determined from the + * tagged tables for a branch with the provided address. */ + void getTaggedPrediction(uint64_t address, BranchPrediction* prediction, + BranchPrediction* altPrediction, int8_t* predTable, + std::shared_ptr indices, + std::shared_ptr tags); + + /** Returns the index of a branch in a tagged table for a given address and + * table. */ + uint64_t getTaggedIndex(uint64_t address, uint8_t table); + + /** Returns a hash of the address and the global history that is then trimmed + * to the appropriate tag length. The tag varies depending on the table + * that is being accessed. */ + uint64_t getTag(uint64_t address, uint8_t table); + + /** Updates the default, untagged prediction table on the basis of the + * outcome of a branch. */ + void updateBtb(uint64_t address, bool isTaken, uint64_t target); + + /** Updates the tagged tables on the basis of the outcome of a branch. 
*/ + void updateTaggedTables(bool isTaken, uint64_t target); + + /** The bitlength of the BTB (i.e., default prediction table) index; BTB + * will have 2^bits entries. */ + uint8_t btbBits_; + + /** A 2^bits length vector of pairs containing a satCntBits_-bit saturating + * counter and a branch target. This is the untagged, default prediction + * table. */ + std::vector> btb_; + + /** The bitlength of the Tagged tables' indices. + * Each tagged table will have 2^bits entries. */ + uint8_t tageTableBits_; + + /** The number of tagged tables in the TAGE scheme. + * In addition to the tagged tables, there will be a single untagged table + * (the BTB) from which default predictions will be made. */ + uint8_t numTageTables_; + + /** Data structure to store the tagged tables in. */ + std::vector> tageTables_; + + /** Fetch Target Queue containing the direction prediction and previous global + * history state of branches that are currently unresolved */ + std::deque ftq_; + + /** The number of bits used to form the saturating counter in a BTB entry. */ + uint8_t satCntBits_; + + /** The number of previous branch directions recorded globally. */ + uint16_t globalHistoryLength_; + + /** A return address stack. */ + std::deque ras_; + + /** RAS history with instruction address as the keys. A non-zero value + * represents the target prediction for a return instruction and a 0 entry for + * a branch-and-link instruction. */ + std::map rasHistory_; + + /** The size of the RAS. I.e., the maximum capacity of the RAS. */ + uint16_t rasSize_; + + /** An n-bit history of previous branch directions where n is equal to + * globalHistoryLength_. Each bit represents a branch taken (1) or not + * taken (0), with the most recent branch being the least-significant-bit */ + BranchHistory globalHistory_; + + /** The size of the tags used in the tagged tables, where the units of + * size are bits. 
*/ + uint8_t tagLength_; + + // This variable is used only in debug mode -- therefore hide behind ifdef +#ifndef NDEBUG + /** The Id of the last instruction that update was called on -- used to + * ensure that update is called in program order. */ + uint64_t lastUpdatedInstructionId = 0; +#endif +}; + +} // namespace simeng diff --git a/src/include/simeng/config/ExpectationNode.hh b/src/include/simeng/config/ExpectationNode.hh index 187d3ed37a..cbf59d5750 100644 --- a/src/include/simeng/config/ExpectationNode.hh +++ b/src/include/simeng/config/ExpectationNode.hh @@ -134,9 +134,9 @@ class ExpectationNode { /** Default constructor. Used primarily to provide a root node for populated * ExpectationNode instances to be added to. */ - ExpectationNode(){}; + ExpectationNode() {}; - ~ExpectationNode(){}; + ~ExpectationNode() {}; /** A getter function to retrieve the key of a node. */ std::string getKey() const { return nodeKey_; } diff --git a/src/include/simeng/pipeline/PortAllocator.hh b/src/include/simeng/pipeline/PortAllocator.hh index 78e3a0c5c9..bd566702ae 100644 --- a/src/include/simeng/pipeline/PortAllocator.hh +++ b/src/include/simeng/pipeline/PortAllocator.hh @@ -16,7 +16,7 @@ const uint8_t OPTIONAL = 1; /** An abstract execution port allocator interface. */ class PortAllocator { public: - virtual ~PortAllocator(){}; + virtual ~PortAllocator() {}; /** Allocate a port for the specified instruction group; returns the allocated * port. 
*/ diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index 2e8b136777..cd4f131dc7 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -17,6 +17,7 @@ set(SIMENG_SOURCES branchpredictors/AlwaysNotTakenPredictor.cc branchpredictors/GenericPredictor.cc branchpredictors/PerceptronPredictor.cc + branchpredictors/TagePredictor.cc config/ModelConfig.cc config/SimInfo.cc kernel/Linux.cc diff --git a/src/lib/CoreInstance.cc b/src/lib/CoreInstance.cc index 46f8638286..7815371e49 100644 --- a/src/lib/CoreInstance.cc +++ b/src/lib/CoreInstance.cc @@ -222,6 +222,8 @@ void CoreInstance::createCore() { predictor_ = std::make_unique(); } else if (predictorType == "Perceptron") { predictor_ = std::make_unique(); + } else if (predictorType == "Tage") { + predictor_ = std::make_unique(); } // Extract the port arrangement from the config file diff --git a/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc index f9ccb416bc..6fe109fe7f 100644 --- a/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc +++ b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc @@ -11,4 +11,4 @@ void AlwaysNotTakenPredictor::update(uint64_t address, bool taken, uint64_t instructionId) {} void AlwaysNotTakenPredictor::flush(uint64_t address) {} -} // namespace simeng +} // namespace simeng \ No newline at end of file diff --git a/src/lib/branchpredictors/GenericPredictor.cc b/src/lib/branchpredictors/GenericPredictor.cc index fce3fd2b1d..3a6091f85a 100644 --- a/src/lib/branchpredictors/GenericPredictor.cc +++ b/src/lib/branchpredictors/GenericPredictor.cc @@ -1,7 +1,5 @@ #include "simeng/branchpredictors/GenericPredictor.hh" -#include - namespace simeng { GenericPredictor::GenericPredictor(ryml::ConstNodeRef config) @@ -153,4 +151,4 @@ void GenericPredictor::flush(uint64_t address) { // Roll back global history globalHistory_ >>= 1; } -} // namespace simeng +} // namespace simeng \ No newline at end of file diff --git 
a/src/lib/branchpredictors/PerceptronPredictor.cc b/src/lib/branchpredictors/PerceptronPredictor.cc index 2e517939eb..09151e37dc 100644 --- a/src/lib/branchpredictors/PerceptronPredictor.cc +++ b/src/lib/branchpredictors/PerceptronPredictor.cc @@ -198,4 +198,4 @@ int64_t PerceptronPredictor::getDotProduct( return Pout; } -} // namespace simeng +} // namespace simeng \ No newline at end of file diff --git a/src/lib/branchpredictors/TagePredictor.cc b/src/lib/branchpredictors/TagePredictor.cc new file mode 100644 index 0000000000..f54b9a6e2e --- /dev/null +++ b/src/lib/branchpredictors/TagePredictor.cc @@ -0,0 +1,283 @@ +#include "simeng/branchpredictors/TagePredictor.hh" + +namespace simeng { + +TagePredictor::TagePredictor(ryml::ConstNodeRef config) + : btbBits_(config["Branch-Predictor"]["BTB-Tag-Bits"].as()), + tageTableBits_( + config["Branch-Predictor"]["Tage-Table-Bits"].as()), + numTageTables_( + config["Branch-Predictor"]["Num-Tage-Tables"].as()), + satCntBits_( + config["Branch-Predictor"]["Saturating-Count-Bits"].as()), + globalHistoryLength_( + config["Branch-Predictor"]["Global-History-Length"].as()), + rasSize_(config["Branch-Predictor"]["RAS-entries"].as()), + globalHistory_(1 << (numTageTables_ + 1)), + tagLength_(config["Branch-Predictor"]["Tag-Length"].as()) { + // Calculate the saturation counter boundary between weakly taken and + // not-taken. `(2 ^ num_sat_cnt_bits) / 2` gives the weakly taken state + // value + uint8_t weaklyTaken = (uint8_t)1 << (satCntBits_ - 1); + uint8_t satCntVal = (config["Branch-Predictor"]["Fallback-Static-Predictor"] + .as() == "Always-Taken") + ? 
weaklyTaken + : (weaklyTaken - 1); + + // Set up non-tagged default prediction table + btb_ = std::vector>(1ul << btbBits_, + {satCntVal, 0}); + + // Set up tagged prediction tables + for (uint32_t i = 0; i < numTageTables_; i++) { + std::vector newTable; + for (uint32_t j = 0; j < (1ul << tageTableBits_); j++) { + TageEntry newEntry = {satCntVal, 0, 1, 0}; + newTable.push_back(newEntry); + } + tageTables_.push_back(newTable); + } +} + +TagePredictor::~TagePredictor() { + btb_.clear(); + ras_.clear(); + rasHistory_.clear(); + ftq_.clear(); +} + +BranchPrediction TagePredictor::predict(uint64_t address, BranchType type, + int64_t knownOffset) { + BranchPrediction prediction; + BranchPrediction altPrediction; + int8_t predTable; + std::shared_ptr indices(new uint64_t[numTageTables_]); + std::shared_ptr tags(new uint64_t[numTageTables_]); + getTaggedPrediction(address, &prediction, &altPrediction, &predTable, indices, + tags); + + // If known offset then overwrite predicted target with this + if (knownOffset != 0) prediction.target = address + knownOffset; + + // Amend prediction based on branch type + if (type == BranchType::Unconditional) { + prediction.isTaken = true; + predTable = -1; + } else if (type == BranchType::Return) { + prediction.isTaken = true; + // Return branches can use the RAS if an entry is available + if (ras_.size() > 0) { + prediction.target = ras_.back(); + // Record top of RAS used for target prediction + rasHistory_[address] = ras_.back(); + ras_.pop_back(); + } + predTable = -1; + } else if (type == BranchType::SubroutineCall) { + prediction.isTaken = true; + // Subroutine call branches must push their associated return address to RAS + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(address + 4); + // Record that this address is a branch-and-link instruction + rasHistory_[address] = 0; + predTable = -1; + } else if (type == BranchType::Conditional || + type == BranchType::LoopClosing) { + if (!prediction.isTaken) 
prediction.target = address + 4; + } + + // Store prediction data so that update() has the info it needs + ftqEntry newEntry = {predTable, indices, tags, prediction, altPrediction}; + ftq_.push_back(newEntry); + + // Speculatively update the global history + globalHistory_.addHistory(prediction.isTaken); + return prediction; +} + +void TagePredictor::update(uint64_t address, bool isTaken, + uint64_t targetAddress, simeng::BranchType type, + uint64_t instructionId) { + // Make sure that this function is called in program order; and then update + // the lastUpdatedInstructionId variable + assert(instructionId >= lastUpdatedInstructionId && + (lastUpdatedInstructionId = instructionId) >= 0 && + "Update not called on branch instructions in program order"); + + // The branch being resolved must still have its prediction data in the FTQ + assert((ftq_.size() > 0) && + "Cannot update Branch Predictor " + "when the ftq is empty"); + + updateBtb(address, isTaken, targetAddress); + + updateTaggedTables(isTaken, targetAddress); + + // Update global history if prediction was incorrect + if (ftq_.front().prediction.isTaken != isTaken) { + // This branch is still at the front of the FTQ, so ftq_.size() - 1 + // predictions have been made since it; that count is its 0-indexed + // position in the global history + globalHistory_.updateHistory(isTaken, ftq_.size() - 1); + } + + // Pop used ftq entry from ftq + ftq_.pop_front(); +} + +void TagePredictor::flush(uint64_t address) { + // If address interacted with RAS, rewind entry + auto it = rasHistory_.find(address); + if (it != rasHistory_.end()) { + uint64_t target = it->second; + if (target != 0) { + // If history entry belongs to a return instruction, push target back onto + // stack + if (ras_.size() >= rasSize_) { + ras_.pop_front(); + } + ras_.push_back(target); + } else { + // If history entry belongs to a branch-and-link instruction, pop target + // off of stack + if (ras_.size()) { + ras_.pop_back(); + } + } + rasHistory_.erase(it); + } + + assert((ftq_.size() > 0) && + "Cannot flush instruction from Branch Predictor " + "when the ftq is empty"); + ftq_.pop_back(); + + // Roll back global history + globalHistory_.rollBack(); +} + +void TagePredictor::getTaggedPrediction(uint64_t address, + 
BranchPrediction* prediction, + BranchPrediction* altPrediction, + int8_t* predTable, + std::shared_ptr indices, + std::shared_ptr tags) { + // Get a basic prediction from the btb + BranchPrediction basePrediction = getBtbPrediction(address); + prediction->isTaken = basePrediction.isTaken; + prediction->target = basePrediction.target; + *predTable = -1; + + // Check each of the tagged predictor tables for an entry matching this + // branch. If found, update the best prediction. The greater the table + // number, the longer global history it has access to. Therefore, the + // greater the table number, the better the prediction. + for (int8_t table = 0; table < numTageTables_; table++) { + // Determine the index and tag for this table, as they vary depending on + // the length of global history + uint64_t index = getTaggedIndex(address, table); + indices.get()[table] = index; + uint64_t tag = getTag(address, table); + tags.get()[table] = tag; + + // If tag matches, then use this prediction + if (tageTables_[table][index].tag == tag) { + altPrediction->isTaken = prediction->isTaken; + altPrediction->target = prediction->target; + + prediction->isTaken = (tageTables_[table][index].satCnt >= 2); + prediction->target = tageTables_[table][index].target; + *predTable = table; + } + } +} + +BranchPrediction TagePredictor::getBtbPrediction(uint64_t address) { + // Get prediction from BTB + uint64_t index = (address >> 2) & ((1ull << btbBits_) - 1); + bool direction = (btb_[index].first >= (1 << (satCntBits_ - 1))); + uint64_t target = btb_[index].second; + return {direction, target}; +} + +uint64_t TagePredictor::getTaggedIndex(uint64_t address, uint8_t table) { + // Get the XOR of the address (sans two least-significant bits) and the + // global history (folded onto itself to make it of the correct size). 
+ uint64_t h1 = (address >> 2); + uint64_t h2 = globalHistory_.getFolded(1ull << (table + 1), tageTableBits_); + // Then truncate the XOR (with a 64-bit mask) to fit the desired index size + return (h1 ^ h2) & ((1ull << tageTableBits_) - 1); +} + +uint64_t TagePredictor::getTag(uint64_t address, uint8_t table) { + // Hash function here is pretty arbitrary + uint64_t h1 = address; + uint64_t h2 = + globalHistory_.getFolded((1ull << table), ((1ull << tagLength_) - 1)); + return (h1 ^ h2) & ((1ull << tagLength_) - 1); +} + +void TagePredictor::updateBtb(uint64_t address, bool isTaken, + uint64_t targetAddress) { + // Read the current saturating counter value (satCntBits_ bits wide) + uint8_t satCntVal = btb_[((address >> 2) & ((1ull << btbBits_) - 1))].first; + // Only alter value if it would transition to a valid state + // (i.e., avoid overflow) + if (!((satCntVal == (1ull << satCntBits_) - 1) && isTaken) && + !(satCntVal == 0 && !isTaken)) { + satCntVal += isTaken ? 1 : -1; + } + + // Update BTB entry + btb_[((address >> 2) & ((1ull << btbBits_) - 1))].first = satCntVal; + if (isTaken) { + btb_[((address >> 2) & ((1ull << btbBits_) - 1))].second = targetAddress; + } +} + +void TagePredictor::updateTaggedTables(bool isTaken, uint64_t target) { + // Get stored information from the FTQ + int8_t predTable = ftq_.front().predTable; + std::shared_ptr indices = ftq_.front().indices; + std::shared_ptr tags = ftq_.front().tags; + BranchPrediction pred = ftq_.front().prediction; + BranchPrediction altPred = ftq_.front().altPrediction; + + // Update the prediction counter if tagged prediction table was used + if (predTable != -1) { + uint64_t predIndex = indices.get()[predTable]; + if (isTaken && (tageTables_[predTable][predIndex].satCnt < 3)) { + (tageTables_[predTable][predIndex].satCnt)++; + } else if (!isTaken && (tageTables_[predTable][predIndex].satCnt > 0)) { + (tageTables_[predTable][predIndex].satCnt)--; + } + } + + // Allocate new entry if prediction was wrong and space for a new entry is + // 
available + // -- Check higher order tagged predictor tables to see if there is a + // non-useful entry that can be replaced + if (isTaken != pred.isTaken || (isTaken && (target != pred.target))) { + for (uint8_t table = predTable + 1; table < numTageTables_; table++) { + if (tageTables_[table][indices.get()[table]].u <= 1) { + tageTables_[table][indices.get()[table]] = { + (isTaken ? (uint8_t)2 : (uint8_t)1), tags.get()[table], (uint8_t)2, + target}; + break; + } + } + } + + // Update the usefulness counters if prediction is from a tagged prediction + // table and differs from alt-prediction + if ((predTable != -1) && + (pred.isTaken != altPred.isTaken || + (pred.isTaken && (pred.target != altPred.target)))) { + bool wasUseful = (pred.isTaken == isTaken); + uint8_t currentU = tageTables_[predTable][indices.get()[predTable]].u; + // Make sure that update is possible + if (wasUseful && currentU < 3) { + (tageTables_[predTable][indices.get()[predTable]].u)++; + } + if (!wasUseful && currentU > 0) { + (tageTables_[predTable][indices.get()[predTable]].u)--; + } + } +} + +} // namespace simeng \ No newline at end of file diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 6d54b50d06..50a67f00a5 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -518,7 +518,7 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation("Perceptron", "Type")); expectations_["Branch-Predictor"]["Type"].setValueSet( - std::vector{"Generic", "Perceptron"}); + std::vector{"Generic", "Perceptron", "Tage"}); expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation(8, "BTB-Tag-Bits")); @@ -544,8 +544,10 @@ void ModelConfig::setExpectations(bool isDefault) { // Ensure the key "Branch-Predictor:Type" exists before querying the // associated YAML node if (configTree_["Branch-Predictor"].has_child(ryml::to_csubstr("Type"))) { - if 
(configTree_["Branch-Predictor"]["Type"].as() == - "Generic") { + if ((configTree_["Branch-Predictor"]["Type"].as() == + "Generic") || + (configTree_["Branch-Predictor"]["Type"].as() == + "Tage")) { expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation( 2, "Saturating-Count-Bits")); @@ -559,6 +561,25 @@ void ModelConfig::setExpectations(bool isDefault) { .setValueSet( std::vector{"Always-Taken", "Always-Not-Taken"}); } + if ((configTree_["Branch-Predictor"]["Type"].as() == + "Tage")) { + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation(12, + "Tage-Table-Bits")); + expectations_["Branch-Predictor"]["Tage-Table-Bits"] + .setValueBounds(1, UINT8_MAX); + + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation(6, + "Num-Tage-Tables")); + expectations_["Branch-Predictor"]["Num-Tage-Tables"] + .setValueBounds(1, UINT8_MAX); + + expectations_["Branch-Predictor"].addChild( + ExpectationNode::createExpectation(8, "Tag-Length")); + expectations_["Branch-Predictor"]["Tag-Length"] + .setValueBounds(1, UINT8_MAX); + } } else { std::cerr << "[SimEng:ModelConfig] Attempted to access config key " "\"Branch-Predictor:Type\" but it doesn't exist. 
" diff --git a/src/lib/models/inorder/Core.cc b/src/lib/models/inorder/Core.cc index add307f714..53ea8ec61c 100644 --- a/src/lib/models/inorder/Core.cc +++ b/src/lib/models/inorder/Core.cc @@ -147,10 +147,12 @@ std::map Core::getStats() const { std::ostringstream ipcStr; ipcStr << std::setprecision(2) << ipc; - return {{"cycles", std::to_string(ticks_)}, - {"retired", std::to_string(retired)}, - {"ipc", ipcStr.str()}, - {"flushes", std::to_string(flushes_)}}; + return { + {"cycles", std::to_string(ticks_)}, + {"retired", std::to_string(retired)}, + {"ipc", ipcStr.str()}, + {"flushes", std::to_string(flushes_)}, + }; } void Core::raiseException(const std::shared_ptr& instruction) { diff --git a/test/integration/ConfigTest.cc b/test/integration/ConfigTest.cc index 48975eeacd..a713a4cbc0 100644 --- a/test/integration/ConfigTest.cc +++ b/test/integration/ConfigTest.cc @@ -304,9 +304,7 @@ TEST(ConfigTest, invalidTypeOnSetBounds) { simeng::config::ExpectationNode::createExpectation("DEFAULT", "CHILD")); ASSERT_DEATH( - { - expectations["HEAD"]["CHILD"].setValueSet({0, 1, 2}); - }, + { expectations["HEAD"]["CHILD"].setValueSet({0, 1, 2}); }, "The data type of the passed vector used in setValueSet\\() " "does not match that held within the ExpectationNode with key " "HEAD:CHILD. 
Passed vector elements are of type 32-bit integer and the " @@ -324,9 +322,7 @@ TEST(ConfigTest, alreadyDefinedBounds) { simeng::config::ExpectationNode::createExpectation(0, "CHILD")); expectations["HEAD"]["CHILD"].setValueBounds(0, 10); ASSERT_DEATH( - { - expectations["HEAD"]["CHILD"].setValueSet({1, 2, 3}); - }, + { expectations["HEAD"]["CHILD"].setValueSet({1, 2, 3}); }, "Invalid call of setValueSet\\() for the ExpectationNode with key " "HEAD:CHILD as value bounds have already been defined."); } diff --git a/test/regression/aarch64/instructions/neon.cc b/test/regression/aarch64/instructions/neon.cc index a4731f388f..64efb68110 100644 --- a/test/regression/aarch64/instructions/neon.cc +++ b/test/regression/aarch64/instructions/neon.cc @@ -2546,7 +2546,7 @@ TEST_P(InstNeon, mvni) { ~((32u << 8) | 255)}); } -TEST_P(InstNeon, not ) { +TEST_P(InstNeon, not) { initialHeapData_.resize(128); uint8_t* heap = reinterpret_cast(initialHeapData_.data()); heap[0] = 0b11111111; diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index 2826cd0030..bc3d752f52 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -35,6 +35,7 @@ set(TEST_SOURCES RegisterValueTest.cc PerceptronPredictorTest.cc SpecialFileDirGenTest.cc + TagePredictorTest.cc ) add_executable(unittests ${TEST_SOURCES}) diff --git a/test/unit/TagePredictorTest.cc b/test/unit/TagePredictorTest.cc new file mode 100644 index 0000000000..ef4d982514 --- /dev/null +++ b/test/unit/TagePredictorTest.cc @@ -0,0 +1,297 @@ +#include "MockInstruction.hh" +#include "gtest/gtest.h" +#include "simeng/branchpredictors/TagePredictor.hh" + +namespace simeng { + +class TagePredictorTest : public testing::Test { + public: + TagePredictorTest() : uop(new MockInstruction), uopPtr(uop) { + uop->setInstructionAddress(0); + } + + protected: + MockInstruction* uop; + std::shared_ptr uopPtr; +}; + +// Tests that TagePredictor will predict the correct direction on a miss +TEST_F(TagePredictorTest, Miss) { + 
simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 6," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + auto prediction = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + prediction = predictor.predict(8, BranchType::Unconditional, 0); + EXPECT_TRUE(prediction.isTaken); +} + +// Tests that TagePredictor will predict branch-and-link return pairs correctly +TEST_F(TagePredictorTest, RAS) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 6," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 48); + prediction = predictor.predict(56, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 64); + prediction = predictor.predict(72, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 80); + + prediction = predictor.predict(84, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 76); + prediction = predictor.predict(68, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + 
EXPECT_EQ(prediction.target, 60); + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 12); +} + +// Tests that TagePredictor will predict a previously encountered +// branch correctly, when no address aliasing has occurred +TEST_F(TagePredictorTest, Hit) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 6," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 4); + + auto prediction = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 16); +} + +// Tests that TagePredictor will predict correctly for two different +// behaviours of the same branch but in different states of the program +TEST_F(TagePredictorTest, GlobalIndexing) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 
2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 5," + " Num-Tage-Tables: 1," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + // Spool up first global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 4); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 5); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 6); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 7); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 8); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 9); + // Ensure default behaviour for first encounter + auto prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0); + // Set entry in BTB + predictor.update(0x7C, false, 0x80, BranchType::Conditional, 10); + + // Spool up second global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 11); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 12); + predictor.predict(0, 
BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 13); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 14); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 15); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 16); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 17); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 18); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 19); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 20); + // Re-encounter but with different global history + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); + // Set entry in BTB + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 21); + + // Recreate first global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 22); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 23); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 24); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 25); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 26); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 27); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, 
BranchType::Conditional, 28); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 29); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 30); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 31); + // Get prediction + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); + // Set entry in BTB + predictor.update(0x7C, true, 0x80, BranchType::Conditional, 32); + + // Recreate second global history pattern + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 33); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 34); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 35); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 36); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 37); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 38); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 39); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 40); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 41); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 42); + // Get prediction + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); + predictor.update(0x7C, true, 0xBA, 
BranchType::Conditional, 43); +} + +// Test Flush of RAS functionality +TEST_F(TagePredictorTest, flush) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: " + " {Type: Tage, " + " BTB-Tag-Bits: 11, " + " Saturating-Count-Bits: 2, " + " Global-History-Length: 10, " + " RAS-entries: 5," + " Fallback-Static-Predictor: Always-Taken," + " Tage-Table-Bits: 12," + " Num-Tage-Tables: 1," + " Tag-Length: 8" + " }" + "}"); + auto predictor = simeng::TagePredictor(); + // Add some entries to the RAS + auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 16); + prediction = predictor.predict(24, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 32); + prediction = predictor.predict(40, BranchType::SubroutineCall, 8); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 48); + + // Start getting entries from RAS + prediction = predictor.predict(52, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 44); + prediction = predictor.predict(36, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 28); + + // Flush address + predictor.flush(36); + + // Continue getting entries from RAS + prediction = predictor.predict(20, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 28); + prediction = predictor.predict(16, BranchType::Return, 0); + EXPECT_TRUE(prediction.isTaken); + EXPECT_EQ(prediction.target, 12); +} +} // namespace simeng diff --git a/test/unit/aarch64/AuxiliaryFunctionsTest.cc b/test/unit/aarch64/AuxiliaryFunctionsTest.cc index f319b55dbb..e6918f72d9 100644 --- a/test/unit/aarch64/AuxiliaryFunctionsTest.cc +++ b/test/unit/aarch64/AuxiliaryFunctionsTest.cc @@ -71,10 +71,11 @@ TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { { bitfieldManipulate(0, 0, 16, 0, false); }, "Attempted to use a rotate 
amount of 16 in bitfieldManipulate which is " "greater than or equal to the data type size of 16b in use"); - ASSERT_DEATH({ bitfieldManipulate(0, 0, 0, 16, false); }, - "Attempted to use a source bit position value of 16 in " - "bitfieldManipulate which is greater than or equal to the data " - "type size of 16b in use"); + ASSERT_DEATH( + { bitfieldManipulate(0, 0, 0, 16, false); }, + "Attempted to use a source bit position value of 16 in " + "bitfieldManipulate which is greater than or equal to the data " + "type size of 16b in use"); // uint32 EXPECT_EQ(bitfieldManipulate(0x0000FFFF, 0xFFFF0000, 0, 0, false), @@ -104,10 +105,11 @@ TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { { bitfieldManipulate(0, 0, 32, 0, false); }, "Attempted to use a rotate amount of 32 in bitfieldManipulate which is " "greater than or equal to the data type size of 32b in use"); - ASSERT_DEATH({ bitfieldManipulate(0, 0, 0, 32, false); }, - "Attempted to use a source bit position value of 32 in " - "bitfieldManipulate which is greater than or equal to the data " - "type size of 32b in use"); + ASSERT_DEATH( + { bitfieldManipulate(0, 0, 0, 32, false); }, + "Attempted to use a source bit position value of 32 in " + "bitfieldManipulate which is greater than or equal to the data " + "type size of 32b in use"); // uint64 EXPECT_EQ(bitfieldManipulate(0x00000000FFFFFFFF, 0xFFFFFFFF00000000, @@ -147,10 +149,11 @@ TEST(AArch64AuxiliaryFunctionTest, BitfieldManipulate) { { bitfieldManipulate(0, 0, 64, 0, false); }, "Attempted to use a rotate amount of 64 in bitfieldManipulate which is " "greater than or equal to the data type size of 64b in use"); - ASSERT_DEATH({ bitfieldManipulate(0, 0, 0, 64, false); }, - "Attempted to use a source bit position value of 64 in " - "bitfieldManipulate which is greater than or equal to the data " - "type size of 64b in use"); + ASSERT_DEATH( + { bitfieldManipulate(0, 0, 0, 64, false); }, + "Attempted to use a source bit position value of 64 in " + 
"bitfieldManipulate which is greater than or equal to the data " + "type size of 64b in use"); } /** `conditionHolds` Tests */