Skip to content

Commit

Permalink
DI unit balanced port allocation fix (#431)
Browse files Browse the repository at this point in the history
* Made more balanced RS allocation in DI unit by not stalling on first port allocation failure but instead cycling through all possible RSs.

* Added port allocator config option.
  • Loading branch information
FinnWilkinson authored Dec 17, 2024
1 parent f01b5d6 commit 8f4ef13
Show file tree
Hide file tree
Showing 19 changed files with 103 additions and 27 deletions.
2 changes: 2 additions & 0 deletions configs/DEMO_RISCV.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Queue-Sizes:
ROB: 180
Load: 64
Store: 36
Port-Allocator:
Type: Balanced
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/a64fx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Queue-Sizes:
ROB: 128
Load: 40
Store: 24
Port-Allocator:
Type: A64FX
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/a64fx_SME.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ Queue-Sizes:
ROB: 128
Load: 40
Store: 24
Port-Allocator:
Type: A64FX
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/m1_firestorm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Queue-Sizes:
ROB: 630
Load: 130
Store: 60
Port-Allocator:
Type: M1
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/sst-cores/a64fx-sst.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ Queue-Sizes:
ROB: 128
Load: 40
Store: 24
Port-Allocator:
Type: A64FX
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/sst-cores/m1_firestorm-sst.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ Queue-Sizes:
ROB: 630
Load: 130
Store: 60
Port-Allocator:
Type: M1
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/sst-cores/tx2-sst.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Queue-Sizes:
ROB: 180
Load: 64
Store: 36
Port-Allocator:
Type: Balanced
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
2 changes: 2 additions & 0 deletions configs/tx2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Queue-Sizes:
ROB: 180
Load: 64
Store: 36
Port-Allocator:
Type: Balanced
Branch-Predictor:
Type: "Perceptron"
BTB-Tag-Bits: 11
Expand Down
7 changes: 7 additions & 0 deletions docs/sphinx/user/configuring_simeng.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,13 @@ Load
Store
The size of the store queue within the load/store queue unit.

Port-Allocator
--------------

This section allows a user to select which Port Allocator to use. The available options are:

Type
The specific allocator algorithm to use. The current options are ``Balanced``, ``A64FX``, and ``M1``. ``Balanced`` implements a round-robin style algorithm, allocating instructions to compatible ports evenly. ``A64FX`` and ``M1`` implement the port allocation algorithms found in the respective hardware, as per their names.

Branch-Predictor
----------------
Expand Down
1 change: 1 addition & 0 deletions src/include/simeng/CoreInstance.hh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "simeng/models/outoforder/Core.hh"
#include "simeng/pipeline/A64FXPortAllocator.hh"
#include "simeng/pipeline/BalancedPortAllocator.hh"
#include "simeng/pipeline/M1PortAllocator.hh"

namespace simeng {

Expand Down
4 changes: 2 additions & 2 deletions src/include/simeng/pipeline/M1PortAllocator.hh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class M1PortAllocator : public PortAllocator {
* a port type which denotes the matching requirements of said instruction
* groups. */
M1PortAllocator(const std::vector<std::vector<uint16_t>>& portArrangement,
std::vector<std::pair<uint8_t, uint64_t>> rsArrangement);
std::vector<std::pair<uint16_t, uint64_t>> rsArrangement);

/** Allocate the lowest weighted port available for the specified instruction
* group. Returns the allocated port, and increases the weight of the port.
Expand Down Expand Up @@ -56,7 +56,7 @@ class M1PortAllocator : public PortAllocator {
std::function<void(std::vector<uint32_t>&)> rsSizes_;

/** Mapping from port index to reservation station <index, size> */
std::vector<std::pair<uint8_t, uint64_t>> rsArrangement_;
std::vector<std::pair<uint16_t, uint64_t>> rsArrangement_;
};

} // namespace pipeline
Expand Down
33 changes: 31 additions & 2 deletions src/lib/CoreInstance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,8 +236,37 @@ void CoreInstance::createCore() {
portArrangement[i].push_back(grp);
}
}
portAllocator_ =
std::make_unique<pipeline::BalancedPortAllocator>(portArrangement);

// Initialise the desired port allocator
std::string portAllocatorType =
config_["Port-Allocator"]["Type"].as<std::string>();
if (portAllocatorType == "Balanced") {
portAllocator_ =
std::make_unique<pipeline::BalancedPortAllocator>(portArrangement);
} else if (portAllocatorType == "A64FX") {
portAllocator_ =
std::make_unique<pipeline::A64FXPortAllocator>(portArrangement);
} else if (portAllocatorType == "M1") {
// Extract the reservation station arrangement from the config file
auto config_rs = config_["Reservation-Stations"];
std::vector<std::pair<uint16_t, uint64_t>> rsArrangement;
for (size_t i = 0; i < config_rs.num_children(); i++) {
auto config_rs_ports = config_rs[i]["Port-Nums"];
for (size_t j = 0; j < config_rs_ports.num_children(); j++) {
uint16_t port = config_rs_ports[j].as<uint16_t>();
if (static_cast<uint16_t>(rsArrangement.size()) < port + 1) {
rsArrangement.resize(port + 1);
}
rsArrangement[port] = {i, config_rs[i]["Size"].as<uint64_t>()};
}
}
portAllocator_ = std::make_unique<pipeline::M1PortAllocator>(
portArrangement, rsArrangement);
} else {
std::cout << "[SimEng:CoreInstnce] Invalid Port Allocator type selected."
<< std::endl;
exit(EXIT_FAILURE);
}

// Construct the core object based on the defined simulation mode
uint64_t entryPoint = process_->getEntryPoint();
Expand Down
7 changes: 7 additions & 0 deletions src/lib/config/ModelConfig.cc
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,13 @@ void ModelConfig::setExpectations(bool isDefault) {
ExpectationNode::createExpectation<uint32_t>(16, "Store"));
expectations_["Queue-Sizes"]["Store"].setValueBounds<uint32_t>(1, UINT32_MAX);

// Port-Allocator
expectations_.addChild(ExpectationNode::createExpectation("Port-Allocator"));
expectations_["Port-Allocator"].addChild(
ExpectationNode::createExpectation<std::string>("Balanced", "Type"));
expectations_["Port-Allocator"]["Type"].setValueSet(
std::vector<std::string>{"Balanced", "A64FX", "M1"});

// Branch-Predictor
expectations_.addChild(
ExpectationNode::createExpectation("Branch-Predictor"));
Expand Down
41 changes: 26 additions & 15 deletions src/lib/pipeline/DispatchIssueUnit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,31 +67,42 @@ void DispatchIssueUnit::tick() {
continue;
}

const std::vector<uint16_t>& supportedPorts = uop->getSupportedPorts();
std::vector<uint16_t> supportedPorts = uop->getSupportedPorts();
if (uop->exceptionEncountered()) {
// Exception; mark as ready to commit, and remove from pipeline
uop->setCommitReady();
input_.getHeadSlots()[slot] = nullptr;
continue;
}
// Allocate issue port to uop
uint16_t port = portAllocator_.allocate(supportedPorts);
uint16_t RS_Index = portMapping_[port].first;
uint16_t RS_Port = portMapping_[port].second;
assert(RS_Index < reservationStations_.size() &&
"Allocated port inaccessible");
ReservationStation& rs = reservationStations_[RS_Index];

// When appropriate, stall uop or input buffer if stall buffer full
if (rs.currentSize == rs.capacity ||
dispatches_[RS_Index] == rs.dispatchRate) {
// Deallocate port given
portAllocator_.deallocate(port);
// Loop through all ports and remove any whose RS is at capacity or dispatch
// rate has been met
auto portIt = supportedPorts.begin();
while (portIt != supportedPorts.end()) {
uint16_t RS_Index = portMapping_[*portIt].first;
ReservationStation* rs = &reservationStations_[RS_Index];
if (rs->currentSize == rs->capacity ||
dispatches_[RS_Index] == rs->dispatchRate) {
portIt = supportedPorts.erase(portIt);
} else {
portIt++;
}
}
// If no ports left, stall and return
if (supportedPorts.size() == 0) {
input_.stall(true);
rsStalls_++;
return;
}

// Find an available RS
uint16_t port = portAllocator_.allocate(supportedPorts);
uint16_t RS_Index = portMapping_[port].first;
uint16_t RS_Port = portMapping_[port].second;
assert(RS_Index < reservationStations_.size() &&
"Allocated port inaccessible");
ReservationStation* rs = &reservationStations_[RS_Index];

// Assume the uop will be ready
bool ready = true;

Expand Down Expand Up @@ -123,10 +134,10 @@ void DispatchIssueUnit::tick() {

// Increment dispatches made and RS occupied entries size
dispatches_[RS_Index]++;
rs.currentSize++;
rs->currentSize++;

if (ready) {
rs.ports[RS_Port].ready.push_back(std::move(uop));
rs->ports[RS_Port].ready.push_back(std::move(uop));
}

input_.getHeadSlots()[slot] = nullptr;
Expand Down
2 changes: 1 addition & 1 deletion src/lib/pipeline/M1PortAllocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace pipeline {

M1PortAllocator::M1PortAllocator(
const std::vector<std::vector<uint16_t>>& portArrangement,
std::vector<std::pair<uint8_t, uint64_t>> rsArrangement)
std::vector<std::pair<uint16_t, uint64_t>> rsArrangement)
: weights(portArrangement.size(), 0), rsArrangement_(rsArrangement) {}

uint16_t M1PortAllocator::allocate(const std::vector<uint16_t>& ports) {
Expand Down
6 changes: 4 additions & 2 deletions test/integration/ConfigTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ TEST(ConfigTest, Default) {
"'FloatingPoint/SVE-Count': 38\n 'Predicate-Count': 17\n "
"'Conditional-Count': 1\n 'Matrix-Count': 1\n'Pipeline-Widths':\n "
"Commit: 1\n FrontEnd: 1\n 'LSQ-Completion': 1\n'Queue-Sizes':\n ROB: "
"32\n Load: 16\n Store: 16\n'Branch-Predictor':\n Type: Perceptron\n "
"32\n Load: 16\n Store: 16\n'Port-Allocator':\n Type: "
"Balanced\n'Branch-Predictor':\n Type: Perceptron\n "
"'BTB-Tag-Bits': 8\n 'Global-History-Length': 8\n 'RAS-entries': "
"8\n'L1-Data-Memory':\n 'Interface-Type': "
"Flat\n'L1-Instruction-Memory':\n 'Interface-Type': "
Expand Down Expand Up @@ -103,7 +104,8 @@ TEST(ConfigTest, Default) {
"100000\n'Register-Set':\n 'GeneralPurpose-Count': 38\n "
"'FloatingPoint-Count': 38\n'Pipeline-Widths':\n Commit: 1\n FrontEnd: "
"1\n 'LSQ-Completion': 1\n'Queue-Sizes':\n ROB: 32\n Load: 16\n "
"Store: 16\n'Branch-Predictor':\n Type: Perceptron\n 'BTB-Tag-Bits': "
"Store: 16\n'Port-Allocator':\n Type: Balanced\n'Branch-Predictor':\n "
"Type: Perceptron\n 'BTB-Tag-Bits': "
"8\n 'Global-History-Length': 8\n 'RAS-entries': "
"8\n'L1-Data-Memory':\n 'Interface-Type': "
"Flat\n'L1-Instruction-Memory':\n 'Interface-Type': "
Expand Down
7 changes: 5 additions & 2 deletions test/unit/GenericPredictorTest.cc
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include "ConfigInit.hh"
#include "MockInstruction.hh"
#include "gtest/gtest.h"
#include "simeng/branchpredictors/GenericPredictor.hh"
Expand All @@ -18,15 +19,17 @@ class GenericPredictorTest : public testing::Test {
// Tests that a GenericPredictor will predict the correct direction on a
// miss
TEST_F(GenericPredictorTest, Miss) {
simeng::config::SimInfo::addToConfig(
ConfigInit configInit = ConfigInit(
config::ISA::AArch64,
"{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, "
"Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, "
"Fallback-Static-Predictor: Always-Taken}}");
auto predictor = simeng::GenericPredictor();
auto prediction = predictor.predict(0, BranchType::Conditional, 0);
EXPECT_TRUE(prediction.isTaken);

simeng::config::SimInfo::addToConfig(
configInit = ConfigInit(
config::ISA::AArch64,
"{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, "
"Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, "
"Fallback-Static-Predictor: Always-Not-Taken}}");
Expand Down
4 changes: 2 additions & 2 deletions test/unit/pipeline/DispatchIssueUnitTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
namespace simeng {
namespace pipeline {

using ::testing::_;
using ::testing::Return;
using ::testing::ReturnRef;

Expand Down Expand Up @@ -269,8 +270,7 @@ TEST_F(PipelineDispatchIssueUnitTest, singleInstr_rsFull) {
// All expected calls to instruction during tick()
EXPECT_CALL(*uop, getSupportedPorts()).WillOnce(ReturnRef(suppPorts));
// All expected calls to portAllocator during tick()
EXPECT_CALL(portAlloc, allocate(suppPorts)).WillOnce(Return(EAGA));
EXPECT_CALL(portAlloc, deallocate(EAGA));
EXPECT_CALL(portAlloc, allocate(_)).Times(0);
input.getHeadSlots()[0] = uopPtr;
diUnit.tick();
// Ensure Reservation station sizes have stayed the same
Expand Down
2 changes: 1 addition & 1 deletion test/unit/pipeline/M1PortAllocatorTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class M1PortAllocatorTest : public testing::Test {
{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}};
// Representation of the M1 Firestorm Reservation Station Arrangement
// std::pair<uint16_t, uint64_t> = <rsIndex, rsSize>
std::vector<std::pair<uint8_t, uint64_t>> rsArrangement = {
std::vector<std::pair<uint16_t, uint64_t>> rsArrangement = {
{0, 24}, {1, 26}, {2, 16}, {3, 12}, {4, 28}, {5, 28}, {6, 12},
{7, 12}, {8, 12}, {9, 12}, {10, 36}, {11, 36}, {12, 36}, {13, 36}};

Expand Down

0 comments on commit 8f4ef13

Please sign in to comment.