Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Re-factor master to main #343

Merged
merged 3 commits into from
Aug 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions include/faabric/batch-scheduler/SchedulingDecision.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
namespace faabric::batch_scheduler {
// Scheduling topology hints help the scheduler decide which host to assign new
// requests in a batch.
// - NONE: bin-packs requests to slots in hosts starting from the master
// host, and overloadds the master if it runs out of resources.
// - NONE: bin-packs requests to slots in hosts starting from the main
// host, and overloads the main host if it runs out of resources.
// - FORCE_LOCAL: force local execution irrespective of the available
// resources.
// - NEVER_ALONE: never allocates a single (non-master) request to a host
// - NEVER_ALONE: never allocates a single (non-main) request to a host
// without other requests of the batch.
// - UNDERFULL: schedule up to 50% of the master hosts' capacity to force
// - UNDERFULL: schedule up to 50% of the main host's capacity to force
// migration opportunities to appear.
enum SchedulingTopologyHint
{
Expand Down Expand Up @@ -82,7 +82,7 @@ class SchedulingDecision
/**
* Work out if this decision is all on this host. If the decision is
* completely on *another* host, we still count it as not being on a single
* host, as this host will be the master.
* host, as this host will be the main.
*
* Will always return false if single host optimisations are switched off.
*/
Expand Down
6 changes: 3 additions & 3 deletions include/faabric/state/InMemoryStateKeyValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
namespace faabric::state {
enum InMemoryStateKeyStatus
{
NOT_MASTER,
MASTER,
NOT_MAIN,
MAIN,
};

class AppendedInMemoryState
Expand Down Expand Up @@ -55,7 +55,7 @@ class InMemoryStateKeyValue final : public StateKeyValue

private:
const std::string thisIP;
const std::string masterIP;
const std::string mainIP;
InMemoryStateKeyStatus status;

InMemoryStateRegistry& stateRegistry;
Expand Down
4 changes: 2 additions & 2 deletions include/faabric/state/InMemoryStateRegistry.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ class InMemoryStateRegistry
void clear();

private:
std::unordered_map<std::string, std::string> masterMap;
std::shared_mutex masterMapMutex;
std::unordered_map<std::string, std::string> mainMap;
std::shared_mutex mainMapMutex;
};

InMemoryStateRegistry& getInMemoryStateRegistry();
Expand Down
4 changes: 2 additions & 2 deletions include/faabric/transport/PointToPointBroker.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

#define DEFAULT_DISTRIBUTED_TIMEOUT_MS 30000

#define POINT_TO_POINT_MASTER_IDX 0
#define POINT_TO_POINT_MAIN_IDX 0

namespace faabric::transport {

Expand Down Expand Up @@ -63,7 +63,7 @@ class PointToPointGroup
private:
faabric::util::SystemConfig& conf;

std::string masterHost;
std::string mainHost;
int appId = 0;
int groupId = 0;
int groupSize = 0;
Expand Down
6 changes: 3 additions & 3 deletions include/faabric/util/snapshot.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ class SnapshotMergeRegion
};

/*
* Calculates a diff value that can later be merged into the master copy of the
* Calculates a diff value that can later be merged into the main copy of the
* given snapshot. It will be used on remote hosts to calculate the diffs that
* are to be sent back to the master host.
* are to be sent back to the main host.
*/
template<typename T>
inline bool calculateDiffValue(const uint8_t* original,
Expand Down Expand Up @@ -210,7 +210,7 @@ inline bool calculateDiffValue(const uint8_t* original,
}

/*
* Applies a diff value to the master copy of a snapshot, where the diff has
* Applies a diff value to the main copy of a snapshot, where the diff has
* been calculated based on a change made to another copy of the same snapshot.
*/
template<typename T>
Expand Down
4 changes: 2 additions & 2 deletions src/mpi/MpiWorld.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ void MpiWorld::create(faabric::Message& call, int newId, int newSize)

auto& sch = faabric::scheduler::getScheduler();

// Dispatch all the chained calls. With the master being rank zero, we want
// Dispatch all the chained calls. With the main being rank zero, we want
// to spawn (size - 1) new functions starting with rank 1
std::shared_ptr<faabric::BatchExecuteRequest> req =
faabric::util::batchExecFactory(user, function, size - 1);
Expand Down Expand Up @@ -724,7 +724,7 @@ void MpiWorld::broadcast(int sendRank,
}
} else {
// If we are neither the sending rank nor a local leader, we receive
// from either our leader master if the broadcast originated in a
// from either our local leader if the broadcast originated in a
// different host, or the sending rank itself if we are on the same host
int sendingRank =
getHostForRank(sendRank) == thisHost ? sendRank : localLeader;
Expand Down
8 changes: 4 additions & 4 deletions src/planner/Planner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,9 +245,9 @@ std::shared_ptr<faabric::Message> Planner::getMessageResult(

// If we are here, it means that we have not found the message result, so
// we register the calling-host's interest if the calling-host has
// provided a masterhost. The masterhost is set when dispatching a message
// provided a main host. The main host is set when dispatching a message
// within faabric, but not when sending an HTTP request
if (!msg->masterhost().empty()) {
if (!msg->mainhost().empty()) {
faabric::util::FullLock lock(plannerMx);

// Check again if the result is not set, as it could have been set
Expand All @@ -260,9 +260,9 @@ std::shared_ptr<faabric::Message> Planner::getMessageResult(
// Definitely the message result is not set, so we add the host to the
// waiters list
SPDLOG_DEBUG("Adding host {} on the waiting list for message {}",
msg->masterhost(),
msg->mainhost(),
msgId);
state.appResultWaiters[msgId].push_back(msg->masterhost());
state.appResultWaiters[msgId].push_back(msg->mainhost());
}

return nullptr;
Expand Down
6 changes: 3 additions & 3 deletions src/planner/PlannerClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,10 @@ std::shared_ptr<faabric::Message> PlannerClient::getMessageResultFromPlanner(
faabric::Message PlannerClient::getMessageResult(const faabric::Message& msg,
int timeoutMs)
{
// Deliberately make a copy here so that we can set the masterhost when
// Deliberately make a copy here so that we can set the main host when
// registering interest in the results
auto msgPtr = std::make_shared<faabric::Message>(msg);
msgPtr->set_masterhost(faabric::util::getSystemConfig().endpointHost);
msgPtr->set_mainhost(faabric::util::getSystemConfig().endpointHost);
return doGetMessageResult(msgPtr, timeoutMs);
}

Expand All @@ -181,7 +181,7 @@ faabric::Message PlannerClient::getMessageResult(int appId,
auto msgPtr = std::make_shared<faabric::Message>();
msgPtr->set_appid(appId);
msgPtr->set_id(msgId);
msgPtr->set_masterhost(faabric::util::getSystemConfig().endpointHost);
msgPtr->set_mainhost(faabric::util::getSystemConfig().endpointHost);
return doGetMessageResult(msgPtr, timeoutMs);
}

Expand Down
2 changes: 1 addition & 1 deletion src/proto/faabric.proto
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ message Message {
// Each BER has a unique app id
int32 appId = 2;
int32 appIdx = 3;
string masterHost = 4;
string mainHost = 4;

enum MessageType {
CALL = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/scheduler/Executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ void Executor::executeTasks(std::vector<int> msgIdxs,
faabric::Message& firstMsg = req->mutable_messages()->at(0);
std::string thisHost = faabric::util::getSystemConfig().endpointHost;

bool isMaster = firstMsg.masterhost() == thisHost;
bool isMaster = firstMsg.mainhost() == thisHost;
bool isThreads = req->type() == faabric::BatchExecuteRequest::THREADS;
bool isSingleHost = req->singlehost();
std::string snapshotKey = firstMsg.snapshotkey();
Expand Down Expand Up @@ -287,7 +287,7 @@ void Executor::executeTasks(std::vector<int> msgIdxs,
// Here all threads are still executing, so we have to overload.
// If any tasks are blocking we risk a deadlock, and can no
// longer guarantee the application will finish. In general if
// we're on the master host and this is a thread, we should
// we're on the main host and this is a thread, we should
// avoid the zeroth and first pool threads as they are likely to
// be the main thread and the zeroth in the communication group,
// so will be blocking.
Expand Down
2 changes: 1 addition & 1 deletion src/scheduler/FunctionCallClient.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ faabric::HostResources FunctionCallClient::getResources()
return response;
}

// This function call is used by the master host of an application to let know
// This function call is used by the main host of an application to let know
// other hosts running functions of the same application that a migration
// opportunity has been found.
void FunctionCallClient::sendPendingMigrations(
Expand Down
2 changes: 1 addition & 1 deletion src/scheduler/FunctionCallServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void FunctionCallServer::recvExecuteFunctions(std::span<const uint8_t> buffer)
// These flags were set by the old endpoint; we temporarily set them here
parsedMsg.mutable_messages()->at(0).set_timestamp(
faabric::util::getGlobalClock().epochMillis());
parsedMsg.mutable_messages()->at(0).set_masterhost(
parsedMsg.mutable_messages()->at(0).set_mainhost(
faabric::util::getSystemConfig().endpointHost);
}
scheduler.callFunctions(
Expand Down
48 changes: 24 additions & 24 deletions src/scheduler/Scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@
faabric::Message& firstMsg = execs.back()->getBoundMessage();
std::string user = firstMsg.user();
std::string function = firstMsg.function();
std::string masterHost = firstMsg.masterhost();
std::string mainHost = firstMsg.mainhost();

for (auto exec : execs) {
long millisSinceLastExec = exec->getMillisSinceLastExec();
Expand Down Expand Up @@ -279,18 +279,18 @@
}

// Unregister this host if no more executors remain on this host, and
// it's not the master
// it's not the main
if (execs.empty()) {
SPDLOG_TRACE("No remaining executors for {}", key);

bool isMaster = thisHost == masterHost;
bool isMaster = thisHost == mainHost;
if (!isMaster) {
faabric::UnregisterRequest req;
req.set_host(thisHost);
req.set_user(user);
req.set_function(function);

getFunctionCallClient(masterHost)->unregister(req);
getFunctionCallClient(mainHost)->unregister(req);

Check warning on line 293 in src/scheduler/Scheduler.cpp

View check run for this annotation

Codecov / codecov/patch

src/scheduler/Scheduler.cpp#L293

Added line #L293 was not covered by tests
}

keysToRemove.emplace_back(key);
Expand Down Expand Up @@ -359,9 +359,9 @@
std::shared_ptr<faabric::BatchExecuteRequest> req)
{
// We assume all the messages are for the same function and have the
// same master host
// same main host
faabric::Message& firstMsg = req->mutable_messages()->at(0);
std::string masterHost = firstMsg.masterhost();
std::string mainHost = firstMsg.mainhost();

// Get topology hint from message
faabric::batch_scheduler::SchedulingTopologyHint topologyHint =
Expand All @@ -374,16 +374,16 @@
topologyHint ==
faabric::batch_scheduler::SchedulingTopologyHint::FORCE_LOCAL;

// If we're not the master host, we need to forward the request back to the
// master host. This will only happen if a nested batch execution happens.
if (!isForceLocal && masterHost != thisHost) {
// If we're not the main host, we need to forward the request back to the
// main host. This will only happen if a nested batch execution happens.
if (!isForceLocal && mainHost != thisHost) {
std::string funcStr = faabric::util::funcToString(firstMsg, false);
SPDLOG_DEBUG("Forwarding {} back to master {}", funcStr, masterHost);
SPDLOG_DEBUG("Forwarding {} back to main {}", funcStr, mainHost);

getFunctionCallClient(masterHost)->executeFunctions(req);
getFunctionCallClient(mainHost)->executeFunctions(req);
faabric::batch_scheduler::SchedulingDecision decision(
firstMsg.appid(), firstMsg.groupid());
decision.returnHost = masterHost;
decision.returnHost = mainHost;
return decision;
}

Expand Down Expand Up @@ -477,7 +477,7 @@
hosts.push_back(thisHost);
}
} else {
// At this point we know we're the master host, and we've not been
// At this point we know we're the main host, and we've not been
// asked to force full local execution.

// Work out how many we can handle locally
Expand Down Expand Up @@ -664,7 +664,7 @@
faabric::Message& firstMsg = req->mutable_messages()->at(0);
std::string funcStr = faabric::util::funcToString(firstMsg, false);
int nMessages = req->messages_size();
bool isMaster = thisHost == firstMsg.masterhost();
bool isMaster = thisHost == firstMsg.mainhost();
bool isMigration = req->type() == faabric::BatchExecuteRequest::MIGRATION;

if (decision.hosts.size() != nMessages) {
Expand All @@ -676,14 +676,14 @@
throw std::runtime_error("Invalid scheduler hint for messages");
}

if (firstMsg.masterhost().empty()) {
SPDLOG_ERROR("Request {} has no master host", funcStr);
throw std::runtime_error("Message with no master host");
if (firstMsg.mainhost().empty()) {
SPDLOG_ERROR("Request {} has no main host", funcStr);
throw std::runtime_error("Message with no main host");

Check warning on line 681 in src/scheduler/Scheduler.cpp

View check run for this annotation

Codecov / codecov/patch

src/scheduler/Scheduler.cpp#L680-L681

Added lines #L680 - L681 were not covered by tests
}

// Send out point-to-point mappings if necessary (unless being forced to
// execute locally, in which case they will be transmitted from the
// master)
// main)
bool isForceLocal =
topologyHint ==
faabric::batch_scheduler::SchedulingTopologyHint::FORCE_LOCAL;
Expand Down Expand Up @@ -1076,8 +1076,8 @@
msg.set_finishtimestamp(faabric::util::getGlobalClock().epochMillis());

// Remove the app from in-flight map if still there, and this host is the
// master host for the message
if (msg.masterhost() == thisHost) {
// main host for the message
if (msg.mainhost() == thisHost) {
removePendingMigration(msg.appid());
}

Expand All @@ -1100,11 +1100,11 @@
const std::string& key,
const std::vector<faabric::util::SnapshotDiff>& diffs)
{
bool isMaster = msg.masterhost() == conf.endpointHost;
bool isMaster = msg.mainhost() == conf.endpointHost;
if (isMaster) {
if (!diffs.empty()) {
// On master we queue the diffs locally directly, on a remote
// host we push them back to master
// On main we queue the diffs locally directly, on a remote
// host we push them back to main
SPDLOG_DEBUG("Queueing {} diffs for {} to snapshot {} (group {})",
diffs.size(),
faabric::util::funcToString(msg, false),
Expand All @@ -1123,7 +1123,7 @@
setThreadResultLocally(msg.id(), returnValue);
} else {
// Push thread result and diffs together
getSnapshotClient(msg.masterhost())
getSnapshotClient(msg.mainhost())
->pushThreadResult(msg.id(), returnValue, key, diffs);
}
}
Expand Down
Loading
Loading