-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
plugin: add estimation of cores-per-node count on system during initialization #469
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ extern "C" { | |
#endif | ||
#include <flux/core.h> | ||
#include <flux/jobtap.h> | ||
#include <flux/idset.h> | ||
#include <jansson.h> | ||
} | ||
|
||
|
@@ -43,6 +44,9 @@ extern "C" { | |
#define DEFAULT_QUEUE_WEIGHT 10000 | ||
#define DEFAULT_AGE_WEIGHT 1000 | ||
|
||
// set up cores-per-node count for the system | ||
size_t ncores_per_node = 0; | ||
|
||
std::map<int, std::map<std::string, Association>> users; | ||
std::map<std::string, Queue> queues; | ||
std::map<int, std::string> users_def_bank; | ||
|
@@ -254,9 +258,11 @@ static int query_cb (flux_plugin_t *p, | |
|
||
if (flux_plugin_arg_pack (args, | ||
FLUX_PLUGIN_ARG_OUT, | ||
"{s:O}", | ||
"{s:O s:i}", | ||
"mf_priority_map", | ||
accounting_data) < 0) | ||
accounting_data, | ||
"ncores_per_node", | ||
ncores_per_node) < 0) | ||
flux_log_error (flux_jobtap_get_flux (p), | ||
"mf_priority: query_cb: flux_plugin_arg_pack: %s", | ||
flux_plugin_arg_strerror (args)); | ||
|
@@ -1180,6 +1186,49 @@ extern "C" int flux_plugin_init (flux_plugin_t *p) | |
priority_weights["queue"] = DEFAULT_QUEUE_WEIGHT; | ||
priority_weights["age"] = DEFAULT_AGE_WEIGHT; | ||
|
||
// initialize the plugin with total node and core counts | ||
flux_t *h; | ||
flux_future_t *f; | ||
const char *core; | ||
|
||
h = flux_jobtap_get_flux (p); | ||
// This synchronous call to fetch R from the KVS is needed in order to | ||
// validate and enforce resource limits on jobs. The job manager will | ||
// block here while waiting for R when the plugin is loaded but it *should* | ||
// occur over a very short time. | ||
if (!(f = flux_kvs_lookup (h, | ||
NULL, | ||
FLUX_KVS_WAITCREATE, | ||
"resource.R"))) { | ||
flux_log_error (h, "flux_kvs_lookup"); | ||
return -1; | ||
} | ||
// Equal number of cores on all nodes in R is assumed here; thus, only | ||
// the first entry is looked at | ||
if (flux_kvs_lookup_get_unpack (f, | ||
"{s{s[{s{s:s}}]}}", | ||
"execution", | ||
"R_lite", | ||
"children", | ||
"core", &core) < 0) { | ||
flux_log_error (h, "flux_kvs_lookup_unpack"); | ||
return -1; | ||
} | ||
Comment on lines
+1208
to
+1216
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Normally I would request that this synchronous get be replaced with an asynchronous one. I'd at least suggest a comment here describing why a synchronous get is used in this case. @garlick: any other thoughts? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The FLUX_KVS_WATCH flag should be dropped. That's only used when you want to receive a response for every change to the key, and here the future is being destroyed after the first response. IRL, we only load […]. Yes, a comment would be good since synchronous activities always raise eyebrows. |
||
|
||
if (core == NULL) { | ||
flux_log_error (h, | ||
"mf_priority: could not get system " | ||
"cores-per-node information"); | ||
return -1; | ||
} | ||
|
||
// calculate number of cores-per-node on system | ||
idset* cores_decoded = idset_decode (core); | ||
ncores_per_node = idset_count (cores_decoded); | ||
|
||
flux_future_destroy (f); | ||
idset_destroy (cores_decoded); | ||
|
||
return 0; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
#!/bin/bash | ||
|
||
test_description='test calculating and storing system core information in priority plugin' | ||
|
||
. `dirname $0`/sharness.sh | ||
|
||
mkdir -p conf.d | ||
|
||
MULTI_FACTOR_PRIORITY=${FLUX_BUILD_DIR}/src/plugins/.libs/mf_priority.so | ||
|
||
export TEST_UNDER_FLUX_SCHED_SIMPLE_MODE="limited=1" | ||
test_under_flux 4 job -o,--config-path=$(pwd)/conf.d | ||
|
||
flux setattr log-stderr-level 1 | ||
|
||
test_expect_success 'load multi-factor priority plugin' ' | ||
flux jobtap load -r .priority-default ${MULTI_FACTOR_PRIORITY} | ||
' | ||
|
||
test_expect_success 'check that mf_priority plugin is loaded' ' | ||
flux jobtap list | grep mf_priority | ||
' | ||
|
||
test_expect_success 'check that cores-per-node count is correct' ' | ||
flux jobtap query mf_priority.so > query.json && | ||
test_debug "jq -S . <query.json" && | ||
flux resource R --include=0 | flux R decode --count=core > ncores_per_node.test && | ||
jq -e ".ncores_per_node == $(cat ncores_per_node.test)" <query.json | ||
' | ||
|
||
test_done |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
R may have multiple entries in the R_lite array. To handle possible heterogeneity, you could iterate each entry and use the maximum number of cores found.
Probably ok if this is just a first cut, though. If so, I'd put a comment stating that "equal number of cores on all nodes in R is assumed, so we only look at the first entry" or similar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for mentioning this, I was unaware that it could have multiple entries. If it has multiple entries, would it look like this?