Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

One tool #429

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 110 additions & 63 deletions endpoints/base
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
# Shared init and functions for endpoints

echo "#params: $@"

# globals
host="" # the host the endpoint works with
controller_ipaddr=""
rb_exit_success=0
Expand All @@ -17,7 +19,8 @@ cs_rb_opts=""
do_validate="0"
num_clients=0
num_servers=0
num_collectors=0
num_profilers=0
new_followers=""
disable_tools=0
abort=0
userenv=""
Expand All @@ -27,16 +30,27 @@ max_sample_failures=""
total_cpu_partitions=0
export ssh_opts="-q -o StrictHostKeyChecking=no -o PasswordAuthentication=no -o ControlMaster=auto -o ControlPath=~/.ssh/sockets/%r@%h-%p -o ControlPersist=60"
mkdir -p ~/.ssh/sockets
declare -A clients
declare -A servers
declare -A collectors # For a remotehost that has no client or server, but runs tools
config_dir=""
engine_config_dir=""
engine_bench_cmds_dir=""
run_dir=""
engine_logs_dir=""
endpoint_run_dir=""
bootstrap_script="/usr/local/bin/bootstrap"
engine_run_script="/usr/local/bin/engine-script"
roadblock_msgs_dir=""

declare -A clients=()
declare -A servers=()
declare -A profilers=()
declare -A id_to_bench
declare -A bench_to_image
declare -A cpuPartitioning
declare -A osruntime
cpuPartitioning[default]=0
declare -A numaNode
numaNode[default]=-1
profiler_count=0


function range_to_list() {
Expand Down Expand Up @@ -305,42 +319,41 @@ function process_common_endpoint_opts() {
# Register engines of one type for every ID in an ID spec.
#
# Arguments:
#   $1 - engine type: "client(s)", "server(s)", "profiler", or a per-tool
#        profiler type beginning with "profiler" (e.g. the
#        "profiler-<endpoint-label>-<tool>" value built by
#        add_profiler_engines)
#   $2 - ID spec, expanded into a whitespace-separated ID list by
#        range_to_list
# Globals written:
#   clients  servers  profilers  num_clients  num_servers  num_profilers
# Exits (via exit_error) when the type is not recognized.
function addto_clients_servers() {
    local arg="$1"; shift
    local val="$1"; shift
    local list id

    # Declared separately from the assignment so that the exit status of
    # range_to_list is not masked by 'local'.
    list=$(range_to_list "$val")

    # $list is intentionally unquoted: it is a whitespace-separated ID list.
    for id in $list; do
        case "$arg" in
            clients|client)
                clients[$id]="client-$id"
                num_clients=$((num_clients + 1))
                ;;
            servers|server)
                servers[$id]="server-$id"
                num_servers=$((num_servers + 1))
                ;;
            profiler)
                # A bare "profiler" is only a request for tools; the actual
                # per-tool profiler engines are registered by a later call
                # using a "profiler-..." type.
                echo "# Not adding profiler-$val because per-tool profilers will be added later"
                ;;
            profiler*)
                profilers[$arg-$id]="$arg-$id"
                num_profilers=$((num_profilers + 1))
                ;;
            *)
                exit_error "This client/server/profiler label is not recognized: $arg-$val"
                ;;
        esac
    done
}

function set_total_cpupart() {
for this_cs_label in ${clients[@]} ${servers[@]}; do
# To be called only once at endpoint-deploy
local engine_label cpu_partitioning

# globals used:
# cpu_part_idx clients servers cpuPartitioning total_cpu_partitions

# When multiple osruntimes are hosted, each one needs to differentiate
# its cpu-allocation by an assigned index. This index must be incremented
# for each osruntime launched with cpu-partitioning.
cpu_part_idx=0
for engine_label in ${clients[@]} ${servers[@]}; do
set +u
cpu_partitioning=0
if [ ! -z "${cpuPartitioning[$this_cs_label]}" ]; then
cpu_partitioning=${cpuPartitioning[$this_cs_label]}
if [ ! -z "${cpuPartitioning[$engine_label]}" ]; then
cpu_partitioning=${cpuPartitioning[$engine_label]}
elif [ ! -z "${cpuPartitioning[default]}" ]; then
cpu_partitioning=${cpuPartitioning[default]}
fi
Expand All @@ -355,13 +368,20 @@ function set_total_cpupart() {
function set_osruntime_numanode_cpupart() {
local this_cs_label=$1
set +u
if [ ! -z "${osruntime[$this_cs_label]}" ]; then
# Always force tools to podman because they tend to clean
# themselves up reliably
if echo $this_cs_label | grep -P '^profiler-\w+-\d+-\w+-\d+$'; then
os_runtime="podman"
elif [ ! -z "${osruntime[$this_cs_label]}" ]; then
os_runtime=${osruntime[$this_cs_label]}
else
os_runtime=${osruntime[default]}
fi

if [ ! -z "${cpuPartitioning[$this_cs_label]}" ]; then
# profilers never use cpu-partitioning
if echo $this_cs_label | grep -P '^profiler-\w+-\d+-\w+-\d+$'; then
cpu_partitioning=0
elif [ ! -z "${cpuPartitioning[$this_cs_label]}" ]; then
cpu_partitioning=${cpuPartitioning[$this_cs_label]}
elif [ ! -z "${cpuPartitioning[default]}" ]; then
cpu_partitioning=${cpuPartitioning[default]}
Expand All @@ -382,8 +402,8 @@ function echo_clients_servers() {
if [ $num_servers -gt 0 ]; then
echo "server ${!servers[@]}"
fi
if [ $num_collectors -gt 0 ]; then
echo "profiler ${!collectors[@]}"
if [ $num_profilers -gt 0 ]; then
echo "profiler ${!profilers[@]}"
fi
}

Expand All @@ -394,8 +414,6 @@ function init_common_dirs() {
run_dir="${base_run_dir}/run"
engine_logs_dir="${run_dir}/engine/logs"
endpoint_run_dir="${run_dir}/endpoint/${endpoint_label}"
bootstrap_script="/usr/local/bin/bootstrap"
engine_run_script="/usr/local/bin/engine-script"
roadblock_msgs_dir="${endpoint_run_dir}/roadblock-msgs"

if [ "$do_validate" != 1 ]; then
Expand Down Expand Up @@ -986,15 +1004,17 @@ function process_roadblocks() {
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}



do_roadblock "engine-init-begin" ${engine_script_start_timeout}
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}

call_endpoint_specific_function "${endpoint_type}" "engine_init" "engine-init-end"

do_roadblock "engine-init-end" ${engine_script_start_timeout} messages "${msg_file}"
if [ "$msg_file" != "" -a -e "$msg_file" ]; then
do_roadblock "engine-init-end" ${engine_script_start_timeout} messages $msg_file
else
do_roadblock "engine-init-end" ${engine_script_start_timeout}
fi
roadblock_rc=$?
roadblock_exit_on_error ${roadblock_rc}

Expand Down Expand Up @@ -1149,7 +1169,7 @@ function load_settings() {
fi

if [ -z "${userenv}" -o "${userenv}" == "default" ]; then
load_json_setting "userenvs.default" userenv
load_json_setting "userenvs.default.benchmarks" userenv
fi

if [ -z "${user}" ]; then
Expand Down Expand Up @@ -1208,7 +1228,7 @@ function make_osruntime_boostrap_script() {
echo "run_id=$1" >>$outfile; shift
echo "rickshaw_host=$1" >>$outfile; shift
echo "endpoint_run_dir=$1" >>$outfile; shift
echo "cs_label=$1" >>$outfile; shift
echo "cs_labels=$1" >>$outfile; shift
echo "base_run_dir=$1" >>$outfile; shift
echo "cpu_partitioning=$1" >>$outfile; shift
echo "endpoint=$1" >>$outfile; shift
Expand All @@ -1218,7 +1238,7 @@ function make_osruntime_boostrap_script() {
echo "rb_passwd=$1" >>$outfile; shift
echo "rb_id=$1" >>$outfile; shift
echo "ssh_id=\"$1\"" >>$outfile; shift
echo "image=$1" >>$outfile; shift
echo "images=$1" >>$outfile; shift
echo "disable_tools=$1" >>$outfile; shift
cat $endpoint_base_dir/osruntime-bootstrap >>$outfile
chmod +x $outfile
Expand All @@ -1240,28 +1260,55 @@ function get_opt_field() {
echo "${input}" | cut -d ':' -f ${field}
}

# Look up the container image for an engine label.
#
# Arguments:
#   $1 - engine label, e.g. "client-1", "server-2", or a per-tool profiler
#        label such as "profiler-remotehost-2-sysstat-1"
#   $2 - name of the caller's variable that receives the image
# Globals read:
#   id_to_bench     (engine ID -> benchmark name)
#   bench_to_image  (benchmark or tool name -> image)
# Outputs:
#   writes "image for <label> is [<image>]" to stdout
#
# All locals carry a "_gin_" prefix so that they cannot shadow the caller's
# output variable: a nameref resolves its target by name at use time, so a
# local called e.g. "image" would silently capture a caller's "image".
function get_image_name() {
    local _gin_label=$1; shift
    typeset -n _gin_out=$1; shift
    local _gin_type _gin_id _gin_key="" _gin_image=""
    local -a _gin_fields

    # Split the label on '-' into its fields.
    IFS=- read -r -a _gin_fields <<<"$_gin_label"
    _gin_type=${_gin_fields[0]:-}

    if [ "$_gin_type" == "profiler" ]; then
        # example label: profiler-remotehost-2-sysstat-1
        # we need only 'sysstat' (the 4th '-'-separated field)
        _gin_key=${_gin_fields[3]:-}
    else
        # Everything after "<type>-" is the engine ID.
        _gin_id=${_gin_label#"$_gin_type"-}
        if [ -n "$_gin_id" ]; then
            _gin_key=${id_to_bench[$_gin_id]:-}
        fi
    fi

    # An empty key is not a valid associative-array subscript; leave the
    # image empty in that case instead of erroring out.
    if [ -n "$_gin_key" ]; then
        _gin_image=${bench_to_image[$_gin_key]:-}
    fi

    echo "image for $_gin_label is [$_gin_image]"
    _gin_out=$_gin_image
}

# Register one profiler engine per tool configured for a scope.
#
# The tool names are the first ':'-separated field of each line in
# $config_dir/tool-cmds/<scope>/start.
#
# Arguments:
#   $1 - scope: client-<n>, server-<n>, worker, master, compute
#   $2 - name of the caller's variable that receives the space-separated
#        labels of the profiler engines created by THIS call (empty when
#        tools are disabled or the scope has no tools)
# Globals read:
#   disable_tools  config_dir  endpoint_label
# Globals written:
#   profiler_count  (incremented once per call that finds tools)
#   new_followers   (cumulative over multiple calls)
#   plus whatever addto_clients_servers updates
# Outputs:
#   one "tool: <name>" line per tool on stdout
#
# All locals carry an "_ape_" prefix so that they cannot shadow the caller's
# output variable (a nameref resolves its target by name at use time, so a
# local called e.g. "new_profilers" would capture a caller's "new_profilers").
function add_profiler_engines() {
    local _ape_scope=$1; shift # client-<n>, server-<n>, worker, master, compute
    typeset -n _ape_out=$1; shift
    local _ape_new="" _ape_tools _ape_tool _ape_type _ape_label

    if [ "$disable_tools" != "1" ]; then
        if [[ "$_ape_scope" =~ ^(client|server)-[0-9]+$ ]]; then
            # convert a client-1 to client/1 to match the tool-cmds dir hierarchy
            _ape_scope=${_ape_scope/-//}
        fi
        _ape_tools=$(awk -F: '{print $1}' "$config_dir/tool-cmds/$_ape_scope/start")
        if [ -n "$_ape_tools" ]; then
            # All tools found by one call share the same profiler index.
            profiler_count=$((profiler_count + 1))
            # $_ape_tools is intentionally unquoted: one tool name per word.
            for _ape_tool in $_ape_tools; do
                echo "tool: $_ape_tool"
                _ape_type="profiler-$endpoint_label-$_ape_tool"
                _ape_label="$_ape_type-$profiler_count"
                # new RB followers, cumulative over multiple calls to add_profiler_engines()
                new_followers+=" $_ape_label"
                # new profiler engines, only for this call to add_profiler_engines()
                _ape_new+=" $_ape_label"
                addto_clients_servers "$_ape_type" "$profiler_count"
            done
        fi
    fi
    _ape_out="$_ape_new"
}
Loading
Loading