From 374b374bc5970e744d925dd2827c2e0e74de697f Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Tue, 9 Jun 2020 12:27:55 -0400 Subject: [PATCH 01/12] rmonitor: Remove race condition when detecting root process #2308 For long-running subprocesses (i.e., running for more than .25s), the resource_monitor measures the resources one last time before the process exits. This is not done for short-running subprocesses, as the measurement overhead becomes noticeable (e.g., madgraph in HEP, that internally uses make, gcc, and a series of very small python and shell scripts). However, this distinction is only relevant for subprocesses, and not for the very first process created by the application to be measured. A measurement before exit is always done for this first process, as this guarantees a measurement of any of the short-running subprocesses via getrusage. The previous code that labeled a process as the first process had a race condition in which several processes may identify themselves as the root process, with the potential of a noticeable overhead. This commit fixes this by labeling the first process before the exec call, rather than using the ld preloaded monitoring helper library to label the correct process. The incorrect behaviour was observed by running scripts with #!/bin/bash. (This was not observed with !#/bin/sh.) --- resource_monitor/src/resource_monitor.c | 4 ++++ resource_monitor/src/rmonitor_helper.c | 14 +++++--------- resource_monitor/src/rmonitor_helper_comm.c | 3 --- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/resource_monitor/src/resource_monitor.c b/resource_monitor/src/resource_monitor.c index 1f6928923c..ec8d03f1ef 100644 --- a/resource_monitor/src/resource_monitor.c +++ b/resource_monitor/src/resource_monitor.c @@ -2006,6 +2006,10 @@ struct rmonitor_process_info *spawn_first_process(const char *executable, char * debug(D_RMON, "executing: %s\n", executable); + char *pid_s = string_format("%d", getpid()); + setenv(RESOURCE_MONITOR_ROOT_PROCESS, pid_s, 1); + free(pid_s); + errno = 0; execvp(executable, argv); //We get here only if execlp fails. diff --git a/resource_monitor/src/rmonitor_helper.c b/resource_monitor/src/rmonitor_helper.c index 908e69c8f5..ffbc0d66c6 100644 --- a/resource_monitor/src/rmonitor_helper.c +++ b/resource_monitor/src/rmonitor_helper.c @@ -59,7 +59,6 @@ static uint64_t start_time = 0; static uint64_t end_time = 0; static int stop_short_running = 0; /* Stop processes that run for less than RESOURCE_MONITOR_SHORT_TIME seconds. */ -static int root_process = 0; /* 1 for first process to be monitored. */ #define declare_original_dlsym(name) __typeof__(name) *original_ ## name; #define define_original_dlsym(name) original_ ## name = dlsym(RTLD_NEXT, #name); @@ -119,13 +118,6 @@ void rmonitor_helper_initialize() { family_of_fd = itable_create(8); } - if(getenv(RESOURCE_MONITOR_ROOT_PROCESS)) { - root_process = 1; - unsetenv(RESOURCE_MONITOR_ROOT_PROCESS); - } else { - root_process = 0; - } - if(getenv(RESOURCE_MONITOR_HELPER_STOP_SHORT)) { stop_short_running = 1; } else { @@ -135,6 +127,10 @@ void rmonitor_helper_initialize() { initializing_helper = 0; } +int is_root_process() { + const char *pid_s = getenv(RESOURCE_MONITOR_ROOT_PROCESS); + return (pid_s && atoi(pid_s) == getpid()); +} pid_t fork() { @@ -585,7 +581,7 @@ void exit_wrapper_preamble(int status) sighandler_t old_handler = signal(SIGCONT, exit_signal_handler); int short_process = 0; - if(root_process) { + if(is_root_process()) { // root process is never considered a short running process short_process = 0; } else if(stop_short_running) { diff --git a/resource_monitor/src/rmonitor_helper_comm.c b/resource_monitor/src/rmonitor_helper_comm.c index d50903ee3e..05fe4217a3 100644 --- a/resource_monitor/src/rmonitor_helper_comm.c +++ b/resource_monitor/src/rmonitor_helper_comm.c @@ -257,9 +257,6 @@ int rmonitor_helper_init(char *lib_default_path, int *fd, int stop_short_running setenv(RESOURCE_MONITOR_HELPER_STOP_SHORT, "1", 1); } - /* First process will unset this variable. */ - setenv(RESOURCE_MONITOR_ROOT_PROCESS, "1", 1); - /* Each process sets this variable to its start time after a fork, * except for the first process, which for which we set here. */ char *start_time = string_format("%" PRId64, timestamp_get()); From e894d7a225fd4703c2ea30442c51c8976116565b Mon Sep 17 00:00:00 2001 From: Benjamin Tovar Date: Mon, 1 Jun 2020 12:01:36 -0400 Subject: [PATCH 02/12] Do not force debug messages to the console #2306 remove syslog remove systemd on important msgs, print to stderr, not terminal update show_help messages, man pages --- batch_job/src/work_queue_factory.c | 2 +- chirp/src/chirp_fuse.c | 2 +- chirp/src/chirp_server.c | 2 +- chirp/src/chirp_status.c | 2 +- chirp/src/chirp_tool.c | 2 +- chirp/src/confuga_adm.c | 2 +- configure | 5 --- doc/man/m4/allpairs_master.m4 | 2 +- doc/man/m4/catalog_server.m4 | 2 +- doc/man/m4/chirp_fuse.m4 | 2 +- doc/man/m4/chirp_server.m4 | 2 +- doc/man/m4/chirp_status.m4 | 2 +- doc/man/m4/confuga_adm.m4 | 2 +- doc/man/m4/makeflow.m4 | 2 +- doc/man/m4/parrot_package_create.m4 | 2 +- doc/man/m4/parrot_run.m4 | 2 +- doc/man/m4/resource_monitor.m4 | 2 +- doc/man/m4/resource_monitor_histograms.m4 | 2 +- doc/man/m4/wavefront_master.m4 | 2 +- doc/man/m4/work_queue_factory.m4 | 2 +- doc/man/m4/work_queue_status.m4 | 2 +- doc/man/m4/work_queue_worker.m4 | 2 +- doc/man/md/allpairs_master.md | 2 +- doc/man/md/catalog_server.md | 2 +- doc/man/md/chirp_fuse.md | 2 +- doc/man/md/chirp_server.md | 2 +- doc/man/md/chirp_status.md | 2 +- doc/man/md/confuga_adm.md | 2 +- doc/man/md/makeflow.md | 2 +- doc/man/md/parrot_package_create.md | 2 +- doc/man/md/parrot_run.md | 2 +- doc/man/md/resource_monitor.md | 2 +- doc/man/md/resource_monitor_histograms.md | 2 +- doc/man/md/wavefront_master.md | 2 +- doc/man/md/work_queue_factory.md | 2 +- doc/man/md/work_queue_status.md | 2 +- doc/man/md/work_queue_worker.md | 2 +- dttools/src/auth_test.c | 2 +- dttools/src/catalog_query_tool.c | 2 +- dttools/src/catalog_server.c | 2 +- dttools/src/debug.c | 43 +------------------ parrot/src/parrot_package_create.c | 2 +- parrot/src/pfs_main.cc | 2 +- resource_monitor/src/resource_monitor.c | 2 +- .../src/resource_monitor_histograms.c | 2 +- work_queue/src/work_queue_status.c | 2 +- work_queue/src/work_queue_worker.c | 2 +- 47 files changed, 47 insertions(+), 91 deletions(-) diff --git a/batch_job/src/work_queue_factory.c b/batch_job/src/work_queue_factory.c index 1a741bc274..4b76b1436e 100644 --- a/batch_job/src/work_queue_factory.c +++ b/batch_job/src/work_queue_factory.c @@ -1020,7 +1020,7 @@ static void show_help(const char *cmd) printf(" %-30s Specify the linking libraries for running mesos (for use with -T mesos).\n", "--mesos-preload"); printf(" %-30s Specify the container image for using Kubernetes (for use with -T k8s).\n", "--k8s-image"); printf(" %-30s Specify the container image that contains work_queue_worker availabe for using Kubernetes (for use with -T k8s).\n", "--k8s-worker-image"); - printf(" %-30s Send debugging to this file (can also be :stderr, :stdout, :syslog, or :journal).\n", "-o,--debug-file="); + printf(" %-30s Send debugging to this file (can also be :stderr, or :stdout).\n", "-o,--debug-file="); printf(" %-30s Specify the size of the debug file (must use with -o option).\n", "-O,--debug-file-size="); printf(" %-30s Specify the binary to use for the worker (relative or hard path). It should accept the same arguments as the default work_queue_worker.\n", "--worker-binary="); printf(" %-30s Will make a best attempt to ensure the worker will execute in the specified OS environment, regardless of the underlying OS.\n","--runos="); diff --git a/chirp/src/chirp_fuse.c b/chirp/src/chirp_fuse.c index 6eb8569298..0a72cc9faa 100644 --- a/chirp/src/chirp_fuse.c +++ b/chirp/src/chirp_fuse.c @@ -583,7 +583,7 @@ static void show_help(const char *cmd) fprintf(stdout, " %-30s Run in foreground for debugging.\n", "-f,--foreground"); fprintf(stdout, " %-30s Comma-delimited list of tickets to use for authentication.\n", "-i,--tickets="); fprintf(stdout, " %-30s Mount options passed to FUSE.\n", "-m,--mount-options="); - fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, :stdout, :syslog, or :journal)\n", "-o,--debug-file="); + fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, or :stdout)\n", "-o,--debug-file="); fprintf(stdout, " %-30s Timeout for network operations. (default is %ds)\n", "-t,--timeout=", chirp_fuse_timeout); fprintf(stdout, " %-30s Show program version.\n", "-v,--version"); fprintf(stdout, " %-30s This message.\n", "-h,--help"); diff --git a/chirp/src/chirp_server.c b/chirp/src/chirp_server.c index b2eae72fa2..e198833a41 100644 --- a/chirp/src/chirp_server.c +++ b/chirp/src/chirp_server.c @@ -1846,7 +1846,7 @@ static void show_help(const char *cmd) fprintf(stdout, "The most common options are:\n"); fprintf(stdout, " %-30s URL of storage directory, like `file://path' or `hdfs://host:port/path'.\n", "-r,--root="); fprintf(stdout, " %-30s Enable debugging for this subsystem.\n", "-d,--debug="); - fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, :stdout, :syslog, or :journal)\n", "-o,--debug-file="); + fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, or :stdout)\n", "-o,--debug-file="); fprintf(stdout, " %-30s Send status updates to this host. (default: `%s')\n", "-u,--advertise=", CATALOG_HOST); fprintf(stdout, " %-30s Show version info.\n", "-v,--version"); fprintf(stdout, " %-30s This message.\n", "-h,--help"); diff --git a/chirp/src/chirp_status.c b/chirp/src/chirp_status.c index 309b1be0ef..ecf8f38c0e 100644 --- a/chirp/src/chirp_status.c +++ b/chirp/src/chirp_status.c @@ -46,7 +46,7 @@ static void show_help(const char *cmd) fprintf(stdout, "where options are:\n"); fprintf(stdout, " %-30s Query the catalog on this host.\n", "-c,--catalog="); fprintf(stdout, " %-30s Enable debugging for this sybsystem\n", "-d,--debug="); - fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, :stdout, :syslog, or :journal)\n", "-o,--debug-file="); + fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, or :stdout)\n", "-o,--debug-file="); fprintf(stdout, " %-30s Rotate file once it reaches this size. (default 10M, 0 disables)\n", "-O,--debug-rotate-max="); fprintf(stdout, " %-30s Only show servers with this space available. (example: -A 100MB)\n", "-A,--server-space="); fprintf(stdout, " %-30s Only show servers with this project.\n", " --server-project="); diff --git a/chirp/src/chirp_tool.c b/chirp/src/chirp_tool.c index 5775791fda..0c4d3f9ff0 100644 --- a/chirp/src/chirp_tool.c +++ b/chirp/src/chirp_tool.c @@ -1198,7 +1198,7 @@ static void show_help(const char *cmd) fprintf(stdout, "where options are:\n"); fprintf(stdout, " %-30s Require this authentication mode.\n", "-a,--auth="); fprintf(stdout, " %-30s Enable debugging for this subsystem.\n", "-d,--debug="); - fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, :stdout, :syslog, or :journal)\n", "-o,--debug-file="); + fprintf(stdout, " %-30s Send debugging to this file. (can also be :stderr, or :stdout)\n", "-o,--debug-file="); fprintf(stdout, " %-30s Comma-delimited list of tickets to use for authentication.\n", "-i,--tickets="); fprintf(stdout, " %-30s Long transfer information.\n", "-l,--verbose"); fprintf(stdout, " %-30s Set remote operation timeout.\n", "-t,--timeout=