From 7e6975caac5ee84efe147eefa4ca4bfe294bc464 Mon Sep 17 00:00:00 2001 From: Adam Wegrzynek Date: Wed, 18 Apr 2018 08:52:25 +0200 Subject: [PATCH] Add context switches and use getrusage (#50) --- include/Monitoring/ProcessMonitor.h | 26 ++++++------- src/Monitoring.cxx | 3 +- src/ProcessMonitor.cxx | 58 +++++++++++++++-------------- test/testProcessMonitor.cxx | 4 +- 4 files changed, 48 insertions(+), 43 deletions(-) diff --git a/include/Monitoring/ProcessMonitor.h b/include/Monitoring/ProcessMonitor.h index 65fe3698a..57840da62 100644 --- a/include/Monitoring/ProcessMonitor.h +++ b/include/Monitoring/ProcessMonitor.h @@ -13,6 +13,7 @@ #include #include #include +#include #include "Monitoring/Metric.h" @@ -34,29 +35,26 @@ class ProcessMonitor /// Default destructor ~ProcessMonitor() = default; - /// Generates performance metrics (stored in mPsParams vecotr) - std::vector getPidStatus(); + /// Retrieves memory usage (%) + Metric getMemoryUsage(); - /// Generates metrics per network interface: bytesReceived, bytesTransmitted + /// Retrieves bytesReceived, bytesTransmitted per network interface std::vector getNetworkUsage(); + /// Retrieves CPU usage (%) and number of context switches during the interval + std::vector getCpuAndContexts(); private: /// PIDs that are monitored unsigned int mPid; - /// options to be passed to PS - std::string mPsCommand; - - /// mutex to lock vector of PIDs - std::mutex mVectorPidLock; - - /// List of PS params with their types - const std::vector> mPsParams { - {"etime", MetricType::STRING}, {"pcpu", MetricType::DOUBLE}, {"pmem", MetricType::DOUBLE} - }; - /// Executes terminal command std::string exec(const char* cmd); + + /// 'getrusage' values from last execution + struct rusage mPreviousGetrUsage; + + /// Timestamp when process monitoring was executed last time + std::chrono::high_resolution_clock::time_point mTimeLastRun; }; } // namespace monitoring diff --git a/src/Monitoring.cxx b/src/Monitoring.cxx index 415ba3a50..149c7aa2b 
100644 --- a/src/Monitoring.cxx +++ b/src/Monitoring.cxx @@ -115,8 +115,9 @@ void Monitoring::processMonitorLoop(int interval) while (mMonitorRunning) { std::this_thread::sleep_for (std::chrono::milliseconds(interval*10)); if ((++loopCount % 100) != 0) continue; - send(mProcessMonitor->getPidStatus()); + send(mProcessMonitor->getCpuAndContexts()); send(mProcessMonitor->getNetworkUsage()); + send(mProcessMonitor->getMemoryUsage()); loopCount = 0; } } diff --git a/src/ProcessMonitor.cxx b/src/ProcessMonitor.cxx index 1b58aefd8..9b65c73d4 100644 --- a/src/ProcessMonitor.cxx +++ b/src/ProcessMonitor.cxx @@ -5,11 +5,11 @@ #include "Monitoring/ProcessMonitor.h" #include "Exceptions/MonitoringInternalException.h" +#include "MonLogger.h" #include -#include #include -#include "MonLogger.h" #include +#include namespace o2 { @@ -20,10 +20,8 @@ namespace monitoring ProcessMonitor::ProcessMonitor() { mPid = static_cast(::getpid()); - for (auto const param : mPsParams) { - mPsCommand = mPsCommand.empty() ? 
param.first : mPsCommand += (',' + param.first); - } - mPsCommand = "ps --no-headers -o " + mPsCommand + " --pid "; + getrusage(RUSAGE_SELF, &mPreviousGetrUsage); + mTimeLastRun = std::chrono::high_resolution_clock::now(); } std::vector ProcessMonitor::getNetworkUsage() @@ -31,7 +29,7 @@ std::vector ProcessMonitor::getNetworkUsage() std::vector metrics; std::stringstream ss; // get bytes received and transmitted per interface - ss << "cat /proc/" << mPid << "/net/dev | tail -n +3 |awk ' {print $1 $2 \":\" $10}'"; + ss << "cat /proc/" << mPid << "/net/dev | tail -n +3 | grep -v -e 'lo' -e 'virbr0' | awk ' {print $1 $2 \":\" $10}'"; std::string output = exec(ss.str().c_str()); // for each line (each network interfrace) std::istringstream iss(output); @@ -50,31 +48,37 @@ std::vector ProcessMonitor::getNetworkUsage() return metrics; } -std::vector ProcessMonitor::getPidStatus() +Metric ProcessMonitor::getMemoryUsage() { - std::vector metrics; - std::string command = mPsCommand + std::to_string(mPid); + std::string command = "ps --no-headers -o pmem --pid " + std::to_string(mPid); std::string output = exec(command.c_str()); - - // split output into std vector - std::vector pidParams; boost::trim(output); - boost::split(pidParams, output, boost::is_any_of("\t "), boost::token_compress_on); - - // parse output, cast to propriate types - auto j = mPsParams.begin(); - for (auto i = pidParams.begin(); i != pidParams.end(); ++i, ++j) { - if (j->second == MetricType::DOUBLE) { - metrics.emplace_back(Metric{std::stod(*i), j->first}); - } - else if (j->second == MetricType::INT) { - metrics.emplace_back(Metric{std::stoi(*i), j->first}); - } - else { - metrics.emplace_back(Metric{*i, j->first}); - } + return Metric{std::stod(output), "memoryUsagePercentage"}; +} + +std::vector ProcessMonitor::getCpuAndContexts() { + std::vector metrics; + struct rusage currentUsage; + getrusage(RUSAGE_SELF, &currentUsage); + auto timeNow = std::chrono::high_resolution_clock::now(); + double timePassed 
= std::chrono::duration_cast(timeNow - mTimeLastRun).count(); + if (timePassed < 950) { // do not run too often + throw MonitoringInternalException("Process Monitor getrusage", "Do not invoke more often then 1ms"); } + double fractionCpuUsed = ( + currentUsage.ru_utime.tv_sec*1000000.0 + currentUsage.ru_utime.tv_usec - (mPreviousGetrUsage.ru_utime.tv_sec*1000000.0 + mPreviousGetrUsage.ru_utime.tv_usec) + + currentUsage.ru_stime.tv_sec*1000000.0 + currentUsage.ru_stime.tv_usec - (mPreviousGetrUsage.ru_stime.tv_sec*1000000.0 + mPreviousGetrUsage.ru_stime.tv_usec) + ) / timePassed; + + metrics.emplace_back(Metric{ + static_cast(std::round(fractionCpuUsed * 100.0 * 100.0 ) / 100.0), "cpuUsedPercentage" + }); + metrics.emplace_back(Metric{ + static_cast(currentUsage.ru_nivcsw - mPreviousGetrUsage.ru_nivcsw), "involuntaryContextSwitches" + }); + mTimeLastRun = timeNow; + mPreviousGetrUsage = currentUsage; return metrics; } diff --git a/test/testProcessMonitor.cxx b/test/testProcessMonitor.cxx index 0ad66fa2c..2b001cdf1 100644 --- a/test/testProcessMonitor.cxx +++ b/test/testProcessMonitor.cxx @@ -11,8 +11,10 @@ namespace Test { BOOST_AUTO_TEST_CASE(createProcessMonitor) { o2::monitoring::ProcessMonitor processMonitor; +#ifdef _OS_LINUX processMonitor.getNetworkUsage(); - processMonitor.getPidStatus(); + processMonitor.getMemoryUsage(); +#endif } } // namespace Test