diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c2c1d304..107136e6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -5,7 +5,7 @@ # All pcm-* executables set(PROJECT_NAMES pcm pcm-numa pcm-latency pcm-power pcm-msr pcm-memory pcm-tsx pcm-pcie pcm-core pcm-iio pcm-lspci pcm-pcicfg pcm-mmio pcm-tpmi pcm-raw pcm-accel) -file(GLOB COMMON_SOURCES msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) +file(GLOB COMMON_SOURCES pcm-accel-common.cpp msr.cpp cpucounters.cpp pci.cpp mmio.cpp bw.cpp utils.cpp topology.cpp debug.cpp threadpool.cpp uncore_pmu_discovery.cpp) if (APPLE) file(GLOB UNUX_SOURCES dashboard.cpp) @@ -143,6 +143,8 @@ foreach(PROJECT_NAME ${PROJECT_NAMES}) file(READ pcm-sensor-server.service.in SENSOR_SERVICE_IN) string(REPLACE "@@CMAKE_INSTALL_SBINDIR@@" "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_SBINDIR}" SENSOR_SERVICE "${SENSOR_SERVICE_IN}") file(WRITE "${CMAKE_BINARY_DIR}/pcm-sensor-server.service" "${SENSOR_SERVICE}") + file(GLOB PROJECT_FILE ${PROJECT_NAME}.cpp pcm-accel-common.h pcm-accel-common.cpp) + target_include_directories(pcm-sensor-server PUBLIC ${CMAKE_SOURCE_DIR}) if(LINUX_SYSTEMD) install(FILES "${CMAKE_BINARY_DIR}/pcm-sensor-server.service" DESTINATION "${LINUX_SYSTEMD_UNITDIR}") endif(LINUX_SYSTEMD) diff --git a/src/cpucounters.cpp b/src/cpucounters.cpp index 2908f235..e0c526f6 100644 --- a/src/cpucounters.cpp +++ b/src/cpucounters.cpp @@ -2737,6 +2737,8 @@ PCM::PCM() : num_phys_cores_per_socket(0), num_online_cores(0), num_online_sockets(0), + accel(0), + accel_counters_num_max(0), core_gen_counter_num_max(0), core_gen_counter_num_used(0), // 0 means no core gen counters used core_gen_counter_width(0), @@ -3502,6 +3504,7 @@ PCM::ErrorCode PCM::program(const PCM::ProgramMode mode_, const void * parameter lastProgrammedCustomCounters.clear(); lastProgrammedCustomCounters.resize(num_cores); core_global_ctrl_value = 0ULL; + isHWTMAL1Supported(); // 
init value to prevent MT races std::vector > asyncCoreResults; std::vector programmingStatuses(num_cores, PCM::Success); @@ -6480,6 +6483,26 @@ uint32 PCM::getNumSockets() const return (uint32)num_sockets; } +uint32 PCM::getAccel() const +{ + return accel; +} + +void PCM::setAccel(uint32 input) +{ + accel = input; +} + +uint32 PCM::getNumberofAccelCounters() const +{ + return accel_counters_num_max; +} + +void PCM::setNumberofAccelCounters(uint32 input) +{ + accel_counters_num_max = input; +} + uint32 PCM::getNumOnlineSockets() const { return (uint32)num_online_sockets; diff --git a/src/cpucounters.h b/src/cpucounters.h index 3c3bf346..eba20082 100644 --- a/src/cpucounters.h +++ b/src/cpucounters.h @@ -605,6 +605,8 @@ class PCM_API PCM int32 num_phys_cores_per_socket; int32 num_online_cores; int32 num_online_sockets; + uint32 accel; + uint32 accel_counters_num_max; uint32 core_gen_counter_num_max; uint32 core_gen_counter_num_used; uint32 core_gen_counter_width; @@ -1506,9 +1508,29 @@ class PCM_API PCM \return Number of sockets in the system */ uint32 getNumSockets() const; + + /*! \brief Reads the accel type in the system + \return accel type + */ + uint32 getAccel() const; + + /*! \brief Sets the accel type in the system + \param input accel type to store + */ + void setAccel(uint32 input); + + /*! \brief Reads the number of accel counters in the system + \return number of counters + */ + uint32 getNumberofAccelCounters() const; + + /*! \brief Sets the number of accel counters in the system + \param input number of counters to store + */ + void setNumberofAccelCounters(uint32 input); /*! 
\brief Reads number of online sockets (CPUs) in the system - \return Number of online sockets in the system + \return Number of online sockets in the system */ uint32 getNumOnlineSockets() const; @@ -3374,6 +3396,11 @@ class SystemCounterState : public SocketCounterState } public: + typedef uint32_t h_id; + typedef uint32_t v_id; + typedef std::map,uint64_t> ctr_data; + typedef std::vector dev_content; + std::vector accel_counters; std::vector CXLWriteMem,CXLWriteCache; friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & before, const SystemCounterState & after); friend uint64 getIncomingQPILinkBytes(uint32 socketNr, uint32 linkNr, const SystemCounterState & now); @@ -3385,6 +3412,7 @@ class SystemCounterState : public SocketCounterState uncoreTSC(0) { PCM * m = PCM::getInstance(); + accel_counters.resize(m->getNumberofAccelCounters()); CXLWriteMem.resize(m->getNumSockets(),0); CXLWriteCache.resize(m->getNumSockets(),0); incomingQPIPackets.resize(m->getNumSockets(), diff --git a/src/dashboard.cpp b/src/dashboard.cpp index 4ed3e756..55f17567 100644 --- a/src/dashboard.cpp +++ b/src/dashboard.cpp @@ -4,7 +4,8 @@ #include #include #include -#include "cpucounters.h" + +#include "pcm-accel-common.h" #include "dashboard.h" namespace pcm { @@ -515,6 +516,12 @@ std::string influxDBCore_Aggregate_Core_Counters(const std::string& S, const std return influxDB_Counters(S, m, "Core Aggregate_Core Counters"); } +std::string influxDBAccel_Counters(const std::string& S, const std::string& m) +{ + AcceleratorCounterState * accs = AcceleratorCounterState::getInstance(); + return std::string("mean(\\\"Sockets_") + S + "_Accelerators_" +accs->getAccelCounterName()+" Counters Device_" + m + "\\\")"; +} + std::string influxDBCore_Aggregate_Core_Counters(const std::string& m) { return influxDB_Counters(m, "Core Aggregate_Core Counters"); @@ -542,6 +549,7 @@ std::mutex dashboardGenMutex; std::string getPCMDashboardJSON(const PCMDashboardType 
type, int ns, int nu, int nc) { auto pcm = PCM::getInstance(); + auto accs = AcceleratorCounterState::getInstance(); std::lock_guard dashboardGenGuard(dashboardGenMutex); const size_t NumSockets = (ns < 0) ? pcm->getNumSockets() : ns; const size_t NumUPILinksPerSocket = (nu < 0) ? pcm->getQPILinksPerSocket() : nu; @@ -783,6 +791,32 @@ std::string getPCMDashboardJSON(const PCMDashboardType type, int ns, int nu, int dashboard.push(panel); dashboard.push(panel1); } + if (pcm->getAccel() != ACCEL_NOCONFIG){ + auto accelCounters = [&](const std::string & m) + { + auto panel = std::make_shared(0, y, width, height, accs->getAccelCounterName() + " " + m,"Byte/sec", false); + std::shared_ptr panel1; + panel1 = std::make_shared(width, y, max_width - width, height, std::string("Current ") +accs->getAccelCounterName() + " (Byte/sec)"); + y += height; + for (size_t s = 0; s < accs->getNumOfAccelDevs(); ++s) + { + const auto S = std::to_string(s); + const auto suffix = "/1"; + auto t = createTarget("Device "+S, + "mean(\\\"Accelerators_"+accs->getAccelCounterName()+" Counters Device " + S + "_" + m + "\\\")" + suffix, + "rate(" + prometheusMetric(accs->remove_string_inside_use(m)) + "{instance=\\\"$node\\\", aggregate=\\\"system\\\", source=\\\"accel\\\" ,"+accs->getAccelCounterName()+"device=\\\"" + S + "\\\"}" + interval + ")" + suffix); + panel->push(t); + panel1->push(t); + + } + dashboard.push(panel); + dashboard.push(panel1); + }; + for (int j =0;jgetNumberOfCounters();j++) + { + accelCounters(accs->getAccelIndexCounterName(j)); + } + } for (size_t s = 0; s < NumSockets; ++s) { const auto S = std::to_string(s); diff --git a/src/pci.h b/src/pci.h index da2f15e1..ec44e99f 100644 --- a/src/pci.h +++ b/src/pci.h @@ -178,6 +178,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct auto probe = [&f](const uint32 group, const uint32 bus, const uint32 device, const uint32 function) { + // std::cerr << "Probing " << std::hex << group << ":" << bus << 
":" << device << ":" << function << " " << std::dec << "\n"; uint32 value = 0; try { @@ -191,6 +192,7 @@ inline void forAllIntelDevices(F f, int requestedDevice = -1, int requestedFunct } const uint32 vendor_id = value & 0xffff; const uint32 device_id = (value >> 16) & 0xffff; + // std::cerr << "Found dev " << std::hex << vendor_id << ":" << device_id << std::dec << "\n"; if (vendor_id != PCM_INTEL_PCI_VENDOR_ID) { return; @@ -256,12 +258,13 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) { forAllIntelDevices([&](const uint32 group, const uint32 bus, const uint32 device, const uint32 function, const uint32 /* device_id */) { + // std::cerr << "Intel device scan. found " << std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << std::dec; uint32 status{0}; PciHandleType h(group, bus, device, function); h.read32(6, &status); // read status if (status & 0x10) // has capability list { - // std::cout << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; + // std::cerr << "Intel device scan. found "<< std::hex << group << ":" << bus << ":" << device << ":" << function << " " << device_id << " with capability list\n" << std::dec; VSEC header; uint64 offset = 0x100; do @@ -274,11 +277,11 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) { return; } - // std::cout << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; - // std::cout << ".. found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; - if (matchFunc(header)) // UNCORE_DISCOVERY_DVSEC_ID_PMON + // std::cerr << "offset 0x" << std::hex << offset << " header.fields.cap_id: 0x" << header.fields.cap_id << std::dec << "\n"; + // std::cerr << ".. 
found entryID: 0x" << std::hex << header.fields.entryID << std::dec << "\n"; + if (matchFunc(header)) { - // std::cout << ".... found UNCORE_DISCOVERY_DVSEC_ID_PMON\n"; + // std::cerr << ".... found match\n"; auto barOffset = 0x10 + header.fields.tBIR * 4; uint32 bar = 0; if (h.read32(barOffset, &bar) == sizeof(uint32) && bar != 0) // read bar @@ -291,7 +294,12 @@ void processDVSEC(MatchFunc matchFunc, ProcessFunc processFunc) std::cerr << "Error: can't read bar from offset " << barOffset << " \n"; } } + const uint64 lastOffset = offset; offset = header.fields.cap_next & ~3; + if (lastOffset == offset) // the offset did not change + { + return; // deadlock protection + } } while (1); } }); diff --git a/src/pcm-accel-common.cpp b/src/pcm-accel-common.cpp new file mode 100644 index 00000000..9aa7eb7e --- /dev/null +++ b/src/pcm-accel-common.cpp @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2022-2023, Intel Corporation +// written by White.Hu, Pavithran P + +#include "pcm-accel-common.h" +#include "cpucounters.h" +#include + +idx_ccr* idx_get_ccr(uint64_t& ccr) +{ + return new spr_idx_ccr(ccr); +} + +uint32_t AcceleratorCounterState::getNumOfAccelDevs() +{ + uint32_t dev_count = 0; + + if (evt_ctx.accel >= ACCEL_MAX || evt_ctx.m == NULL) + return 0; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + dev_count = evt_ctx.m->getNumOfIDXAccelDevs(PCM::IDX_IAA); + break; + case ACCEL_DSA: + dev_count = evt_ctx.m->getNumOfIDXAccelDevs(PCM::IDX_DSA); + break; + case ACCEL_QAT: + dev_count = evt_ctx.m->getNumOfIDXAccelDevs(PCM::IDX_QAT); + break; + default: + dev_count = 0; + break; + } + + return dev_count; +} + +uint32_t AcceleratorCounterState::getMaxNumOfAccelCtrs() +{ + uint32_t ctr_count = 0; + + if (evt_ctx.accel >= ACCEL_MAX || evt_ctx.m == NULL) + return 0; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + ctr_count = evt_ctx.m->getMaxNumOfIDXAccelCtrs(evt_ctx.accel); + break; + default: + ctr_count 
= 0; + break; + } + + return ctr_count; +} + +int32_t AcceleratorCounterState::programAccelCounters() +{ + std::vector rawEvents; + std::vector filters_wq, filters_tc, filters_pgsz, filters_xfersz, filters_eng; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX || evt_ctx.ctrs.size() == 0 || evt_ctx.ctrs.size() > getMaxNumOfAccelCtrs()) + return -1; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + for (auto pctr = evt_ctx.ctrs.begin(); pctr != evt_ctx.ctrs.end(); ++pctr) + { + rawEvents.push_back(pctr->ccr); + filters_wq.push_back(pctr->cfr_wq); + filters_tc.push_back(pctr->cfr_tc); + filters_pgsz.push_back(pctr->cfr_pgsz); + filters_xfersz.push_back(pctr->cfr_xfersz); + filters_eng.push_back(pctr->cfr_eng); + //std::cout<<"ctr idx=0x" << std::hex << pctr->idx << " hid=0x" << std::hex << pctr->h_id << " vid=0x" << std::hex << pctr->v_id <<" ccr=0x" << std::hex << pctr->ccr << "\n"; + //std::cout<<"mul=0x" << std::hex << pctr->multiplier << " div=0x" << std::hex << pctr->divider << "\n" << std::dec; + } + evt_ctx.m->programIDXAccelCounters(idx_accel_mapping[evt_ctx.accel], rawEvents, filters_wq, filters_eng, filters_tc, filters_pgsz, filters_xfersz); + break; + default: + break; + } + + return 0; +} + +SimpleCounterState AcceleratorCounterState::getAccelCounterState(uint32 dev, uint32 ctr_index) +{ + SimpleCounterState result; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX || dev >= getNumOfAccelDevs() || ctr_index >= getMaxNumOfAccelCtrs()) + return result; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + result = evt_ctx.m->getIDXAccelCounterState(evt_ctx.accel, dev, ctr_index); + break; + case ACCEL_MAX: + case ACCEL_NOCONFIG: + break; + } + + return result; +} + +bool AcceleratorCounterState::isAccelCounterAvailable() +{ + bool ret = true; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX) + ret =false; + + if (getNumOfAccelDevs() == 0) + ret = false; + + return ret; 
+} + +std::string AcceleratorCounterState::getAccelCounterName() +{ + std::string ret; + + switch (evt_ctx.accel) + { + case ACCEL_IAA: + ret = "iaa"; + break; + case ACCEL_DSA: + ret = "dsa"; + break; + case ACCEL_QAT: + ret = "qat"; + break; + default: + ret = "id=" + std::to_string(evt_ctx.accel) + "(unknown)"; + } + + return ret; +} + +/*! \brief Looks up where accelerator device dev is attached + + \param dev accelerator device index + \param loc_map SOCKET_MAP queries the CPU socket id, NUMA_MAP queries the NUMA node + \param location receives the queried value; left untouched when false is returned + \return false if no PCM instance or valid accelerator type is configured, or loc_map is not recognized +*/ +bool AcceleratorCounterState::getAccelDevLocation( uint32_t dev, const ACCEL_DEV_LOC_MAPPING loc_map, uint32_t &location) +{ + bool ret = true; + + if (evt_ctx.m == NULL || evt_ctx.accel >= ACCEL_MAX) /* same guard as the other accessors; avoids dereferencing a null PCM pointer */ + return false; + + switch (loc_map) + { + case SOCKET_MAP: + location = evt_ctx.m->getCPUSocketIdOfIDXAccelDev(evt_ctx.accel, dev); + break; + case NUMA_MAP: + location = evt_ctx.m->getNumaNodeOfIDXAccelDev(evt_ctx.accel, dev); + break; + default: + ret = false; + } + + return ret; +} + +/*! \brief Computes number of accelerator counters present in system + + \return Number of accel counters in system +*/ +int AcceleratorCounterState::getNumberOfCounters(){ + + return getCounters().size(); +} + +std::string AcceleratorCounterState::getAccelIndexCounterName(int ctr_index) +{ + accel_counter pctr = getCounters().at(ctr_index); + return pctr.v_event_name; +} + +/*! \brief Computes the scaled increase of one accelerator counter between two snapshots + + \param dev accelerator device index + \param before earlier system counter state + \param after later system counter state + \param ctr_index index into the configured counter list; at() throws std::out_of_range if it is invalid + \return raw event delta * multiplier / divider; 0 if either snapshot has no slot for this device/counter or the divider is 0 +*/ +uint64 AcceleratorCounterState::getAccelIndexCounter(uint32 dev, const SystemCounterState & before,const SystemCounterState & after,int ctr_index) +{ + const accel_counter & pctr = getCounters().at(ctr_index); + const size_t slot = size_t(dev) * getCounters().size() + size_t(ctr_index); /* device-major layout, as filled by readAccelCounters */ + if (slot >= before.accel_counters.size() || slot >= after.accel_counters.size() || pctr.divider == 0) + { + return 0; /* snapshot built before accelerator counters were read (vector still empty), or unusable scaling factor */ + } + const uint64_t raw_result = getNumberOfEvents(before.accel_counters[slot], after.accel_counters[slot]); + return uint64_t (raw_result * pctr.multiplier / (double) pctr.divider ); +} + +int idx_evt_parse_handler(evt_cb_type cb_type, void *cb_ctx, counter &base_ctr, std::map &ofm, std::string key, uint64 numValue) +{ + accel_evt_parse_context *context = (accel_evt_parse_context *)cb_ctx; + // PCM *m = context->m; + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + + if (cb_type == 
EVT_LINE_START) //this event will be called per line(start) + { + context->ctr.cfr_wq = 0xFFFF; + context->ctr.cfr_eng = 0xFFFF; + context->ctr.cfr_tc = 0xFFFF; + context->ctr.cfr_pgsz = 0xFFFF; + context->ctr.cfr_xfersz = 0xFFFF; + context->ctr.ccr = 0; + } + else if (cb_type == EVT_LINE_FIELD) //this event will be called per field of line + { + std::unique_ptr pccr(idx_get_ccr(context->ctr.ccr)); + + //std::cout << "Key:" << key << " Value:" << value << " opcodeFieldMap[key]:" << ofm[key] << "\n"; + switch (ofm[key]) + { + case PCM::EVENT_SELECT: + pccr->set_event_select(numValue); + //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; + break; + case PCM::ENABLE: + pccr->set_enable(numValue); + //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; + break; + case EVENT_CATEGORY: + pccr->set_event_category(numValue); + //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; + break; + case FILTER_WQ: + context->ctr.cfr_wq = (uint32_t)numValue; + break; + case FILTER_ENG: + context->ctr.cfr_eng = (uint32_t)numValue; + break; + case FILTER_TC: + context->ctr.cfr_tc = (uint32_t)numValue; + break; + case FILTER_PGSZ: + context->ctr.cfr_pgsz = (uint32_t)numValue; + break; + case FILTER_XFERSZ: + context->ctr.cfr_xfersz = (uint32_t)numValue; + break; + case PCM::INVALID: + default: + std::cerr << "Field in -o file not recognized. The key is: " << key << "\n"; + return -1; + } + } + else if(cb_type == EVT_LINE_COMPLETE) //this event will be called every line(end) + { + if (context->accel == ACCEL_IAA && base_ctr.h_event_name != "IAA") + { + return 0; //skip non-IAA cfg line + } + else if(context->accel == ACCEL_DSA && base_ctr.h_event_name != "DSA") + { + return 0; //skip non-DSA cfg line + } + else if(context->accel == ACCEL_QAT && base_ctr.h_event_name != "QAT") + { + return 0; //skip non-QAT cfg line + } + + //Validate the total number of counter exceed the maximum or not. 
+ if ((uint32)base_ctr.idx >= accs_->getMaxNumOfAccelCtrs()) + { + std::cerr << "line parse KO due to invalid value!" << std::dec << "\n"; + return 0; //skip the invalid cfg line + } + + context->ctr.h_event_name = base_ctr.h_event_name; + context->ctr.v_event_name = base_ctr.v_event_name; + context->ctr.idx = base_ctr.idx; + context->ctr.multiplier = base_ctr.multiplier; + context->ctr.divider = base_ctr.divider; + context->ctr.h_id = base_ctr.h_id; + context->ctr.v_id = base_ctr.v_id; + //std::cout << "line parse OK, ctrcfg=0x" << std::hex << context->ctr.ccr << ", h_event_name=" << base_ctr.h_event_name << ", v_event_name=" << base_ctr.v_event_name; + //std::cout << ", h_id=0x" << std::hex << base_ctr.h_id << ", v_id=0x" << std::hex << base_ctr.v_id; + //std::cout << ", idx=0x"<< std::hex << base_ctr.idx << ", multiplier=0x" << std::hex << base_ctr.multiplier << ", divider=0x" << std::hex << base_ctr.divider << std::dec << "\n"; + context->ctrs.push_back(context->ctr); + } + + return 0; +} + +std::vector& AcceleratorCounterState::getCounters(){ + return evt_ctx.ctrs; +} + +uint32_t AcceleratorCounterState::getAccel() +{ + return evt_ctx.accel; +} + +/*! \brief Reads every configured counter of every accelerator device into sycs_.accel_counters + + The vector is resized to (device count * configured counter count) and filled device-major: + slot = dev * counter count + counter index. The same total is published through PCM::setNumberofAccelCounters. + For QAT a telemetry refresh is requested on each device before its counters are read. + \param sycs_ system counter state that receives the accelerator counter snapshots +*/ +void readAccelCounters(SystemCounterState& sycs_) +{ + AcceleratorCounterState *accs_ = AcceleratorCounterState::getInstance(); + PCM *pcm = PCM::getInstance(); + // const uint32_t delay_ms = uint32_t(delay * 1000); + const uint32_t dev_count = accs_->getNumOfAccelDevs(); + const uint32_t counter_nb = accs_->getCounters().size(); + pcm->setNumberofAccelCounters(dev_count*counter_nb); + uint32_t ctr_index = 0; + // accel_content accel_results(ACCEL_MAX, dev_content(ACCEL_IP_DEV_COUNT_MAX, ctr_data())); + sycs_.accel_counters.resize(dev_count*counter_nb); + // programAccelCounters(m, accel, ctrs); + + switch (accs_->getAccel()) + { + case ACCEL_IAA: + case ACCEL_DSA: + for (uint32_t dev = 0; dev != dev_count; ++dev) + { + ctr_index = 0; + 
for (auto pctr = accs_->getCounters().begin(); pctr != accs_->getCounters().end(); ++pctr) + { + sycs_.accel_counters[dev*counter_nb + ctr_index] = accs_->getAccelCounterState( dev, ctr_index); + ctr_index++; + } + } + break; + + case ACCEL_QAT: + // MySleepMs(delay_ms); + + for (uint32_t dev = 0; dev != dev_count; ++dev) + { + pcm->controlQATTelemetry(dev, PCM::QAT_TLM_REFRESH); + ctr_index = 0; + for (auto pctr = accs_->getCounters().begin();pctr != accs_->getCounters().end(); ++pctr) + { + sycs_.accel_counters[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); + + // raw_result = currState[dev*counter_nb + ctr_index].getRawData(); + // trans_result = uint64_t (raw_result * pctr->multiplier / (double) pctr->divider ); + + //accel_result[evt_ctx.accel][dev][std::pair(pctr->h_id,pctr->v_id)] = trans_result; + //std::cout << "collect_data: accel=" << accel << " dev=" << dev << " h_id=" << pctr->h_id << " v_id=" << pctr->v_id << " data=" << std::hex << trans_result << "\n" << std::dec; + ctr_index++; + } + } + break; + } + +} + +AcceleratorCounterState* AcceleratorCounterState::instance = NULL; +/*! \brief Returns the single AcceleratorCounterState object, creating it on first use + + Creation is serialized by a function-local mutex. + \return pointer to the singleton instance +*/ +AcceleratorCounterState * AcceleratorCounterState::getInstance() + { + /* a real mutex is required here: a default-constructed std::unique_lock owns no mutex and locks nothing */ + static std::mutex instanceCreationMutex; + std::lock_guard<std::mutex> instanceCreationGuard(instanceCreationMutex); + + if (instance == NULL) + { + instance = new AcceleratorCounterState(); + } + return instance; + } + +/*! \brief Returns text with every parenthesized section, and the parentheses themselves, removed + + \param text input string + \return the characters of text that lie outside of any parentheses +*/ +std::string AcceleratorCounterState::remove_string_inside_use(std::string text) { + std::string result = ""; + int open_use_count = 0; + for (char c : text) { + if (c == '(') { + open_use_count += 1; + } else if (c == ')' ) { + if (open_use_count > 0) open_use_count -= 1; /* ignore an unmatched ')' instead of going negative and dropping the rest of the string */ + } else if (open_use_count == 0) { + result += c; + } + } + return result; +} + +void AcceleratorCounterState::setEvents(PCM *m,ACCEL_IP accel, std::string specify_evtfile,bool evtfile) +{ + evt_ctx.m = m; + 
evt_ctx.accel = accel; + if (isAccelCounterAvailable() == true) + { + if (evtfile==false) //All platform use the spr config file by default. + { + ev_file_name = "opCode-143-accel.txt"; + } + else + { + ev_file_name = specify_evtfile; + } + //std::cout << "load event config file from:" << ev_file_name << "\n"; + } + else + { + std::cerr << "Error: " << getAccelCounterName() << " device is NOT available/ready with this platform! Program aborted\n"; + exit(EXIT_FAILURE); + } + + switch (accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + case ACCEL_QAT: + opcodeFieldMap["hname"] = PCM::H_EVENT_NAME; + opcodeFieldMap["vname"] = PCM::V_EVENT_NAME; + opcodeFieldMap["multiplier"] = PCM::MULTIPLIER; + opcodeFieldMap["divider"] = PCM::DIVIDER; + opcodeFieldMap["ctr"] = PCM::COUNTER_INDEX; + opcodeFieldMap["en"] = PCM::ENABLE; + opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT; + opcodeFieldMap["ev_cat"] = EVENT_CATEGORY; + opcodeFieldMap["filter_wq"] = FILTER_WQ; + opcodeFieldMap["filter_eng"] = FILTER_ENG; + opcodeFieldMap["filter_tc"] = FILTER_TC; + opcodeFieldMap["filter_pgsz"] = FILTER_PGSZ; + opcodeFieldMap["filter_xfersz"] = FILTER_XFERSZ; + + p_evt_handler = idx_evt_parse_handler; + evt_ctx.ctrs.clear();//fill the ctrs by evt_handler callback func. + break; + default: + std::cerr << "Error: Accel type=0x" << std::hex << accel << " is not supported! Program aborted\n" << std::dec; + exit(EXIT_FAILURE); + } + + try + { + load_events(ev_file_name, opcodeFieldMap, p_evt_handler, (void *)&evt_ctx); + } + catch (std::exception & e) + { + std::cerr << "Error: " << e.what() << "\n"; + std::cerr << "Error: event cfg file have the problem, please double check it! Program aborted\n"; + exit(EXIT_FAILURE); + } + if (evt_ctx.ctrs.size() ==0 || evt_ctx.ctrs.size() > getMaxNumOfAccelCtrs()) + { + std::cout<< evt_ctx.ctrs.size()<< " " << getMaxNumOfAccelCtrs(); + std::cerr << "Error: event counter size is 0 or exceed maximum, please check the event cfg file! 
Program aborted\n"; + exit(EXIT_FAILURE); + } + + if (accel == ACCEL_QAT) + { + const uint32_t dev_count = getNumOfAccelDevs(); + for (uint32_t dev = 0; dev != dev_count; ++dev) + { + m->controlQATTelemetry(dev, PCM::QAT_TLM_START); //start the QAT telemetry service + } + } +} \ No newline at end of file diff --git a/src/pcm-accel-common.h b/src/pcm-accel-common.h new file mode 100644 index 00000000..387b25ac --- /dev/null +++ b/src/pcm-accel-common.h @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2022-2023, Intel Corporation +// written by White.Hu, Pavithran P + +#pragma once +#include "cpucounters.h" +#ifdef __linux__ +#include +#endif +using namespace pcm; + +#define PCM_DELAY_DEFAULT 3.0 // in seconds + +class idx_ccr { + public: + virtual uint64_t get_event_select() const = 0; + virtual void set_event_select(uint64_t value) = 0; + virtual uint64_t get_event_category() const = 0; + virtual void set_event_category(uint64_t value) = 0; + virtual uint64_t get_enable() const = 0; + virtual void set_enable(uint64_t value) = 0; + virtual uint64_t get_ccr_value() const = 0; + virtual void set_ccr_value(uint64_t value) = 0; + virtual ~idx_ccr() {}; +}; + +class spr_idx_ccr: public idx_ccr { + public: + spr_idx_ccr(uint64_t &v){ + ccr_value = &v; + } + virtual uint64_t get_event_select() const { //EVENT bit, bit 32 + return ((*ccr_value >> 32) & 0xFFFFFFF); + } + virtual void set_event_select(uint64_t value) { + *ccr_value |= (value << 32); + } + virtual uint64_t get_event_category() const { //EVENT Categorg, bit 8 + return ((*ccr_value >> 8) & 0xF); + } + virtual void set_event_category(uint64_t value) { + *ccr_value |= (value << 8); + } + virtual uint64_t get_enable() const { //Enable counter, bit 0 + return ((*ccr_value >> 0 ) & 0x01); + } + virtual void set_enable(uint64_t value) { + *ccr_value |= (value << 0); + } + virtual uint64_t get_ccr_value() const { + return *ccr_value; + } + virtual void set_ccr_value(uint64_t value) { + 
*ccr_value = value; + } + + private: + uint64_t* ccr_value = NULL; +}; + +idx_ccr* idx_get_ccr(uint64_t& ccr); + +typedef enum +{ + ACCEL_IAA, + ACCEL_DSA, + ACCEL_QAT, + ACCEL_MAX, + ACCEL_NOCONFIG, +} ACCEL_IP; + +enum IDXPerfmonField +{ + DPF_BASE = 0x100, //start from 0x100 to different with PerfmonField in cpucounter.h + EVENT_CATEGORY, + FILTER_WQ, + FILTER_ENG, + FILTER_TC, + FILTER_PGSZ, + FILTER_XFERSZ +}; + +typedef enum +{ + SOCKET_MAP, + NUMA_MAP, +} ACCEL_DEV_LOC_MAPPING; + +const std::vector idx_accel_mapping = +{ + PCM::IDX_IAA, + PCM::IDX_DSA, + PCM::IDX_QAT +}; + +#define ACCEL_IP_DEV_COUNT_MAX (16) + +typedef uint32_t h_id; +typedef uint32_t v_id; +typedef std::map,uint64_t> ctr_data; +typedef std::vector dev_content; +typedef std::vector accel_content; + +struct accel_counter : public counter { + //filter config for IDX Accelerator. + uint32_t cfr_wq = 0; + uint32_t cfr_eng = 0; + uint32_t cfr_tc = 0; + uint32_t cfr_pgsz = 0; + uint32_t cfr_xfersz = 0; +}; + +typedef struct +{ + PCM *m; + ACCEL_IP accel; + accel_counter ctr; + std::vector ctrs; +} accel_evt_parse_context; + +typedef int (*pfn_evt_handler)(evt_cb_type, void *, counter &, std::map &, std::string, uint64); + +int idx_evt_parse_handler(evt_cb_type cb_type, void *cb_ctx, counter &base_ctr, std::map &ofm, std::string key, uint64 numValue); +void readAccelCounters(SystemCounterState &sycs_); + +class AcceleratorCounterState { + + private: + AcceleratorCounterState(){}; // forbidden to call directly because it is a singleton + AcceleratorCounterState & operator = (const AcceleratorCounterState &) = delete; + static AcceleratorCounterState * instance; + accel_evt_parse_context evt_ctx = { {}, {}, {}, {} }; + public: + AcceleratorCounterState(const AcceleratorCounterState& obj) = delete; + // std::mutex instanceCreationMutex; + static AcceleratorCounterState * getInstance(); + std::map opcodeFieldMap; + std::string ev_file_name; + pfn_evt_handler p_evt_handler = NULL; + + void 
setEvents(PCM * m,ACCEL_IP accel,std::string specify_evtfile,bool evtfile); + uint32_t getNumOfAccelDevs(); + uint32_t getAccel(); + uint32_t getMaxNumOfAccelCtrs(); + std::vector& getCounters(); + int32_t programAccelCounters(); + SimpleCounterState getAccelCounterState(uint32 dev, uint32 ctr_index); + bool isAccelCounterAvailable(); + std::string getAccelCounterName(); + void setDSA(); + bool getAccelDevLocation( uint32_t dev, const ACCEL_DEV_LOC_MAPPING loc_map, uint32_t &location); + // void readAccelCounters(SystemCounterState sycs_); + int getNumberOfCounters(); + std::string getAccelIndexCounterName(int ctr_index); + std::string remove_string_inside_use(std::string text); + uint64 getAccelIndexCounter(uint32 dev, const SystemCounterState & before,const SystemCounterState & after,int ctr_index); + +}; \ No newline at end of file diff --git a/src/pcm-accel.cpp b/src/pcm-accel.cpp index 021ea43d..8988589d 100644 --- a/src/pcm-accel.cpp +++ b/src/pcm-accel.cpp @@ -2,7 +2,7 @@ // Copyright (c) 2022, Intel Corporation // written by White.Hu -#include "cpucounters.h" +#include "pcm-accel-common.h" #ifdef _MSC_VER #pragma warning(disable : 4996) // for sprintf #include @@ -20,285 +20,10 @@ #ifdef _MSC_VER #include "freegetopt/getopt.h" #endif -#ifdef __linux__ -#include -#endif #include "lspci.h" #include "utils.h" -using namespace std; using namespace pcm; - -#define PCM_DELAY_DEFAULT 3.0 // in seconds - -class idx_ccr { - public: - virtual uint64_t get_event_select() const = 0; - virtual void set_event_select(uint64_t value) = 0; - virtual uint64_t get_event_category() const = 0; - virtual void set_event_category(uint64_t value) = 0; - virtual uint64_t get_enable() const = 0; - virtual void set_enable(uint64_t value) = 0; - virtual uint64_t get_ccr_value() const = 0; - virtual void set_ccr_value(uint64_t value) = 0; - virtual ~idx_ccr() {}; -}; - -class spr_idx_ccr: public idx_ccr { - public: - spr_idx_ccr(uint64_t &v){ - ccr_value = &v; - } - virtual uint64_t 
get_event_select() const { //EVENT bit, bit 32 - return ((*ccr_value >> 32) & 0xFFFFFFF); - } - virtual void set_event_select(uint64_t value) { - *ccr_value |= (value << 32); - } - virtual uint64_t get_event_category() const { //EVENT Categorg, bit 8 - return ((*ccr_value >> 8) & 0xF); - } - virtual void set_event_category(uint64_t value) { - *ccr_value |= (value << 8); - } - virtual uint64_t get_enable() const { //Enable counter, bit 0 - return ((*ccr_value >> 0 ) & 0x01); - } - virtual void set_enable(uint64_t value) { - *ccr_value |= (value << 0); - } - virtual uint64_t get_ccr_value() const { - return *ccr_value; - } - virtual void set_ccr_value(uint64_t value) { - *ccr_value = value; - } - - private: - uint64_t* ccr_value = NULL; -}; - -idx_ccr* idx_get_ccr(uint64_t& ccr) -{ - return new spr_idx_ccr(ccr); -} - -typedef enum -{ - ACCEL_IAA, - ACCEL_DSA, - ACCEL_QAT, - ACCEL_MAX, -} ACCEL_IP; - -enum IDXPerfmonField -{ - DPF_BASE = 0x100, //start from 0x100 to different with PerfmonField in cpucounter.h - EVENT_CATEGORY, - FILTER_WQ, - FILTER_ENG, - FILTER_TC, - FILTER_PGSZ, - FILTER_XFERSZ -}; - -typedef enum -{ - SOCKET_MAP, - NUMA_MAP, -} ACCEL_DEV_LOC_MAPPING; - -const std::vector idx_accel_mapping = -{ - PCM::IDX_IAA, - PCM::IDX_DSA, - PCM::IDX_QAT -}; - -#define ACCEL_IP_DEV_COUNT_MAX (16) - -typedef uint32_t h_id; -typedef uint32_t v_id; -typedef std::map,uint64_t> ctr_data; -typedef std::vector dev_content; -typedef std::vector accel_content; - accel_content accel_results(ACCEL_MAX, dev_content(ACCEL_IP_DEV_COUNT_MAX, ctr_data())); - -struct accel_counter : public counter { - //filter config for IDX Accelerator. 
- uint32_t cfr_wq = 0; - uint32_t cfr_eng = 0; - uint32_t cfr_tc = 0; - uint32_t cfr_pgsz = 0; - uint32_t cfr_xfersz = 0; -}; - -typedef struct -{ - PCM *m; - ACCEL_IP accel; - accel_counter ctr; - vector ctrs; -} accel_evt_parse_context; - -uint32_t getNumOfAccelDevs(PCM *m, ACCEL_IP accel) -{ - uint32_t dev_count = 0; - - if (accel >= ACCEL_MAX || m == NULL) - return 0; - - switch (accel) - { - case ACCEL_IAA: - dev_count = m->getNumOfIDXAccelDevs(PCM::IDX_IAA); - break; - case ACCEL_DSA: - dev_count = m->getNumOfIDXAccelDevs(PCM::IDX_DSA); - break; - case ACCEL_QAT: - dev_count = m->getNumOfIDXAccelDevs(PCM::IDX_QAT); - break; - default: - dev_count = 0; - break; - } - - return dev_count; -} - -uint32_t getMaxNumOfAccelCtrs(PCM *m, ACCEL_IP accel) -{ - uint32_t ctr_count = 0; - - if (accel >= ACCEL_MAX || m == NULL) - return 0; - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - ctr_count = m->getMaxNumOfIDXAccelCtrs(accel); - break; - default: - ctr_count = 0; - break; - } - - return ctr_count; -} - -int32_t programAccelCounters(PCM *m, ACCEL_IP accel, std::vector& ctrs) -{ - vector rawEvents; - vector filters_wq, filters_tc, filters_pgsz, filters_xfersz, filters_eng; - - if (m == NULL || accel >= ACCEL_MAX || ctrs.size() == 0 || ctrs.size() > getMaxNumOfAccelCtrs(m, accel)) - return -1; - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - for (auto pctr = ctrs.begin(); pctr != ctrs.end(); ++pctr) - { - rawEvents.push_back(pctr->ccr); - filters_wq.push_back(pctr->cfr_wq); - filters_tc.push_back(pctr->cfr_tc); - filters_pgsz.push_back(pctr->cfr_pgsz); - filters_xfersz.push_back(pctr->cfr_xfersz); - filters_eng.push_back(pctr->cfr_eng); - //std::cout<<"ctr idx=0x" << std::hex << pctr->idx << " hid=0x" << std::hex << pctr->h_id << " vid=0x" << std::hex << pctr->v_id <<" ccr=0x" << std::hex << pctr->ccr << "\n"; - //std::cout<<"mul=0x" << std::hex << pctr->multiplier << " div=0x" << std::hex << pctr->divider << 
"\n" << std::dec; - } - m->programIDXAccelCounters(idx_accel_mapping[accel], rawEvents, filters_wq, filters_eng, filters_tc, filters_pgsz, filters_xfersz); - break; - default: - break; - } - - return 0; -} - -SimpleCounterState getAccelCounterState(PCM *m, ACCEL_IP accel, uint32 dev, uint32 ctr_index) -{ - SimpleCounterState result; - - if (m == NULL || accel >= ACCEL_MAX || dev >= getNumOfAccelDevs(m, accel) || ctr_index >= getMaxNumOfAccelCtrs(m, accel)) - return result; - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - result = m->getIDXAccelCounterState(accel, dev, ctr_index); - break; - default: - break; - } - - return result; -} - -bool isAccelCounterAvailable(PCM *m, ACCEL_IP accel) -{ - bool ret = true; - - if (m == NULL || accel >= ACCEL_MAX) - ret =false; - - if (getNumOfAccelDevs(m, accel) == 0) - ret = false; - - return ret; -} - -std::string getAccelCounterName(ACCEL_IP accel) -{ - std::string ret; - - switch (accel) - { - case ACCEL_IAA: - ret = "iaa"; - break; - case ACCEL_DSA: - ret = "dsa"; - break; - case ACCEL_QAT: - ret = "qat"; - break; - default: - ret = "id=" + std::to_string(accel) + "(unknown)"; - break; - } - - return ret; -} - -bool getAccelDevLocation(PCM *m, const ACCEL_IP accel, uint32_t dev, const ACCEL_DEV_LOC_MAPPING loc_map, uint32_t &location) -{ - bool ret = true; - - switch (loc_map) - { - case SOCKET_MAP: - location = m->getCPUSocketIdOfIDXAccelDev(accel, dev); - break; - case NUMA_MAP: - location = m->getNumaNodeOfIDXAccelDev(accel, dev); - break; - default: - ret = false; - break; - } - - return ret; -} - std::vector build_counter_names(std::string dev_name, std::vector& ctrs, const ACCEL_DEV_LOC_MAPPING loc_map) { std::vector v; @@ -367,15 +92,17 @@ void print_usage(const std::string& progname) std::cout << "\n"; } -std::vector build_csv(PCM *m, const ACCEL_IP accel, std::vector& ctrs, +std::vector build_csv(const ACCEL_IP accel, std::vector& ctrs, const bool human_readable, const std::string& 
csv_delimiter, accel_content& sample_data, const ACCEL_DEV_LOC_MAPPING loc_map) { + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); std::vector result; std::vector current_row; auto header = build_counter_names("Accelerator", ctrs, loc_map); result.push_back(build_csv_row(header, csv_delimiter)); std::map> v_sort; - uint32_t dev_count = getNumOfAccelDevs(m, accel); + uint32_t dev_count = accs_->getNumOfAccelDevs(); for (uint32_t dev = 0; dev != dev_count; ++dev) { @@ -401,7 +128,7 @@ std::vector build_csv(PCM *m, const ACCEL_IP accel, std::vectorgetAccelDevLocation( dev, loc_map, location) == true) { current_row.push_back(std::to_string(location)); //location info } @@ -422,13 +149,15 @@ std::vector build_csv(PCM *m, const ACCEL_IP accel, std::vector build_display(PCM *m, const ACCEL_IP accel, std::vector& ctrs, accel_content& sample_data, const ACCEL_DEV_LOC_MAPPING loc_map) +std::vector build_display(const ACCEL_IP accel, std::vector& ctrs, accel_content& sample_data, const ACCEL_DEV_LOC_MAPPING loc_map) { std::vector buffer; std::vector headers; std::vector data; std::string row; - uint32_t dev_count = getNumOfAccelDevs(m, accel); + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + uint32_t dev_count = accs_->getNumOfAccelDevs(); headers = build_counter_names("Accelerator", ctrs, loc_map); //Print first row @@ -461,7 +190,7 @@ std::vector build_display(PCM *m, const ACCEL_IP accel, std::vector std::string h_name = v_array[0]->h_event_name; uint32 location = 0xff; - if (getAccelDevLocation(m, accel, dev, loc_map, location) == true) + if (accs_->getAccelDevLocation(dev, loc_map, location) == true) { v_data.push_back(location); //location info } @@ -500,15 +229,15 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< { const uint32_t delay_ms = uint32_t(delay * 1000); SimpleCounterState *before, *after; - const uint32_t dev_count = getNumOfAccelDevs(m, accel); + 
AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + const uint32_t dev_count = accs_->getNumOfAccelDevs(); const uint32_t counter_nb = ctrs.size(); uint32_t ctr_index = 0; before = new SimpleCounterState[dev_count*counter_nb]; after = new SimpleCounterState[dev_count*counter_nb]; - programAccelCounters(m, accel, ctrs); - switch (accel) { case ACCEL_IAA: @@ -518,7 +247,7 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< ctr_index = 0; for (auto pctr = ctrs.begin(); pctr != ctrs.end(); ++pctr) { - before[dev*counter_nb + ctr_index] = getAccelCounterState(m, accel, dev, ctr_index); + before[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); ctr_index++; } } @@ -528,7 +257,7 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< ctr_index = 0; for (auto pctr = ctrs.begin();pctr != ctrs.end(); ++pctr) { - after[dev*counter_nb + ctr_index] = getAccelCounterState(m, accel, dev, ctr_index); + after[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); uint64_t raw_result = getNumberOfEvents(before[dev*counter_nb + ctr_index], after[dev*counter_nb + ctr_index]); uint64_t trans_result = uint64_t (raw_result * pctr->multiplier / (double) pctr->divider * (1000 / (double) delay_ms)); accel_results[accel][dev][std::pair(pctr->h_id,pctr->v_id)] = trans_result; @@ -547,7 +276,7 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP accel, std::vector< ctr_index = 0; for (auto pctr = ctrs.begin();pctr != ctrs.end(); ++pctr) { - after[dev*counter_nb + ctr_index] = getAccelCounterState(m, accel, dev, ctr_index); + after[dev*counter_nb + ctr_index] = accs_->getAccelCounterState(dev, ctr_index); uint64_t raw_result = after[dev*counter_nb + ctr_index].getRawData(); uint64_t trans_result = uint64_t (raw_result * pctr->multiplier / (double) pctr->divider ); @@ -567,99 +296,9 @@ void collect_data(PCM *m, const double delay, const ACCEL_IP 
accel, std::vector< delete[] after; } -int idx_evt_parse_handler(evt_cb_type cb_type, void *cb_ctx, counter &base_ctr, std::map &ofm, std::string key, uint64 numValue) -{ - accel_evt_parse_context *context = (accel_evt_parse_context *)cb_ctx; - PCM *m = context->m; - if (cb_type == EVT_LINE_START) //this event will be called per line(start) - { - context->ctr.cfr_wq = 0xFFFF; - context->ctr.cfr_eng = 0xFFFF; - context->ctr.cfr_tc = 0xFFFF; - context->ctr.cfr_pgsz = 0xFFFF; - context->ctr.cfr_xfersz = 0xFFFF; - context->ctr.ccr = 0; - } - else if (cb_type == EVT_LINE_FIELD) //this event will be called per field of line - { - std::unique_ptr pccr(idx_get_ccr(context->ctr.ccr)); - - //std::cout << "Key:" << key << " Value:" << value << " opcodeFieldMap[key]:" << ofm[key] << "\n"; - switch (ofm[key]) - { - case PCM::EVENT_SELECT: - pccr->set_event_select(numValue); - //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; - break; - case PCM::ENABLE: - pccr->set_enable(numValue); - //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; - break; - case EVENT_CATEGORY: - pccr->set_event_category(numValue); - //std::cout << "pccr value:" << std::hex << pccr->get_ccr_value() <<"\n" << std::dec; - break; - case FILTER_WQ: - context->ctr.cfr_wq = (uint32_t)numValue; - break; - case FILTER_ENG: - context->ctr.cfr_eng = (uint32_t)numValue; - break; - case FILTER_TC: - context->ctr.cfr_tc = (uint32_t)numValue; - break; - case FILTER_PGSZ: - context->ctr.cfr_pgsz = (uint32_t)numValue; - break; - case FILTER_XFERSZ: - context->ctr.cfr_xfersz = (uint32_t)numValue; - break; - case PCM::INVALID: - default: - std::cerr << "Field in -o file not recognized. 
The key is: " << key << "\n"; - return -1; - } - } - else if(cb_type == EVT_LINE_COMPLETE) //this event will be called every line(end) - { - if (context->accel == ACCEL_IAA && base_ctr.h_event_name != "IAA") - { - return 0; //skip non-IAA cfg line - } - else if(context->accel == ACCEL_DSA && base_ctr.h_event_name != "DSA") - { - return 0; //skip non-DSA cfg line - } - else if(context->accel == ACCEL_QAT && base_ctr.h_event_name != "QAT") - { - return 0; //skip non-QAT cfg line - } - - //Validate the total number of counter exceed the maximum or not. - if ((uint32)base_ctr.idx >= getMaxNumOfAccelCtrs(m, context->accel)) - { - std::cerr << "line parse KO due to invalid value!" << std::dec << "\n"; - return 0; //skip the invalid cfg line - } - context->ctr.h_event_name = base_ctr.h_event_name; - context->ctr.v_event_name = base_ctr.v_event_name; - context->ctr.idx = base_ctr.idx; - context->ctr.multiplier = base_ctr.multiplier; - context->ctr.divider = base_ctr.divider; - context->ctr.h_id = base_ctr.h_id; - context->ctr.v_id = base_ctr.v_id; - //std::cout << "line parse OK, ctrcfg=0x" << std::hex << context->ctr.ccr << ", h_event_name=" << base_ctr.h_event_name << ", v_event_name=" << base_ctr.v_event_name; - //std::cout << ", h_id=0x" << std::hex << base_ctr.h_id << ", v_id=0x" << std::hex << base_ctr.v_id; - //std::cout << ", idx=0x"<< std::hex << base_ctr.idx << ", multiplier=0x" << std::hex << base_ctr.multiplier << ", divider=0x" << std::hex << base_ctr.divider << std::dec << "\n"; - context->ctrs.push_back(context->ctr); - } - - return 0; -} -typedef int (*pfn_evt_handler)(evt_cb_type, void *, counter &, std::map &, std::string, uint64); PCM_MAIN_NOTHROW; @@ -672,7 +311,7 @@ int mainThrows(int argc, char * argv[]) std::cout << "\n Intel(r) Performance Counter Monitor " << PCM_VERSION ; std::cout << "\n This utility measures Sapphire Rapids-SP accelerators information.\n"; - std::string program = string(argv[0]); + std::string program = std::string(argv[0]); 
bool csv = false; bool human_readable = false; std::string csv_delimiter = ","; @@ -684,10 +323,10 @@ int mainThrows(int argc, char * argv[]) ACCEL_DEV_LOC_MAPPING loc_map = SOCKET_MAP; //default is socket mapping MainLoop mainLoop; PCM * m; - accel_evt_parse_context evt_ctx; - std::map opcodeFieldMap; + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); + std::string ev_file_name; - pfn_evt_handler p_evt_handler; while (argc > 1) { @@ -803,70 +442,7 @@ int mainThrows(int argc, char * argv[]) exit(EXIT_FAILURE); } - if (isAccelCounterAvailable(m, accel) == true) - { - if (evtfile == false) //All platform use the spr config file by default. - { - ev_file_name = "opCode-143-accel.txt"; - } - else - { - ev_file_name = specify_evtfile; - } - //std::cout << "load event config file from:" << ev_file_name << "\n"; - } - else - { - std::cerr << "Error: " << getAccelCounterName(accel) << " device is NOT available/ready with this platform! Program aborted\n"; - exit(EXIT_FAILURE); - } - - switch (accel) - { - case ACCEL_IAA: - case ACCEL_DSA: - case ACCEL_QAT: - opcodeFieldMap["hname"] = PCM::H_EVENT_NAME; - opcodeFieldMap["vname"] = PCM::V_EVENT_NAME; - opcodeFieldMap["multiplier"] = PCM::MULTIPLIER; - opcodeFieldMap["divider"] = PCM::DIVIDER; - opcodeFieldMap["ctr"] = PCM::COUNTER_INDEX; - opcodeFieldMap["en"] = PCM::ENABLE; - opcodeFieldMap["ev_sel"] = PCM::EVENT_SELECT; - opcodeFieldMap["ev_cat"] = EVENT_CATEGORY; - opcodeFieldMap["filter_wq"] = FILTER_WQ; - opcodeFieldMap["filter_eng"] = FILTER_ENG; - opcodeFieldMap["filter_tc"] = FILTER_TC; - opcodeFieldMap["filter_pgsz"] = FILTER_PGSZ; - opcodeFieldMap["filter_xfersz"] = FILTER_XFERSZ; - - p_evt_handler = idx_evt_parse_handler; - evt_ctx.m = m; - evt_ctx.accel = accel; - evt_ctx.ctrs.clear();//fill the ctrs by evt_handler callback func. - break; - default: - std::cerr << "Error: Accel type=0x" << std::hex << accel << " is not supported! 
Program aborted\n" << std::dec; - exit(EXIT_FAILURE); - break; - } - - try - { - load_events(ev_file_name, opcodeFieldMap, p_evt_handler, (void *)&evt_ctx); - } - catch (std::exception & e) - { - std::cerr << "Error: " << e.what() << "\n"; - std::cerr << "Error: event cfg file have the problem, please double check it! Program aborted\n"; - exit(EXIT_FAILURE); - } - - if (evt_ctx.ctrs.size() ==0 || evt_ctx.ctrs.size() > getMaxNumOfAccelCtrs(m, evt_ctx.accel)) - { - std::cerr << "Error: event counter size is 0 or exceed maximum, please check the event cfg file! Program aborted\n"; - exit(EXIT_FAILURE); - } + accs_->setEvents(m,accel,specify_evtfile,evtfile); std::ostream* output = &std::cout; std::fstream file_stream; @@ -874,23 +450,16 @@ int mainThrows(int argc, char * argv[]) { file_stream.open(output_file.c_str(), std::ios_base::out); output = &file_stream; - } - - if (accel == ACCEL_QAT) - { - const uint32_t dev_count = getNumOfAccelDevs(m, accel); - for (uint32_t dev = 0; dev != dev_count; ++dev) - { - m->controlQATTelemetry(dev, PCM::QAT_TLM_START); //start the QAT telemetry service - } - } - + } + accs_->programAccelCounters(); + std::vector CTRS= accs_->getCounters(); mainLoop([&]() { - collect_data(m, delay, accel, evt_ctx.ctrs); + + collect_data(m, delay, accel, CTRS); std::vector display_buffer = csv ? 
- build_csv(m, accel, evt_ctx.ctrs, human_readable, csv_delimiter, accel_results, loc_map) : - build_display(m, accel, evt_ctx.ctrs, accel_results, loc_map); + build_csv( accel, CTRS, human_readable, csv_delimiter, accel_results, loc_map) : + build_display( accel, CTRS, accel_results, loc_map); display(display_buffer, *output); return true; }); diff --git a/src/pcm-sensor-server.cpp b/src/pcm-sensor-server.cpp index ced46bbd..6e7aa692 100644 --- a/src/pcm-sensor-server.cpp +++ b/src/pcm-sensor-server.cpp @@ -5,11 +5,14 @@ // https://github.com/prometheus/prometheus/wiki/Default-port-allocations constexpr unsigned int DEFAULT_HTTP_PORT = 9738; constexpr unsigned int DEFAULT_HTTPS_PORT = DEFAULT_HTTP_PORT; +#include "pcm-accel-common.h" #include #include #include #include +#include + #include #include #include @@ -338,6 +341,12 @@ class JSONPrinter : Visitor endObject( JSONPrinter::LineEndAction::DelimiterAndNewLine, END_LIST ); SystemCounterState before = getSystemCounter( aggPair_.first ); SystemCounterState after = getSystemCounter( aggPair_.second ); + PCM * pcm = PCM::getInstance(); + if (pcm->getAccel()!=ACCEL_NOCONFIG){ + startObject ("Accelerators",BEGIN_OBJECT); + printAccelCounterState(before,after); + endObject( JSONPrinter::LineEndAction::DelimiterAndNewLine, END_OBJECT ); + } startObject( "QPI/UPI Links", BEGIN_OBJECT ); printSystemCounterState( before, after ); endObject( JSONPrinter::LineEndAction::DelimiterAndNewLine, END_OBJECT ); @@ -347,6 +356,7 @@ class JSONPrinter : Visitor startObject( "Uncore Aggregate", BEGIN_OBJECT ); printUncoreCounterState( before, after ); endObject( JSONPrinter::LineEndAction::NewLineOnly, END_OBJECT ); + endObject( JSONPrinter::LineEndAction::NewLineOnly, END_OBJECT ); } @@ -433,6 +443,23 @@ class JSONPrinter : Visitor endObject( JSONPrinter::NewLineOnly, END_OBJECT ); } + void printAccelCounterState( SystemCounterState const& before, SystemCounterState const& after ) { + AcceleratorCounterState* accs_ = 
AcceleratorCounterState::getInstance(); + uint32 devs = accs_->getNumOfAccelDevs(); + for ( uint32 i=0; i < devs; ++i ) { + startObject( std::string( accs_->getAccelCounterName() + " Counters Device " ) + std::to_string( i ), BEGIN_OBJECT ); + for(int j=0;jgetNumberOfCounters();j++){ + printCounter( accs_->getAccelIndexCounterName(j), accs_->getAccelIndexCounter(i, before, after,j) ); + } + // debug prints + //for(uint32 j=0;jgetNumberOfCounters();j++){ + // std::cout<getAccelIndexCounterName(j) << " "<getAccelIndexCounter(i, before, after,j)<getAccelIndexCounterName()<< accs_->getAccelInboundBW (i, before, after ) << " "<< accs_->getAccelOutboundBW (i, before, after ) << " "<getAccelShareWQ_ReqNb (i, before, after ) << " "<getAccelDedicateWQ_ReqNb (i, before, after ) << std::endl; + endObject( JSONPrinter::DelimiterAndNewLine, END_OBJECT ); + } + } + void printSystemCounterState( SystemCounterState const& before, SystemCounterState const& after ) { PCM* pcm = PCM::getInstance(); uint32 sockets = pcm->getNumSockets(); @@ -596,6 +623,10 @@ class PrometheusPrinter : Visitor SystemCounterState after = getSystemCounter( aggPair_.second ); addToHierarchy( "aggregate=\"system\"" ); PCM* pcm = PCM::getInstance(); + if (pcm->getAccel()!=ACCEL_NOCONFIG){ + printComment( "Accelerator Counters" ); + printAccelCounterState(before,after); + } if ( pcm->isServerCPU() && pcm->getNumSockets() >= 2 ) { printComment( "UPI/QPI Counters" ); printSystemCounterState( before, after ); @@ -686,6 +717,23 @@ class PrometheusPrinter : Visitor removeFromHierarchy(); } + void printAccelCounterState( SystemCounterState const& before, SystemCounterState const& after ) + { + addToHierarchy( "source=\"accel\"" ); + AcceleratorCounterState* accs_ = AcceleratorCounterState::getInstance(); + uint32 devs = accs_->getNumOfAccelDevs(); + + for ( uint32 i=0; i < devs; ++i ) + { + addToHierarchy( std::string( accs_->getAccelCounterName() + "device=\"" ) + std::to_string( i ) + "\"" ); + for(int 
j=0;jgetNumberOfCounters();j++) + { + printCounter( accs_->remove_string_inside_use(accs_->getAccelIndexCounterName(j)), accs_->getAccelIndexCounter(i, before, after,j) ); + } + removeFromHierarchy(); + } + removeFromHierarchy(); + } void printSystemCounterState( SystemCounterState const& before, SystemCounterState const& after ) { addToHierarchy( "source=\"uncore\"" ); PCM* pcm = PCM::getInstance(); @@ -3167,9 +3215,16 @@ int mainThrows(int argc, char * argv[]) { unsigned short debug_level = 0; std::string certificateFile; std::string privateKeyFile; - + AcceleratorCounterState *accs_; + accs_ = AcceleratorCounterState::getInstance(); null_stream nullStream; check_and_set_silent(argc, argv, nullStream); + ACCEL_IP accel=ACCEL_NOCONFIG; //default is IAA + bool evtfile = false; + std::string specify_evtfile; + // ACCEL_DEV_LOC_MAPPING loc_map = SOCKET_MAP; //default is socket mapping + MainLoop mainLoop; + std::string ev_file_name; if ( argc > 1 ) { std::string arg_value; @@ -3228,11 +3283,69 @@ int mainThrows(int argc, char * argv[]) { { forceRTMAbortMode = true; } + else if (check_argument_equals(argv[i], {"-iaa", "/iaa"})) + { + accel = ACCEL_IAA; + } + else if (check_argument_equals(argv[i], {"-dsa", "/dsa"})) + { + accel = ACCEL_DSA; + std::cout << "Aggregator firstest : " << accs_->getAccelCounterName() << accel; + } +#ifdef __linux__ + else if (check_argument_equals(argv[i], {"-qat", "/qat"})) + { + accel = ACCEL_QAT; + } + // else if (check_argument_equals(argv[i], {"-numa", "/numa"})) + // { + // loc_map = NUMA_MAP; + // } +#endif + else if (extract_argument_value(argv[i], {"-evt", "/evt"}, arg_value)) + { + evtfile = true; + specify_evtfile = std::move(arg_value); + } else if ( check_argument_equals( argv[i], {"-silent", "/silent"} ) ) { // handled in check_and_set_silent continue; } + +#ifdef __linux__ + // check kernel version for driver dependency. 
+ if (accel != ACCEL_NOCONFIG) + { + std::cout << "Info: IDX - Please ensure the required driver(e.g idxd driver for iaa/dsa, qat driver and etc) correct enabled with this system, else the tool may fail to run.\n"; + struct utsname sys_info; + if (!uname(&sys_info)) + { + std::string krel_str; + uint32 krel_major_ver=0, krel_minor_ver=0; + krel_str = sys_info.release; + std::vector krel_info = split(krel_str, '.'); + std::istringstream iss_krel_major(krel_info[0]); + std::istringstream iss_krel_minor(krel_info[1]); + iss_krel_major >> std::setbase(0) >> krel_major_ver; + iss_krel_minor >> std::setbase(0) >> krel_minor_ver; + + switch (accel) + { + case ACCEL_IAA: + case ACCEL_DSA: + if ((krel_major_ver < 5) || (krel_major_ver == 5 && krel_minor_ver < 11)) + { + std::cout<< "Warning: IDX - current linux kernel version(" << krel_str << ") is too old, please upgrade it to the latest due to required idxd driver integrated to kernel since 5.11.\n"; + } + break; + default: + std::cout<< "Info: Chosen "<< accel<<" IDX - current linux kernel version(" << krel_str << ")"; + + } + } + } +#endif #if defined (USE_SSL) else if ( check_argument_equals( argv[i], {"-C", "--certificateFile"} ) ) { @@ -3315,6 +3428,7 @@ int mainThrows(int argc, char * argv[]) { // A HTTP interface to change the programming is planned PCM::ErrorCode status; PCM * pcmInstance = PCM::getInstance(); + pcmInstance->setAccel(accel); assert(pcmInstance); if (forceRTMAbortMode) { @@ -3326,7 +3440,8 @@ int mainThrows(int argc, char * argv[]) { switch ( status ) { case PCM::PMUBusy: { - if ( forcedProgramming == false ) { + if ( forcedProgramming == false ) + { std::cout << "Warning: PMU appears to be busy, do you want to reset it? 
(y/n)\n"; char answer; std::cin >> answer; @@ -3356,7 +3471,18 @@ int mainThrows(int argc, char * argv[]) { //TODO: check return value when its implemented pcmInstance->programCXLCM(); + if (pcmInstance->getAccel()!=ACCEL_NOCONFIG) + { + if (pcmInstance->supportIDXAccelDev() == false) + { + std::cerr << "Error: IDX accelerator is NOT supported with this platform! Program aborted\n"; + exit(EXIT_FAILURE); + } + accs_->setEvents(pcmInstance,accel,specify_evtfile,evtfile); + + accs_->programAccelCounters(); + } #if defined (USE_SSL) if ( useSSL ) { if ( port == 0 ) diff --git a/src/topology.cpp b/src/topology.cpp index a8cb6c85..6771a646 100644 --- a/src/topology.cpp +++ b/src/topology.cpp @@ -1,8 +1,8 @@ // SPDX-License-Identifier: BSD-3-Clause // Copyright (c) 2016-2022, Intel Corporation -#include "cpucounters.h" #include "topology.h" +#include "pcm-accel-common.h" namespace pcm { @@ -87,6 +87,7 @@ void Aggregator::dispatch( SystemRoot const& syp ) { PCM* pcm = PCM::getInstance(); pcm->readQPICounters( sycs_ ); pcm->readAndAggregateCXLCMCounters( sycs_ ); + readAccelCounters(sycs_); } Aggregator::Aggregator() diff --git a/src/utils.h b/src/utils.h index fcbc67a7..f80478df 100644 --- a/src/utils.h +++ b/src/utils.h @@ -539,9 +539,12 @@ inline uint64 extract_bits(uint64 myin, uint32 beg, uint32 end) } #ifdef _MSC_VER + +#define PCM_MSR_DRV_NAME TEXT("\\\\.\\RDMSR") + inline HANDLE openMSRDriver() { - return CreateFile(TEXT("\\\\.\\RDMSR"), GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL); + return CreateFile(PCM_MSR_DRV_NAME, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, 0, NULL); } #endif diff --git a/src/windows/windriver.h b/src/windows/windriver.h index add9b9b4..c7b10140 100644 --- a/src/windows/windriver.h +++ b/src/windows/windriver.h @@ -100,9 +100,7 @@ class Driver { if (0 != StartService(hService, 0, NULL)) { - tstring convDriverName(&driverName_[0]); - tstring driverPath = TEXT("\\\\.\\") + convDriverName; - 
restrictDriverAccess(driverPath.c_str()); + restrictDriverAccess(PCM_MSR_DRV_NAME); return true; } DWORD err = GetLastError(); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 61b16b19..1d10dc8d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,7 +6,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin/tests) if(UNIX) # daemon_alignment_test on Linux and Unix - file(GLOB TEST_FILE daemon_alignment_test.cpp) + file(GLOB TEST_FILE daemon_alignment_test.cpp pcm-accel-common.cpp) add_executable(daemon_alignment_test ${TEST_FILE}) target_link_libraries(daemon_alignment_test)