diff --git a/src/includes/sysFeatures.h b/src/includes/sysFeatures.h
index 37e4e1f90..1541a731e 100644
--- a/src/includes/sysFeatures.h
+++ b/src/includes/sysFeatures.h
@@ -39,5 +39,4 @@
 #define HWFEATURES_MAX_STR_LENGTH 100
 #define HWFEATURES_MIN_STRLEN(a, b) ((a) > (b) ? (b) : (a))
 
-
 #endif
diff --git a/src/includes/sysFeatures_intel_thermal.h b/src/includes/sysFeatures_intel_thermal.h
new file mode 100644
index 000000000..f4a12321f
--- /dev/null
+++ b/src/includes/sysFeatures_intel_thermal.h
@@ -0,0 +1,9 @@
+#ifndef SYSFEATURES_INTEL_THERMAL_H
+#define SYSFEATURES_INTEL_THERMAL_H
+
+#include
+
+/* Feature list with the Intel thermal features (core and package temperature). */
+extern const _SysFeatureList likwid_sysft_intel_cpu_thermal_feature_list;
+
+#endif //SYSFEATURES_INTEL_THERMAL_H
diff --git a/src/sysFeatures_common.c b/src/sysFeatures_common.c
index ce0c053bb..5448a2dd4 100644
--- a/src/sysFeatures_common.c
+++ b/src/sysFeatures_common.c
@@ -362,16 +362,52 @@ static int readmsr_socket(const LikwidDevice_t device, uint64_t reg, uint64_t *m
         if ((int)t->packageId == device->id.simple.id && t->inCpuSet)
         {
             err = HPMaddThread(t->apicId);
-            if (err < 0) continue;
+            if (err < 0)
+                continue;
             err = HPMread(t->apicId, MSR_DEV, reg, msrData);
-            if (err < 0) continue;
+            if (err < 0)
+                continue;
             return 0;
         }
     }
+
+    if (err < 0)
+        return err;
+    return -ENODEV;
+}
+
+/*
+ * Read MSR 'reg' for a core device. Hardware threads whose coreId matches
+ * the device id and that are in the cpuset are tried in order; the first
+ * successful read is stored in *msrData and 0 is returned. Otherwise the
+ * last error is returned, or -ENODEV if no matching thread was found.
+ */
+static int readmsr_core(const LikwidDevice_t device, uint64_t reg, uint64_t *msrData)
+{
+    assert(device->type == DEVICE_TYPE_CORE);
+
+    int err = topology_init();
     if (err < 0)
-    {
         return err;
+
+    CpuTopology_t topo = get_cpuTopology();
+    for (unsigned i = 0; i < topo->numHWThreads; i++)
+    {
+        HWThread* t = &topo->threadPool[i];
+        if ((int)t->coreId == device->id.simple.id && t->inCpuSet)
+        {
+            err = HPMaddThread(t->apicId);
+            if (err < 0)
+                continue;
+            err = HPMread(t->apicId, MSR_DEV, reg, msrData);
+            if (err < 0)
+                continue;
+            return 0;
+        }
     }
+
+    if (err < 0)
+        return err;
     return -ENODEV;
 }
 
@@ -398,6 +434,9 @@ int likwid_sysft_readmsr(const LikwidDevice_t device, uint64_t reg, uint64_t *ms
         case DEVICE_TYPE_SOCKET:
             err = readmsr_socket(device, reg, msrData);
             break;
+        case DEVICE_TYPE_CORE:
+            err = readmsr_core(device, reg, msrData);
+            break;
         case DEVICE_TYPE_HWTHREAD:
             err = readmsr_hwthread(device, reg, msrData);
             break;
diff --git a/src/sysFeatures_intel.c b/src/sysFeatures_intel.c
index b1e231882..371d17d4b 100644
--- a/src/sysFeatures_intel.c
+++ b/src/sysFeatures_intel.c
@@ -48,6 +48,7 @@
 #include
 #include
 #include
+#include
 
 static const _HWArchFeatures intel_arch_features[];
 
@@ -81,6 +82,7 @@ static const _SysFeatureList* intel_arch_feature_inputs[] = {
     &likwid_sysft_intel_cpu_turbo_feature_list,
     &likwid_sysft_intel_uncorefreq_feature_list,
     &likwid_sysft_intel_cpu_spec_ctrl_feature_list,
+    &likwid_sysft_intel_cpu_thermal_feature_list,
     NULL,
 };
 
diff --git a/src/sysFeatures_intel_thermal.c b/src/sysFeatures_intel_thermal.c
new file mode 100644
index 000000000..5f62c3817
--- /dev/null
+++ b/src/sysFeatures_intel_thermal.c
@@ -0,0 +1,108 @@
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * Read the current temperature of a core or a socket and store its string
+ * form in *value via likwid_sysft_uint64_to_string().
+ *
+ * device: core (core == true) or socket (core == false) to query.
+ * core:   true reads IA32_THERM_STATUS of the core, false reads
+ *         IA32_PACKAGE_THERM_STATUS of the socket.
+ * value:  receives the formatted temperature.
+ *
+ * Returns -EINVAL for NULL arguments or if no hardware thread of the core
+ * is found in the topology, a negative error from topology_init() or
+ * likwid_sysft_readmsr(), else the result of likwid_sysft_uint64_to_string().
+ */
+static int intel_thermal_temperature_getter(const LikwidDevice_t device, bool core, char **value)
+{
+    if (!device || !value)
+        return -EINVAL;
+
+    int err;
+    _LikwidDevice socketDev;
+    if (core)
+    {
+        /* If we read temperature from the core, we need to determine its socket.
+         * That is because the TEMPERATURE_TARGET is per socket only. */
+        err = topology_init();
+        if (err < 0)
+            return err;
+
+        bool found = false;
+        CpuTopology_t topo = get_cpuTopology();
+        for (unsigned i = 0; i < topo->numHWThreads; i++)
+        {
+            const HWThread *t = &topo->threadPool[i];
+            /* Any hardware thread of the requested core tells us the socket. */
+            if ((int)t->coreId != device->id.simple.id)
+                continue;
+
+            memset(&socketDev, 0, sizeof(socketDev));
+            socketDev.type = DEVICE_TYPE_SOCKET;
+            socketDev.id.simple.id = t->packageId;
+            socketDev.internal_id = t->packageId;
+            found = true;
+            break;
+        }
+
+        if (!found)
+            return -EINVAL;
+    } else {
+        socketDev = *device;
+    }
+
+    uint64_t therm_status_raw;
+    err = likwid_sysft_readmsr(device, core ? IA32_THERM_STATUS : IA32_PACKAGE_THERM_STATUS, &therm_status_raw);
+    if (err < 0)
+        return err;
+
+    const int readout = (int)field64(therm_status_raw, 16, 7);
+
+    uint64_t temp_target_raw;
+    err = likwid_sysft_readmsr(&socketDev, MSR_TEMPERATURE_TARGET, &temp_target_raw);
+    if (err < 0)
+        return err;
+
+    const int temp_target = (int)field64(temp_target_raw, 16, 8);
+    const int temp_offset = (int)field64(temp_target_raw, 24, 6);
+
+    const int final_temp = temp_target - temp_offset - readout;
+
+    return likwid_sysft_uint64_to_string(final_temp, value);
+}
+
+/* Getter for "core_temp": reads the temperature of a core device. */
+static int intel_thermal_temperature_core_getter(LikwidDevice_t dev, char **value)
+{
+    return intel_thermal_temperature_getter(dev, true, value);
+}
+
+/* Getter for "pkg_temp": reads the temperature of a socket device. */
+static int intel_thermal_temperature_socket_getter(LikwidDevice_t dev, char **value)
+{
+    return intel_thermal_temperature_getter(dev, false, value);
+}
+
+/* Availability check for the thermal features: result of cpuid_hasFeature(TM2). */
+static int intel_thermal_tester(void)
+{
+    return cpuid_hasFeature(TM2);
+}
+
+static _SysFeature intel_thermal_features[] = {
+    {"core_temp", "thermal", "Current CPU temperature (core)", intel_thermal_temperature_core_getter, NULL, DEVICE_TYPE_CORE, NULL, "degrees C"},
+    {"pkg_temp", "thermal", "Current CPU temperature (package)", intel_thermal_temperature_socket_getter, NULL, DEVICE_TYPE_SOCKET, NULL, "degrees C"},
+};
+
+const _SysFeatureList likwid_sysft_intel_cpu_thermal_feature_list = {
+    .num_features = ARRAY_COUNT(intel_thermal_features),
+    .tester = intel_thermal_tester,
+    .features = intel_thermal_features,
+};