/* source.c */
/* Read the RAPL registers on recent (>sandybridge) Intel processors */
/* */
/* There are currently three ways to do this: */
/* 1. Read the MSRs directly with /dev/cpu/??/msr */
/* 2. Use the perf_event_open() interface */
/* 3. Read the values from the sysfs powercap interface */
/* */
/* MSR Code originally based on a (never made it upstream) linux-kernel */
/* RAPL driver by Zhang Rui <rui.zhang@intel.com> */
/* https://lkml.org/lkml/2011/5/26/93 */
/* Additional contributions by: */
/* Romain Dolbeau -- romain @ dolbeau.org */
/* */
/* For raw MSR access the /dev/cpu/??/msr driver must be enabled and */
/* permissions set to allow read access. */
/* You might need to "modprobe msr" before it will work. */
/* */
/* perf_event_open() support requires at least Linux 3.14 and to have */
/* /proc/sys/kernel/perf_event_paranoid < 1 */
/* */
/* the sysfs powercap interface got into the kernel in */
/* 2d281d8196e38dd (3.13) */
/* */
/* Vince Weaver -- vincent.weaver @ maine.edu -- 11 September 2015 */
/* */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <errno.h>
#include <inttypes.h>
#include <unistd.h>
#include <math.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <stdbool.h>
/*
 * Model-specific register (MSR) addresses of the RAPL interface and the
 * bit-field layout of the RAPL unit register.
 */

/* AMD Support */
#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029B
#define MSR_AMD_PP0_ENERGY_STATUS 0xc001029A

/* Intel support */
#define MSR_INTEL_RAPL_POWER_UNIT 0x606

/*
 * Platform specific RAPL Domains.
 * Note that PP1 RAPL Domain is supported on 062A only
 * And DRAM RAPL Domain is supported on 062D only
 */

/* Package RAPL Domain */
#define MSR_PKG_RAPL_POWER_LIMIT 0x610
#define MSR_INTEL_PKG_ENERGY_STATUS 0x611
#define MSR_PKG_PERF_STATUS 0x613
#define MSR_PKG_POWER_INFO 0x614

/* PP0 RAPL Domain */
#define MSR_PP0_POWER_LIMIT 0x638
#define MSR_INTEL_PP0_ENERGY_STATUS 0x639
#define MSR_PP0_POLICY 0x63A
#define MSR_PP0_PERF_STATUS 0x63B

/* PP1 RAPL Domain, may reflect to uncore devices */
#define MSR_PP1_POWER_LIMIT 0x640
#define MSR_PP1_ENERGY_STATUS 0x641
#define MSR_PP1_POLICY 0x642

/* DRAM RAPL Domain */
#define MSR_DRAM_POWER_LIMIT 0x618
#define MSR_DRAM_ENERGY_STATUS 0x619
#define MSR_DRAM_PERF_STATUS 0x61B
#define MSR_DRAM_POWER_INFO 0x61C

/* PSYS RAPL Domain */
#define MSR_PLATFORM_ENERGY_STATUS 0x64d

/*
 * RAPL UNIT BITMASK
 * For every field, *_OFFSET is the position of its lowest bit and *_MASK
 * selects the field in place, i.e. (value & MASK) >> OFFSET extracts it.
 */
#define POWER_UNIT_OFFSET 0
#define POWER_UNIT_MASK 0x0F
#define ENERGY_UNIT_OFFSET 0x08
#define ENERGY_UNIT_MASK 0x1F00
#define TIME_UNIT_OFFSET 0x10
/* Time units live in bits 19:16; the previous value 0xF000 selected bits 15:12. */
#define TIME_UNIT_MASK 0xF0000
/*
 * Open the MSR device node of one logical CPU for reading.
 *
 * core: logical CPU number, substituted into "/dev/cpu/<core>/msr".
 *
 * Returns the open file descriptor. Does not return on failure: a diagnostic
 * is written to stderr and the process exits with status 2 (ENXIO), 3 (EIO)
 * or 127 (any other open() error).
 */
static int open_msr(int core) {
    char path[BUFSIZ];

    sprintf(path, "/dev/cpu/%d/msr", core);

    int fd = open(path, O_RDONLY);
    if (fd >= 0) {
        return fd;
    }

    switch (errno) {
    case ENXIO:
        fprintf(stderr, "rdmsr: No CPU %d\n", core);
        exit(2);
    case EIO:
        fprintf(stderr, "rdmsr: CPU %d doesn't support MSRs\n",
                core);
        exit(3);
    default:
        perror("rdmsr:open");
        exit(127);
    }
}
/*
 * Read one 64-bit register from an open MSR device file.
 *
 * fd:    descriptor of the MSR device file to read from.
 * which: MSR address; it is passed to pread() as the file offset.
 *
 * Returns the 8 bytes read. If pread() fails or delivers fewer bytes, the
 * error is reported on stderr and the process exits with status 127.
 */
static uint64_t read_msr(int fd, unsigned int which) {
    uint64_t value;
    ssize_t got = pread(fd, &value, sizeof value, which);

    if (got == (ssize_t)sizeof value) {
        return value;
    }

    perror("rdmsr:pread");
    fprintf(stderr,"Error reading MSR %x\n",which);
    exit(127);
}
/* CPU vendors distinguished by detect_cpu(). */
enum {
    CPU_VENDOR_INTEL = 1,
    CPU_VENDOR_AMD   = 2
};

/*
 * CPU model identifiers as returned by detect_cpu() and matched in
 * check_availability(). The Intel values are compared against the "model"
 * field parsed from /proc/cpuinfo; CPU_AMD_FAM17H is a synthetic value that
 * detect_cpu() returns for every supported AMD CPU.
 */
enum {
    CPU_SANDYBRIDGE      = 42,
    CPU_SANDYBRIDGE_EP   = 45,
    CPU_IVYBRIDGE        = 58,
    CPU_IVYBRIDGE_EP     = 62,
    CPU_HASWELL          = 60,
    CPU_HASWELL_ULT      = 69,
    CPU_HASWELL_GT3E     = 70,
    CPU_HASWELL_EP       = 63,
    CPU_BROADWELL        = 61,
    CPU_BROADWELL_GT3E   = 71,
    CPU_BROADWELL_EP     = 79,
    CPU_BROADWELL_DE     = 86,
    CPU_SKYLAKE          = 78,
    CPU_SKYLAKE_HS       = 94,
    CPU_SKYLAKE_X        = 85,
    CPU_KNIGHTS_LANDING  = 87,
    CPU_KNIGHTS_MILL     = 133,
    CPU_KABYLAKE_MOBILE  = 142,
    CPU_KABYLAKE         = 158,
    CPU_ATOM_SILVERMONT  = 55,
    CPU_ATOM_AIRMONT     = 76,
    CPU_ATOM_MERRIFIELD  = 74,
    CPU_ATOM_MOOREFIELD  = 90,
    CPU_ATOM_GOLDMONT    = 92,
    CPU_ATOM_GEMINI_LAKE = 122,
    CPU_ATOM_DENVERTON   = 95,
    CPU_TIGER_LAKE       = 140,
    CPU_AMD_FAM17H       = 0xc000
};
// File-scope state. `static` gives these variables internal linkage, i.e.
// they are not visible outside this file. It does NOT make them per-thread:
// every thread of the process would share the same objects.
// TODO: If this code ever gets multi-threaded please review every access to
// this state so that one thread cannot pollute another thread's state.

// MSR addresses chosen by detect_cpu() for the detected CPU vendor.
static unsigned int msr_rapl_units;
static unsigned int msr_pkg_energy_status;
static unsigned int msr_pp0_energy_status;

// Pause between two measurement rounds in microseconds (handed to usleep()).
static unsigned int usleep_time = 1000;
/*
 * Identify the CPU from /proc/cpuinfo and select the matching RAPL MSRs.
 *
 * On a supported CPU (Intel family 6, AMD family 23 or 25) the file-scope
 * variables msr_rapl_units, msr_pkg_energy_status and msr_pp0_energy_status
 * are set to the vendor's register addresses.
 *
 * Returns the "model" number from /proc/cpuinfo for Intel, CPU_AMD_FAM17H for
 * AMD, or -1 when /proc/cpuinfo cannot be opened, the vendor is neither
 * GenuineIntel nor AuthenticAMD, or the CPU family is not supported.
 */
static int detect_cpu(void) {
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");
    if (cpuinfo == NULL) {
        return -1;
    }

    int vendor = -1;
    int family = -1; /* stays -1 if no "cpu family" line is present */
    int model = -1;
    char line[BUFSIZ];
    char vendor_string[BUFSIZ];

    /* Every processor entry is scanned; the values of the last one win. */
    while (fgets(line, sizeof line, cpuinfo) != NULL) {
        if (!strncmp(line, "vendor_id", 9)) {
            /* Skip the key and the ':' token, then take the vendor name. */
            if (sscanf(line, "%*s%*s%s", vendor_string) == 1) {
                if (!strncmp(vendor_string, "GenuineIntel", 12)) {
                    vendor = CPU_VENDOR_INTEL;
                }
                if (!strncmp(vendor_string, "AuthenticAMD", 12)) {
                    vendor = CPU_VENDOR_AMD;
                }
            }
        }
        if (!strncmp(line, "cpu family", 10)) {
            sscanf(line, "%*s%*s%*s%d", &family);
        }
        /*
         * Also matches "model name" lines; there the %d conversion fails and
         * model keeps its previous value.
         */
        if (!strncmp(line, "model", 5)) {
            sscanf(line, "%*s%*s%d", &model);
        }
    }

    /* Close before any return so that no path leaks the stream. */
    fclose(cpuinfo);

    if (vendor == CPU_VENDOR_INTEL) {
        if (family != 6) {
            fprintf(stderr, "Maybe unsupported CPU family (%d). Please check vendor documentation and make a Pull-Request if wrong.\n", family);
            return -1;
        }
        msr_rapl_units = MSR_INTEL_RAPL_POWER_UNIT;
        msr_pkg_energy_status = MSR_INTEL_PKG_ENERGY_STATUS;
        msr_pp0_energy_status = MSR_INTEL_PP0_ENERGY_STATUS;
        return model;
    }

    if (vendor == CPU_VENDOR_AMD) {
        if (family != 23 && family != 25) {
            fprintf(stderr, "Maybe unsupported CPU family (%d). Please check vendor documentation and make a Pull-Request if wrong.\n", family);
            return -1;
        }
        msr_rapl_units = MSR_AMD_RAPL_POWER_UNIT;
        msr_pkg_energy_status = MSR_AMD_PKG_ENERGY_STATUS;
        msr_pp0_energy_status = MSR_AMD_PP0_ENERGY_STATUS;
        return CPU_AMD_FAM17H;
    }

    /*
     * Unknown vendor: no MSR addresses were selected, so reporting a model
     * here would make the caller read from MSR address 0.
     */
    fprintf(stderr, "Unsupported or undetected CPU vendor\n");
    return -1;
}
#define MAX_CPUS 1024
#define MAX_PACKAGES 16

/* Logical CPUs probed and distinct physical packages found by detect_packages(). */
static size_t total_cores=0,total_packages=0;
/* package_map[p] is the first logical CPU seen in physical package p, or -1. */
static int package_map[MAX_PACKAGES];

/*
 * Discover the CPU packages through sysfs.
 *
 * Walks cpu0, cpu1, ... (at most MAX_CPUS) and reads each CPU's
 * topology/physical_package_id. Probing stops at the first CPU whose file
 * cannot be opened. A CPU whose package id cannot be parsed or lies outside
 * [0, MAX_PACKAGES) is reported on stderr and skipped, because the id is used
 * as an index into package_map.
 *
 * Updates total_cores, total_packages and package_map (all three are reset on
 * entry, so the function may be called repeatedly). Always returns 0.
 */
static int detect_packages(void) {
    char filename[BUFSIZ];
    int i;

    total_cores = 0;
    total_packages = 0;
    for (i = 0; i < MAX_PACKAGES; i++) {
        package_map[i] = -1;
    }

    for (i = 0; i < MAX_CPUS; i++) {
        snprintf(filename, sizeof filename,
                 "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i);

        FILE *fff = fopen(filename, "r");
        if (fff == NULL) {
            break;
        }

        int package = -1;
        int parsed = fscanf(fff, "%d", &package);
        fclose(fff);

        if (parsed != 1 || package < 0 || package >= MAX_PACKAGES) {
            fprintf(stderr,
                    "Ignoring CPU %d: missing or out-of-range package id (supported: 0-%d)\n",
                    i, MAX_PACKAGES - 1);
            continue;
        }

        if (package_map[package] == -1) {
            total_packages++;
            package_map[package] = i;
        }
    }

    total_cores = (size_t)i;
    return 0;
}
/* Measurement modes; main() selects one from the command-line options. */
enum {
    MEASURE_ENERGY_PKG = 1,
    MEASURE_DRAM       = 2,
    MEASURE_PSYS       = 3
};

/* Set by check_availability() when DRAM measurement is requested. */
int dram_avail = 0;      /* non-zero: the CPU model has a DRAM domain */
int different_units = 0; /* non-zero: DRAM uses its own fixed energy unit */

/* Per-package energy units, filled in by setup_measurement_units(). */
double cpu_energy_units[MAX_PACKAGES];
double dram_energy_units[MAX_PACKAGES];

/* Energy-status-unit field (bits 12:8) of the most recently read unit MSR. */
uint32_t raw_energy_status_units;

/* Address of the energy counter MSR for the selected measurement mode. */
unsigned int energy_status;

/* Per-package energy unit that applies to the selected measurement mode. */
double energy_units[MAX_PACKAGES];
/*
 * Verify that the requested measurement can be done on this CPU.
 *
 * cpu_model:        value returned by detect_cpu().
 * measurement_mode: one of the MEASURE_* constants.
 *
 * For MEASURE_DRAM the globals dram_avail and different_units are set from a
 * per-model table; models that are not listed leave both untouched. The
 * process exits with status -1 if DRAM measurement is requested but not
 * available, or if cpu_model is negative. Otherwise returns 0.
 */
static int check_availability(int cpu_model, int measurement_mode) {
    if (measurement_mode == MEASURE_DRAM) {
        switch (cpu_model) {
        /* DRAM domain available, same energy unit as the package domain */
        case CPU_SANDYBRIDGE_EP:
        case CPU_IVYBRIDGE_EP:
        case CPU_HASWELL:
        case CPU_HASWELL_ULT:
        case CPU_HASWELL_GT3E:
        case CPU_BROADWELL:
        case CPU_BROADWELL_GT3E:
        case CPU_ATOM_GOLDMONT:
        case CPU_ATOM_GEMINI_LAKE:
        case CPU_ATOM_DENVERTON:
        case CPU_SKYLAKE:
        case CPU_SKYLAKE_HS:
        case CPU_KABYLAKE:
        case CPU_KABYLAKE_MOBILE:
            dram_avail = 1;
            different_units = 0;
            break;

        /* DRAM domain available, but with its own energy unit */
        case CPU_HASWELL_EP:
        case CPU_BROADWELL_EP:
        case CPU_SKYLAKE_X:
        case CPU_KNIGHTS_LANDING:
        case CPU_KNIGHTS_MILL:
            dram_avail = 1;
            different_units = 1;
            break;

        /* No DRAM domain (the Tiger Lake entry is a guess, find documentation) */
        case CPU_SANDYBRIDGE:
        case CPU_IVYBRIDGE:
        case CPU_AMD_FAM17H:
        case CPU_TIGER_LAKE:
            dram_avail = 0;
            different_units = 0;
            break;

        /* Unlisted model: keep whatever the globals currently hold */
        default:
            break;
        }

        if (!dram_avail) {
            fprintf(stderr,"DRAM not available for your processer. %d \n", measurement_mode);
            exit(-1);
        }
    }

    if (cpu_model < 0) {
        fprintf(stderr, "\tUnsupported CPU model %d\n",cpu_model);
        exit(-1);
    }

    return 0;
}
/*
 * Read the RAPL unit register of every package and derive the energy units
 * and the counter MSR that belong to the requested measurement mode.
 *
 * measurement_mode: one of the MEASURE_* constants; any other value ends the
 *                   process with status -1 (only reached when at least one
 *                   package exists).
 *
 * Fills cpu_energy_units[], dram_energy_units[] (DRAM mode only),
 * energy_units[] and energy_status. raw_energy_status_units ends up holding
 * the field of the last package that was read. Returns 0.
 */
static int setup_measurement_units(int measurement_mode) {
    int pkg;

    /* Pass 1: query the unit MSR through the first CPU of each package. */
    for (pkg = 0; pkg < total_packages; pkg++) {
        int msr_fd = open_msr(package_map[pkg]);
        long long units = read_msr(msr_fd, msr_rapl_units);

        /*
         * Layout of the unit MSR as per specification:
         *   bits  0-3   power units
         *   bits  8-12  energy status units
         *   bits 16-19  time units
         *   bits 4-7, 13-15 and 20-63 are reserved
         * Power and time units are currently not used; they would be
         * obtained by masking with 0xf after shifting by 0 and 16.
         */
        raw_energy_status_units = (units >> 8) & 0x1f;

        /* The energy unit is 1 / 2^field. */
        cpu_energy_units[pkg] = pow(0.5, (double)raw_energy_status_units);

        if (measurement_mode == MEASURE_DRAM) {
            dram_energy_units[pkg] = different_units
                ? pow(0.5, (double)16)
                : cpu_energy_units[pkg];
        }

        close(msr_fd);
    }

    /* Pass 2: pick the counter register and unit for the selected mode. */
    for (pkg = 0; pkg < total_packages; pkg++) {
        switch (measurement_mode) {
        case MEASURE_ENERGY_PKG:
            energy_status = msr_pkg_energy_status;
            energy_units[pkg] = cpu_energy_units[pkg];
            break;
        case MEASURE_DRAM:
            energy_status = MSR_DRAM_ENERGY_STATUS;
            energy_units[pkg] = dram_energy_units[pkg];
            break;
        case MEASURE_PSYS:
            energy_status = MSR_PLATFORM_ENERGY_STATUS;
            /* identical to the CPU units according to the March 2024 Intel Dev Manual */
            energy_units[pkg] = cpu_energy_units[pkg];
            break;
        default:
            fprintf(stderr,"Unknown measurement mode: %d\n",measurement_mode);
            exit(-1);
        }
    }

    return 0;
}
/*
 * Sanity check: read the selected energy counter once on CPU 0.
 *
 * Returns 0 when the counter holds a positive value. Exits with status 127
 * when the value is zero or negative (open_msr()/read_msr() terminate the
 * process themselves if the register cannot be opened or read).
 */
static int check_system() {
    const int msr_fd = open_msr(0);
    if (msr_fd < 0) {
        fprintf(stderr, "Couldn't open MSR 0\n");
        exit(127);
    }

    const long long counter = read_msr(msr_fd, energy_status);
    if (counter > 0) {
        close(msr_fd);
        return 0;
    }

    fprintf(stderr, "rapl MSR had 0 or negative values: %lld\n", counter);
    exit(127);
}
static void dump_rapl(int measurement_mode) {
int fd[total_packages];
struct timeval now;
uint64_t result[total_packages];
// Open MSRs for all packages
for (size_t i = 0;i < total_packages;i++) {
fd[i] = open_msr(package_map[i]);
}
// Emit CSV header
char *mode_name;
switch (measurement_mode) {
// Mode names are taken from this intel manual
// https://www.intel.com/content/dam/develop/external/us/en/documents/335592-sdm-vol-4.pdf
case MEASURE_ENERGY_PKG:
mode_name = "pkg_energy_status";
break;
case MEASURE_DRAM:
mode_name = "dram_energy_status";
break;
case MEASURE_PSYS:
mode_name = "platform_energy_counter";
break;
}
printf("package;timestamp_sec;timestamp_usec;%s,energy_status_unit\n", mode_name);
// Continuously read rapl values and dump them to csv
while (true) {
// First read out all time critical values while doing as little other stuff as possible
for (size_t i = 0;i < total_packages;i++) {
result[i] = read_msr(fd[i], energy_status);
// TODO The code currently assumes that the energy_units cannot change.
// Is this assumption correct or should we read energy_units in every cycle?
}
gettimeofday(&now, NULL);
// After all values have been read, initiate "slow" output process
for (size_t i = 0;i < total_packages;i++) {
printf("%ld;%ld;%ld;%ld;%d\n", i, now.tv_sec, now.tv_usec, result[i], raw_energy_status_units);
}
if (usleep_time > 0) {
usleep(usleep_time);
}
}
}
int main(int argc, char **argv) {
int c;
int cpu_model;
int measurement_mode = MEASURE_ENERGY_PKG;
int check_system_flag = 0;
while ((c = getopt (argc, argv, "hi:dcp")) != -1) {
switch (c) {
case 'h':
printf("Usage: %s [-h] [-m]\n\n",argv[0]);
printf("\t-h : displays this help\n");
printf("\t-i : specifies the microseconds sleep time that will be slept between measurements\n");
printf("\t-d : measure the dram energy instead of the CPU package\n");
printf("\t-p : measure the psys energy instead of the CPU package\n");
printf("\t-c : check system and exit\n");
exit(0);
case 'i':
usleep_time = atoi(optarg);
break;
case 'd':
measurement_mode=MEASURE_DRAM;
break;
case 'p':
measurement_mode=MEASURE_PSYS;
break;
case 'c':
check_system_flag = 1;
break;
default:
fprintf(stderr,"Unknown option %c\n",c);
exit(-1);
}
}
setvbuf(stdout, NULL, _IONBF, 0);
cpu_model=detect_cpu();
detect_packages();
check_availability(cpu_model, measurement_mode);
setup_measurement_units(measurement_mode);
if(check_system_flag){
exit(check_system());
}
dump_rapl(measurement_mode);
return 0;
}