aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-12-25 17:01:28 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-12-25 17:01:28 -0500
commit 10bbe7599e2755d3f3e100103967788b8b5a4bce (patch)
tree f4d5bc444584dc211c5797be5aad5e861c9181b3
parent 62906027091f1d02de44041524f0769f60bb9cf3 (diff)
parent 6886fee4d7a3afaf905a8e0bec62dc8fdc39878d (diff)
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull turbostat updates from Len Brown.

* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
  tools/power turbostat: remove obsolete -M, -m, -C, -c options
  tools/power turbostat: Make extensible via the --add parameter
  tools/power turbostat: Denverton uses a 25 MHz crystal, not 19.2 MHz
  tools/power turbostat: line up headers when -M is used
  tools/power turbostat: fix SKX PKG_CSTATE_LIMIT decoding
  tools/power turbostat: Support Knights Mill (KNM)
  tools/power turbostat: Display HWP OOB status
  tools/power turbostat: fix Denverton BCLK
  tools/power turbostat: use intel-family.h model strings
  tools/power/turbostat: Add Denverton RAPL support
  tools/power/turbostat: Add Denverton support
  tools/power/turbostat: split core MSR support into status + limit
  tools/power turbostat: fix error case overflow read of slm_freq_table[]
  tools/power turbostat: Allocate correct amount of fd and irq entries
  tools/power turbostat: switch to tab delimited output
  tools/power turbostat: Gracefully handle ACPI S3
  tools/power turbostat: tidy up output on Joule counter overflow
-rw-r--r-- tools/power/x86/turbostat/Makefile 1
-rw-r--r-- tools/power/x86/turbostat/turbostat.8 26
-rw-r--r-- tools/power/x86/turbostat/turbostat.c 1017
3 files changed, 673 insertions, 371 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 8561e7ddca59..8792ad8dbf83 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -10,6 +10,7 @@ endif
10turbostat : turbostat.c 10turbostat : turbostat.c
11CFLAGS += -Wall 11CFLAGS += -Wall
12CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' 12CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
13CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
13 14
14%: %.c 15%: %.c
15 @mkdir -p $(BUILD_OUTPUT) 16 @mkdir -p $(BUILD_OUTPUT)
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 492e84fbebfa..03cb639b292e 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -25,9 +25,27 @@ Some information is not available on older processors.
25.SS Options 25.SS Options
26Options can be specified with a single or double '-', and only as much of the option 26Options can be specified with a single or double '-', and only as much of the option
27name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. 27name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive.
28\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter.
29.PP 28.PP
30\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. 29\fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional.
30.nf
31 location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP}
32 msrDDD is a decimal offset, eg. msr16
33 msr0xXXX is a hex offset, eg. msr0x10
34
35 scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP}
36 sample and print the counter for every cpu, core, or package.
37 default: cpu
38
39 size: {\fBu32\fP | \fBu64\fP }
40 MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits.
41 default: u64
42
43 format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP}
44 'raw' shows the MSR contents in hex.
45 'delta' shows the difference in values during the measurement interval.
46 'percent' shows the delta as a percentage of the cycles elapsed.
47 default: delta
48.fi
31.PP 49.PP
32\fB--Dump\fP displays the raw counter values. 50\fB--Dump\fP displays the raw counter values.
33.PP 51.PP
@@ -43,10 +61,6 @@ The file is truncated if it already exists, and it is created if it does not exi
43.PP 61.PP
44\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. 62\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts.
45.PP 63.PP
46\fB--MSR MSR#\fP shows the specified 64-bit MSR value.
47.PP
48\fB--msr MSR#\fP shows the specified 32-bit MSR value.
49.PP
50\fB--Package\fP limits output to the system summary plus the 1st thread in each Package. 64\fB--Package\fP limits output to the system summary plus the 1st thread in each Package.
51.PP 65.PP
52\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings. 66\fB--processor\fP limits output to the system summary plus the 1st thread in each processor of each package. Ie. it skips hyper-threaded siblings.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 3e199b508a96..f13f61b065c6 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -21,6 +21,7 @@
21 21
22#define _GNU_SOURCE 22#define _GNU_SOURCE
23#include MSRHEADER 23#include MSRHEADER
24#include INTEL_FAMILY_HEADER
24#include <stdarg.h> 25#include <stdarg.h>
25#include <stdio.h> 26#include <stdio.h>
26#include <err.h> 27#include <err.h>
@@ -51,8 +52,6 @@ unsigned int debug;
51unsigned int rapl_joules; 52unsigned int rapl_joules;
52unsigned int summary_only; 53unsigned int summary_only;
53unsigned int dump_only; 54unsigned int dump_only;
54unsigned int skip_c0;
55unsigned int skip_c1;
56unsigned int do_nhm_cstates; 55unsigned int do_nhm_cstates;
57unsigned int do_snb_cstates; 56unsigned int do_snb_cstates;
58unsigned int do_knl_cstates; 57unsigned int do_knl_cstates;
@@ -72,10 +71,6 @@ unsigned int units = 1000000; /* MHz etc */
72unsigned int genuine_intel; 71unsigned int genuine_intel;
73unsigned int has_invariant_tsc; 72unsigned int has_invariant_tsc;
74unsigned int do_nhm_platform_info; 73unsigned int do_nhm_platform_info;
75unsigned int extra_msr_offset32;
76unsigned int extra_msr_offset64;
77unsigned int extra_delta_offset32;
78unsigned int extra_delta_offset64;
79unsigned int aperf_mperf_multiplier = 1; 74unsigned int aperf_mperf_multiplier = 1;
80int do_irq = 1; 75int do_irq = 1;
81int do_smi; 76int do_smi;
@@ -131,9 +126,8 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
131#define RAPL_DRAM_POWER_INFO (1 << 5) 126#define RAPL_DRAM_POWER_INFO (1 << 5)
132 /* 0x61c MSR_DRAM_POWER_INFO */ 127 /* 0x61c MSR_DRAM_POWER_INFO */
133 128
134#define RAPL_CORES (1 << 6) 129#define RAPL_CORES_POWER_LIMIT (1 << 6)
135 /* 0x638 MSR_PP0_POWER_LIMIT */ 130 /* 0x638 MSR_PP0_POWER_LIMIT */
136 /* 0x639 MSR_PP0_ENERGY_STATUS */
137#define RAPL_CORE_POLICY (1 << 7) 131#define RAPL_CORE_POLICY (1 << 7)
138 /* 0x63a MSR_PP0_POLICY */ 132 /* 0x63a MSR_PP0_POLICY */
139 133
@@ -141,11 +135,20 @@ unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
141 /* 0x640 MSR_PP1_POWER_LIMIT */ 135 /* 0x640 MSR_PP1_POWER_LIMIT */
142 /* 0x641 MSR_PP1_ENERGY_STATUS */ 136 /* 0x641 MSR_PP1_ENERGY_STATUS */
143 /* 0x642 MSR_PP1_POLICY */ 137 /* 0x642 MSR_PP1_POLICY */
138
139#define RAPL_CORES_ENERGY_STATUS (1 << 9)
140 /* 0x639 MSR_PP0_ENERGY_STATUS */
141#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
144#define TJMAX_DEFAULT 100 142#define TJMAX_DEFAULT 100
145 143
146#define MAX(a, b) ((a) > (b) ? (a) : (b)) 144#define MAX(a, b) ((a) > (b) ? (a) : (b))
147 145
148int aperf_mperf_unstable; 146/*
147 * buffer size used by sscanf() for added column names
148 * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters
149 */
150#define NAME_BYTES 20
151
149int backwards_count; 152int backwards_count;
150char *progname; 153char *progname;
151 154
@@ -157,16 +160,13 @@ struct thread_data {
157 unsigned long long aperf; 160 unsigned long long aperf;
158 unsigned long long mperf; 161 unsigned long long mperf;
159 unsigned long long c1; 162 unsigned long long c1;
160 unsigned long long extra_msr64;
161 unsigned long long extra_delta64;
162 unsigned long long extra_msr32;
163 unsigned long long extra_delta32;
164 unsigned int irq_count; 163 unsigned int irq_count;
165 unsigned int smi_count; 164 unsigned int smi_count;
166 unsigned int cpu_id; 165 unsigned int cpu_id;
167 unsigned int flags; 166 unsigned int flags;
168#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 167#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
169#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 168#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
169 unsigned long long counter[1];
170} *thread_even, *thread_odd; 170} *thread_even, *thread_odd;
171 171
172struct core_data { 172struct core_data {
@@ -175,6 +175,7 @@ struct core_data {
175 unsigned long long c7; 175 unsigned long long c7;
176 unsigned int core_temp_c; 176 unsigned int core_temp_c;
177 unsigned int core_id; 177 unsigned int core_id;
178 unsigned long long counter[1];
178} *core_even, *core_odd; 179} *core_even, *core_odd;
179 180
180struct pkg_data { 181struct pkg_data {
@@ -199,7 +200,7 @@ struct pkg_data {
199 unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ 200 unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
200 unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ 201 unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
201 unsigned int pkg_temp_c; 202 unsigned int pkg_temp_c;
202 203 unsigned long long counter[1];
203} *package_even, *package_odd; 204} *package_even, *package_odd;
204 205
205#define ODD_COUNTERS thread_odd, core_odd, package_odd 206#define ODD_COUNTERS thread_odd, core_odd, package_odd
@@ -213,11 +214,33 @@ struct pkg_data {
213 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) 214 (core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no))
214#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) 215#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
215 216
217enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
218enum counter_type {COUNTER_CYCLES, COUNTER_SECONDS};
219enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
220
221struct msr_counter {
222 unsigned int msr_num;
223 char name[NAME_BYTES];
224 unsigned int width;
225 enum counter_type type;
226 enum counter_format format;
227 struct msr_counter *next;
228};
229
230struct sys_counters {
231 unsigned int thread_counter_bytes;
232 unsigned int core_counter_bytes;
233 unsigned int package_counter_bytes;
234 struct msr_counter *tp;
235 struct msr_counter *cp;
236 struct msr_counter *pp;
237} sys;
238
216struct system_summary { 239struct system_summary {
217 struct thread_data threads; 240 struct thread_data threads;
218 struct core_data cores; 241 struct core_data cores;
219 struct pkg_data packages; 242 struct pkg_data packages;
220} sum, average; 243} average;
221 244
222 245
223struct topo_params { 246struct topo_params {
@@ -319,120 +342,148 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
319/* 342/*
320 * Example Format w/ field column widths: 343 * Example Format w/ field column widths:
321 * 344 *
322 * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 345 * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 ThreadC CoreTmp CoreCnt PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt PkgCnt
323 * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 346 * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
324 */ 347 */
325 348
326void print_header(void) 349void print_header(void)
327{ 350{
351 struct msr_counter *mp;
352
328 if (show_pkg) 353 if (show_pkg)
329 outp += sprintf(outp, " Package"); 354 outp += sprintf(outp, "\tPackage");
330 if (show_core) 355 if (show_core)
331 outp += sprintf(outp, " Core"); 356 outp += sprintf(outp, "\tCore");
332 if (show_cpu) 357 if (show_cpu)
333 outp += sprintf(outp, " CPU"); 358 outp += sprintf(outp, "\tCPU");
334 if (has_aperf) 359 if (has_aperf)
335 outp += sprintf(outp, " Avg_MHz"); 360 outp += sprintf(outp, "\tAvg_MHz");
336 if (has_aperf) 361 if (has_aperf)
337 outp += sprintf(outp, " Busy%%"); 362 outp += sprintf(outp, "\tBusy%%");
338 if (has_aperf) 363 if (has_aperf)
339 outp += sprintf(outp, " Bzy_MHz"); 364 outp += sprintf(outp, "\tBzy_MHz");
340 outp += sprintf(outp, " TSC_MHz"); 365 outp += sprintf(outp, "\tTSC_MHz");
341
342 if (extra_delta_offset32)
343 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
344 if (extra_delta_offset64)
345 outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64);
346 if (extra_msr_offset32)
347 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
348 if (extra_msr_offset64)
349 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
350 366
351 if (!debug) 367 if (!debug)
352 goto done; 368 goto done;
353 369
354 if (do_irq) 370 if (do_irq)
355 outp += sprintf(outp, " IRQ"); 371 outp += sprintf(outp, "\tIRQ");
356 if (do_smi) 372 if (do_smi)
357 outp += sprintf(outp, " SMI"); 373 outp += sprintf(outp, "\tSMI");
358 374
359 if (do_nhm_cstates) 375 if (do_nhm_cstates)
360 outp += sprintf(outp, " CPU%%c1"); 376 outp += sprintf(outp, "\tCPU%%c1");
361 if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) 377 if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
362 outp += sprintf(outp, " CPU%%c3"); 378 outp += sprintf(outp, "\tCPU%%c3");
363 if (do_nhm_cstates) 379 if (do_nhm_cstates)
364 outp += sprintf(outp, " CPU%%c6"); 380 outp += sprintf(outp, "\tCPU%%c6");
365 if (do_snb_cstates) 381 if (do_snb_cstates)
366 outp += sprintf(outp, " CPU%%c7"); 382 outp += sprintf(outp, "\tCPU%%c7");
383
384 for (mp = sys.tp; mp; mp = mp->next) {
385 if (mp->format == FORMAT_RAW) {
386 if (mp->width == 64)
387 outp += sprintf(outp, "\t%18.18s", mp->name);
388 else
389 outp += sprintf(outp, "\t%10.10s", mp->name);
390 } else {
391 outp += sprintf(outp, "\t%-7.7s", mp->name);
392 }
393 }
367 394
368 if (do_dts) 395 if (do_dts)
369 outp += sprintf(outp, " CoreTmp"); 396 outp += sprintf(outp, "\tCoreTmp");
397
398 for (mp = sys.cp; mp; mp = mp->next) {
399 if (mp->format == FORMAT_RAW) {
400 if (mp->width == 64)
401 outp += sprintf(outp, "\t%18.18s", mp->name);
402 else
403 outp += sprintf(outp, "\t%10.10s", mp->name);
404 } else {
405 outp += sprintf(outp, "\t%-7.7s", mp->name);
406 }
407 }
408
370 if (do_ptm) 409 if (do_ptm)
371 outp += sprintf(outp, " PkgTmp"); 410 outp += sprintf(outp, "\tPkgTmp");
372 411
373 if (do_gfx_rc6_ms) 412 if (do_gfx_rc6_ms)
374 outp += sprintf(outp, " GFX%%rc6"); 413 outp += sprintf(outp, "\tGFX%%rc6");
375 414
376 if (do_gfx_mhz) 415 if (do_gfx_mhz)
377 outp += sprintf(outp, " GFXMHz"); 416 outp += sprintf(outp, "\tGFXMHz");
378 417
379 if (do_skl_residency) { 418 if (do_skl_residency) {
380 outp += sprintf(outp, " Totl%%C0"); 419 outp += sprintf(outp, "\tTotl%%C0");
381 outp += sprintf(outp, " Any%%C0"); 420 outp += sprintf(outp, "\tAny%%C0");
382 outp += sprintf(outp, " GFX%%C0"); 421 outp += sprintf(outp, "\tGFX%%C0");
383 outp += sprintf(outp, " CPUGFX%%"); 422 outp += sprintf(outp, "\tCPUGFX%%");
384 } 423 }
385 424
386 if (do_pc2) 425 if (do_pc2)
387 outp += sprintf(outp, " Pkg%%pc2"); 426 outp += sprintf(outp, "\tPkg%%pc2");
388 if (do_pc3) 427 if (do_pc3)
389 outp += sprintf(outp, " Pkg%%pc3"); 428 outp += sprintf(outp, "\tPkg%%pc3");
390 if (do_pc6) 429 if (do_pc6)
391 outp += sprintf(outp, " Pkg%%pc6"); 430 outp += sprintf(outp, "\tPkg%%pc6");
392 if (do_pc7) 431 if (do_pc7)
393 outp += sprintf(outp, " Pkg%%pc7"); 432 outp += sprintf(outp, "\tPkg%%pc7");
394 if (do_c8_c9_c10) { 433 if (do_c8_c9_c10) {
395 outp += sprintf(outp, " Pkg%%pc8"); 434 outp += sprintf(outp, "\tPkg%%pc8");
396 outp += sprintf(outp, " Pkg%%pc9"); 435 outp += sprintf(outp, "\tPkg%%pc9");
397 outp += sprintf(outp, " Pk%%pc10"); 436 outp += sprintf(outp, "\tPk%%pc10");
398 } 437 }
399 438
400 if (do_rapl && !rapl_joules) { 439 if (do_rapl && !rapl_joules) {
401 if (do_rapl & RAPL_PKG) 440 if (do_rapl & RAPL_PKG)
402 outp += sprintf(outp, " PkgWatt"); 441 outp += sprintf(outp, "\tPkgWatt");
403 if (do_rapl & RAPL_CORES) 442 if (do_rapl & RAPL_CORES_ENERGY_STATUS)
404 outp += sprintf(outp, " CorWatt"); 443 outp += sprintf(outp, "\tCorWatt");
405 if (do_rapl & RAPL_GFX) 444 if (do_rapl & RAPL_GFX)
406 outp += sprintf(outp, " GFXWatt"); 445 outp += sprintf(outp, "\tGFXWatt");
407 if (do_rapl & RAPL_DRAM) 446 if (do_rapl & RAPL_DRAM)
408 outp += sprintf(outp, " RAMWatt"); 447 outp += sprintf(outp, "\tRAMWatt");
409 if (do_rapl & RAPL_PKG_PERF_STATUS) 448 if (do_rapl & RAPL_PKG_PERF_STATUS)
410 outp += sprintf(outp, " PKG_%%"); 449 outp += sprintf(outp, "\tPKG_%%");
411 if (do_rapl & RAPL_DRAM_PERF_STATUS) 450 if (do_rapl & RAPL_DRAM_PERF_STATUS)
412 outp += sprintf(outp, " RAM_%%"); 451 outp += sprintf(outp, "\tRAM_%%");
413 } else if (do_rapl && rapl_joules) { 452 } else if (do_rapl && rapl_joules) {
414 if (do_rapl & RAPL_PKG) 453 if (do_rapl & RAPL_PKG)
415 outp += sprintf(outp, " Pkg_J"); 454 outp += sprintf(outp, "\tPkg_J");
416 if (do_rapl & RAPL_CORES) 455 if (do_rapl & RAPL_CORES_ENERGY_STATUS)
417 outp += sprintf(outp, " Cor_J"); 456 outp += sprintf(outp, "\tCor_J");
418 if (do_rapl & RAPL_GFX) 457 if (do_rapl & RAPL_GFX)
419 outp += sprintf(outp, " GFX_J"); 458 outp += sprintf(outp, "\tGFX_J");
420 if (do_rapl & RAPL_DRAM) 459 if (do_rapl & RAPL_DRAM)
421 outp += sprintf(outp, " RAM_J"); 460 outp += sprintf(outp, "\tRAM_J");
422 if (do_rapl & RAPL_PKG_PERF_STATUS) 461 if (do_rapl & RAPL_PKG_PERF_STATUS)
423 outp += sprintf(outp, " PKG_%%"); 462 outp += sprintf(outp, "\tPKG_%%");
424 if (do_rapl & RAPL_DRAM_PERF_STATUS) 463 if (do_rapl & RAPL_DRAM_PERF_STATUS)
425 outp += sprintf(outp, " RAM_%%"); 464 outp += sprintf(outp, "\tRAM_%%");
426 outp += sprintf(outp, " time");
427
428 } 465 }
429 done: 466 for (mp = sys.pp; mp; mp = mp->next) {
467 if (mp->format == FORMAT_RAW) {
468 if (mp->width == 64)
469 outp += sprintf(outp, "\t%18.18s", mp->name);
470 else
471 outp += sprintf(outp, "\t%10.10s", mp->name);
472 } else {
473 outp += sprintf(outp, "\t%-7.7s", mp->name);
474 }
475 }
476
477done:
430 outp += sprintf(outp, "\n"); 478 outp += sprintf(outp, "\n");
431} 479}
432 480
433int dump_counters(struct thread_data *t, struct core_data *c, 481int dump_counters(struct thread_data *t, struct core_data *c,
434 struct pkg_data *p) 482 struct pkg_data *p)
435{ 483{
484 int i;
485 struct msr_counter *mp;
486
436 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); 487 outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
437 488
438 if (t) { 489 if (t) {
@@ -442,18 +493,16 @@ int dump_counters(struct thread_data *t, struct core_data *c,
442 outp += sprintf(outp, "aperf: %016llX\n", t->aperf); 493 outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
443 outp += sprintf(outp, "mperf: %016llX\n", t->mperf); 494 outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
444 outp += sprintf(outp, "c1: %016llX\n", t->c1); 495 outp += sprintf(outp, "c1: %016llX\n", t->c1);
445 outp += sprintf(outp, "msr0x%x: %08llX\n", 496
446 extra_delta_offset32, t->extra_delta32);
447 outp += sprintf(outp, "msr0x%x: %016llX\n",
448 extra_delta_offset64, t->extra_delta64);
449 outp += sprintf(outp, "msr0x%x: %08llX\n",
450 extra_msr_offset32, t->extra_msr32);
451 outp += sprintf(outp, "msr0x%x: %016llX\n",
452 extra_msr_offset64, t->extra_msr64);
453 if (do_irq) 497 if (do_irq)
454 outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); 498 outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
455 if (do_smi) 499 if (do_smi)
456 outp += sprintf(outp, "SMI: %08X\n", t->smi_count); 500 outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
501
502 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
503 outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
504 i, mp->msr_num, t->counter[i]);
505 }
457 } 506 }
458 507
459 if (c) { 508 if (c) {
@@ -462,6 +511,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
462 outp += sprintf(outp, "c6: %016llX\n", c->c6); 511 outp += sprintf(outp, "c6: %016llX\n", c->c6);
463 outp += sprintf(outp, "c7: %016llX\n", c->c7); 512 outp += sprintf(outp, "c7: %016llX\n", c->c7);
464 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); 513 outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
514
515 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
516 outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
517 i, mp->msr_num, c->counter[i]);
518 }
465 } 519 }
466 520
467 if (p) { 521 if (p) {
@@ -491,6 +545,11 @@ int dump_counters(struct thread_data *t, struct core_data *c,
491 outp += sprintf(outp, "Throttle RAM: %0X\n", 545 outp += sprintf(outp, "Throttle RAM: %0X\n",
492 p->rapl_dram_perf_status); 546 p->rapl_dram_perf_status);
493 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); 547 outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
548
549 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
550 outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
551 i, mp->msr_num, p->counter[i]);
552 }
494 } 553 }
495 554
496 outp += sprintf(outp, "\n"); 555 outp += sprintf(outp, "\n");
@@ -506,6 +565,8 @@ int format_counters(struct thread_data *t, struct core_data *c,
506{ 565{
507 double interval_float; 566 double interval_float;
508 char *fmt8; 567 char *fmt8;
568 int i;
569 struct msr_counter *mp;
509 570
510 /* if showing only 1st thread in core and this isn't one, bail out */ 571 /* if showing only 1st thread in core and this isn't one, bail out */
511 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 572 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -520,99 +581,103 @@ int format_counters(struct thread_data *t, struct core_data *c,
520 /* topo columns, print blanks on 1st (average) line */ 581 /* topo columns, print blanks on 1st (average) line */
521 if (t == &average.threads) { 582 if (t == &average.threads) {
522 if (show_pkg) 583 if (show_pkg)
523 outp += sprintf(outp, " -"); 584 outp += sprintf(outp, "\t-");
524 if (show_core) 585 if (show_core)
525 outp += sprintf(outp, " -"); 586 outp += sprintf(outp, "\t-");
526 if (show_cpu) 587 if (show_cpu)
527 outp += sprintf(outp, " -"); 588 outp += sprintf(outp, "\t-");
528 } else { 589 } else {
529 if (show_pkg) { 590 if (show_pkg) {
530 if (p) 591 if (p)
531 outp += sprintf(outp, "%8d", p->package_id); 592 outp += sprintf(outp, "\t%d", p->package_id);
532 else 593 else
533 outp += sprintf(outp, " -"); 594 outp += sprintf(outp, "\t-");
534 } 595 }
535 if (show_core) { 596 if (show_core) {
536 if (c) 597 if (c)
537 outp += sprintf(outp, "%8d", c->core_id); 598 outp += sprintf(outp, "\t%d", c->core_id);
538 else 599 else
539 outp += sprintf(outp, " -"); 600 outp += sprintf(outp, "\t-");
540 } 601 }
541 if (show_cpu) 602 if (show_cpu)
542 outp += sprintf(outp, "%8d", t->cpu_id); 603 outp += sprintf(outp, "\t%d", t->cpu_id);
543 } 604 }
544 605
545 /* Avg_MHz */ 606 /* Avg_MHz */
546 if (has_aperf) 607 if (has_aperf)
547 outp += sprintf(outp, "%8.0f", 608 outp += sprintf(outp, "\t%.0f",
548 1.0 / units * t->aperf / interval_float); 609 1.0 / units * t->aperf / interval_float);
549 610
550 /* Busy% */ 611 /* Busy% */
551 if (has_aperf) { 612 if (has_aperf)
552 if (!skip_c0) 613 outp += sprintf(outp, "\t%.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
553 outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
554 else
555 outp += sprintf(outp, "********");
556 }
557 614
558 /* Bzy_MHz */ 615 /* Bzy_MHz */
559 if (has_aperf) { 616 if (has_aperf) {
560 if (has_base_hz) 617 if (has_base_hz)
561 outp += sprintf(outp, "%8.0f", base_hz / units * t->aperf / t->mperf); 618 outp += sprintf(outp, "\t%.0f", base_hz / units * t->aperf / t->mperf);
562 else 619 else
563 outp += sprintf(outp, "%8.0f", 620 outp += sprintf(outp, "\t%.0f",
564 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); 621 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
565 } 622 }
566 623
567 /* TSC_MHz */ 624 /* TSC_MHz */
568 outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); 625 outp += sprintf(outp, "\t%.0f", 1.0 * t->tsc/units/interval_float);
569
570 /* delta */
571 if (extra_delta_offset32)
572 outp += sprintf(outp, " %11llu", t->extra_delta32);
573
574 /* DELTA */
575 if (extra_delta_offset64)
576 outp += sprintf(outp, " %11llu", t->extra_delta64);
577 /* msr */
578 if (extra_msr_offset32)
579 outp += sprintf(outp, " 0x%08llx", t->extra_msr32);
580
581 /* MSR */
582 if (extra_msr_offset64)
583 outp += sprintf(outp, " 0x%016llx", t->extra_msr64);
584 626
585 if (!debug) 627 if (!debug)
586 goto done; 628 goto done;
587 629
588 /* IRQ */ 630 /* IRQ */
589 if (do_irq) 631 if (do_irq)
590 outp += sprintf(outp, "%8d", t->irq_count); 632 outp += sprintf(outp, "\t%d", t->irq_count);
591 633
592 /* SMI */ 634 /* SMI */
593 if (do_smi) 635 if (do_smi)
594 outp += sprintf(outp, "%8d", t->smi_count); 636 outp += sprintf(outp, "\t%d", t->smi_count);
595 637
596 if (do_nhm_cstates) { 638 if (do_nhm_cstates)
597 if (!skip_c1) 639 outp += sprintf(outp, "\t%.2f", 100.0 * t->c1/t->tsc);
598 outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
599 else
600 outp += sprintf(outp, "********");
601 }
602 640
603 /* print per-core data only for 1st thread in core */ 641 /* print per-core data only for 1st thread in core */
604 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 642 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
605 goto done; 643 goto done;
606 644
607 if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) 645 if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates)
608 outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); 646 outp += sprintf(outp, "\t%.2f", 100.0 * c->c3/t->tsc);
609 if (do_nhm_cstates) 647 if (do_nhm_cstates)
610 outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); 648 outp += sprintf(outp, "\t%.2f", 100.0 * c->c6/t->tsc);
611 if (do_snb_cstates) 649 if (do_snb_cstates)
612 outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc); 650 outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc);
651
652 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
653 if (mp->format == FORMAT_RAW) {
654 if (mp->width == 32)
655 outp += sprintf(outp, "\t0x%08lx", (unsigned long) t->counter[i]);
656 else
657 outp += sprintf(outp, "\t0x%016llx", t->counter[i]);
658 } else if (mp->format == FORMAT_DELTA) {
659 outp += sprintf(outp, "\t%8lld", t->counter[i]);
660 } else if (mp->format == FORMAT_PERCENT) {
661 outp += sprintf(outp, "\t%.2f", 100.0 * t->counter[i]/t->tsc);
662 }
663 }
664
613 665
614 if (do_dts) 666 if (do_dts)
615 outp += sprintf(outp, "%8d", c->core_temp_c); 667 outp += sprintf(outp, "\t%d", c->core_temp_c);
668
669 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
670 if (mp->format == FORMAT_RAW) {
671 if (mp->width == 32)
672 outp += sprintf(outp, "\t0x%08lx", (unsigned long) c->counter[i]);
673 else
674 outp += sprintf(outp, "\t0x%016llx", c->counter[i]);
675 } else if (mp->format == FORMAT_DELTA) {
676 outp += sprintf(outp, "\t%8lld", c->counter[i]);
677 } else if (mp->format == FORMAT_PERCENT) {
678 outp += sprintf(outp, "\t%.2f", 100.0 * c->counter[i]/t->tsc);
679 }
680 }
616 681
617 /* print per-package data only for 1st core in package */ 682 /* print per-package data only for 1st core in package */
618 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 683 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
@@ -620,42 +685,42 @@ int format_counters(struct thread_data *t, struct core_data *c,
620 685
621 /* PkgTmp */ 686 /* PkgTmp */
622 if (do_ptm) 687 if (do_ptm)
623 outp += sprintf(outp, "%8d", p->pkg_temp_c); 688 outp += sprintf(outp, "\t%d", p->pkg_temp_c);
624 689
625 /* GFXrc6 */ 690 /* GFXrc6 */
626 if (do_gfx_rc6_ms) { 691 if (do_gfx_rc6_ms) {
627 if (p->gfx_rc6_ms == -1) { /* detect counter reset */ 692 if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */
628 outp += sprintf(outp, " ***.**"); 693 outp += sprintf(outp, "\t**.**");
629 } else { 694 } else {
630 outp += sprintf(outp, "%8.2f", 695 outp += sprintf(outp, "\t%.2f",
631 p->gfx_rc6_ms / 10.0 / interval_float); 696 p->gfx_rc6_ms / 10.0 / interval_float);
632 } 697 }
633 } 698 }
634 699
635 /* GFXMHz */ 700 /* GFXMHz */
636 if (do_gfx_mhz) 701 if (do_gfx_mhz)
637 outp += sprintf(outp, "%8d", p->gfx_mhz); 702 outp += sprintf(outp, "\t%d", p->gfx_mhz);
638 703
639 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 704 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
640 if (do_skl_residency) { 705 if (do_skl_residency) {
641 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); 706 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
642 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); 707 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
643 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); 708 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
644 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); 709 outp += sprintf(outp, "\t%.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
645 } 710 }
646 711
647 if (do_pc2) 712 if (do_pc2)
648 outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); 713 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc2/t->tsc);
649 if (do_pc3) 714 if (do_pc3)
650 outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc); 715 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc3/t->tsc);
651 if (do_pc6) 716 if (do_pc6)
652 outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc); 717 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc6/t->tsc);
653 if (do_pc7) 718 if (do_pc7)
654 outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc); 719 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc7/t->tsc);
655 if (do_c8_c9_c10) { 720 if (do_c8_c9_c10) {
656 outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc); 721 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc8/t->tsc);
657 outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc); 722 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc9/t->tsc);
658 outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc); 723 outp += sprintf(outp, "\t%.2f", 100.0 * p->pc10/t->tsc);
659 } 724 }
660 725
661 /* 726 /*
@@ -663,14 +728,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
663 * indicate that results are suspect by printing "**" in fraction place. 728 * indicate that results are suspect by printing "**" in fraction place.
664 */ 729 */
665 if (interval_float < rapl_joule_counter_range) 730 if (interval_float < rapl_joule_counter_range)
666 fmt8 = "%8.2f"; 731 fmt8 = "\t%.2f";
667 else 732 else
668 fmt8 = " %6.0f**"; 733 fmt8 = "%6.0f**";
669 734
670 if (do_rapl && !rapl_joules) { 735 if (do_rapl && !rapl_joules) {
671 if (do_rapl & RAPL_PKG) 736 if (do_rapl & RAPL_PKG)
672 outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); 737 outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float);
673 if (do_rapl & RAPL_CORES) 738 if (do_rapl & RAPL_CORES_ENERGY_STATUS)
674 outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); 739 outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float);
675 if (do_rapl & RAPL_GFX) 740 if (do_rapl & RAPL_GFX)
676 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); 741 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
@@ -697,9 +762,20 @@ int format_counters(struct thread_data *t, struct core_data *c,
697 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); 762 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
698 if (do_rapl & RAPL_DRAM_PERF_STATUS) 763 if (do_rapl & RAPL_DRAM_PERF_STATUS)
699 outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); 764 outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
700
701 outp += sprintf(outp, fmt8, interval_float);
702 } 765 }
766 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
767 if (mp->format == FORMAT_RAW) {
768 if (mp->width == 32)
769 outp += sprintf(outp, "\t0x%08lx", (unsigned long) p->counter[i]);
770 else
771 outp += sprintf(outp, "\t0x%016llx", p->counter[i]);
772 } else if (mp->format == FORMAT_DELTA) {
773 outp += sprintf(outp, "\t%8lld", p->counter[i]);
774 } else if (mp->format == FORMAT_PERCENT) {
775 outp += sprintf(outp, "\t%.2f", 100.0 * p->counter[i]/t->tsc);
776 }
777 }
778
703done: 779done:
704 outp += sprintf(outp, "\n"); 780 outp += sprintf(outp, "\n");
705 781
@@ -752,9 +828,11 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
752 old = 0x100000000 + new - old; \ 828 old = 0x100000000 + new - old; \
753 } 829 }
754 830
755void 831int
756delta_package(struct pkg_data *new, struct pkg_data *old) 832delta_package(struct pkg_data *new, struct pkg_data *old)
757{ 833{
834 int i;
835 struct msr_counter *mp;
758 836
759 if (do_skl_residency) { 837 if (do_skl_residency) {
760 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; 838 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
@@ -788,24 +866,46 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
788 DELTA_WRAP32(new->energy_dram, old->energy_dram); 866 DELTA_WRAP32(new->energy_dram, old->energy_dram);
789 DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); 867 DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
790 DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); 868 DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
869
870 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
871 if (mp->format == FORMAT_RAW)
872 old->counter[i] = new->counter[i];
873 else
874 old->counter[i] = new->counter[i] - old->counter[i];
875 }
876
877 return 0;
791} 878}
792 879
793void 880void
794delta_core(struct core_data *new, struct core_data *old) 881delta_core(struct core_data *new, struct core_data *old)
795{ 882{
883 int i;
884 struct msr_counter *mp;
885
796 old->c3 = new->c3 - old->c3; 886 old->c3 = new->c3 - old->c3;
797 old->c6 = new->c6 - old->c6; 887 old->c6 = new->c6 - old->c6;
798 old->c7 = new->c7 - old->c7; 888 old->c7 = new->c7 - old->c7;
799 old->core_temp_c = new->core_temp_c; 889 old->core_temp_c = new->core_temp_c;
890
891 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
892 if (mp->format == FORMAT_RAW)
893 old->counter[i] = new->counter[i];
894 else
895 old->counter[i] = new->counter[i] - old->counter[i];
896 }
800} 897}
801 898
802/* 899/*
803 * old = new - old 900 * old = new - old
804 */ 901 */
805void 902int
806delta_thread(struct thread_data *new, struct thread_data *old, 903delta_thread(struct thread_data *new, struct thread_data *old,
807 struct core_data *core_delta) 904 struct core_data *core_delta)
808{ 905{
906 int i;
907 struct msr_counter *mp;
908
809 old->tsc = new->tsc - old->tsc; 909 old->tsc = new->tsc - old->tsc;
810 910
811 /* check for TSC < 1 Mcycles over interval */ 911 /* check for TSC < 1 Mcycles over interval */
@@ -821,20 +921,7 @@ delta_thread(struct thread_data *new, struct thread_data *old,
821 old->aperf = new->aperf - old->aperf; 921 old->aperf = new->aperf - old->aperf;
822 old->mperf = new->mperf - old->mperf; 922 old->mperf = new->mperf - old->mperf;
823 } else { 923 } else {
824 924 return -1;
825 if (!aperf_mperf_unstable) {
826 fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
827 fprintf(outf, "* Frequency results do not cover entire interval *\n");
828 fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
829
830 aperf_mperf_unstable = 1;
831 }
832 /*
833 * mperf delta is likely a huge "positive" number
834 * can not use it for calculating c0 time
835 */
836 skip_c0 = 1;
837 skip_c1 = 1;
838 } 925 }
839 } 926 }
840 927
@@ -865,52 +952,53 @@ delta_thread(struct thread_data *new, struct thread_data *old,
865 old->mperf = 1; /* divide by 0 protection */ 952 old->mperf = 1; /* divide by 0 protection */
866 } 953 }
867 954
868 old->extra_delta32 = new->extra_delta32 - old->extra_delta32;
869 old->extra_delta32 &= 0xFFFFFFFF;
870
871 old->extra_delta64 = new->extra_delta64 - old->extra_delta64;
872
873 /*
874 * Extra MSR is just a snapshot, simply copy latest w/o subtracting
875 */
876 old->extra_msr32 = new->extra_msr32;
877 old->extra_msr64 = new->extra_msr64;
878
879 if (do_irq) 955 if (do_irq)
880 old->irq_count = new->irq_count - old->irq_count; 956 old->irq_count = new->irq_count - old->irq_count;
881 957
882 if (do_smi) 958 if (do_smi)
883 old->smi_count = new->smi_count - old->smi_count; 959 old->smi_count = new->smi_count - old->smi_count;
960
961 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
962 if (mp->format == FORMAT_RAW)
963 old->counter[i] = new->counter[i];
964 else
965 old->counter[i] = new->counter[i] - old->counter[i];
966 }
967 return 0;
884} 968}
885 969
886int delta_cpu(struct thread_data *t, struct core_data *c, 970int delta_cpu(struct thread_data *t, struct core_data *c,
887 struct pkg_data *p, struct thread_data *t2, 971 struct pkg_data *p, struct thread_data *t2,
888 struct core_data *c2, struct pkg_data *p2) 972 struct core_data *c2, struct pkg_data *p2)
889{ 973{
974 int retval = 0;
975
890 /* calculate core delta only for 1st thread in core */ 976 /* calculate core delta only for 1st thread in core */
891 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) 977 if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
892 delta_core(c, c2); 978 delta_core(c, c2);
893 979
894 /* always calculate thread delta */ 980 /* always calculate thread delta */
895 delta_thread(t, t2, c2); /* c2 is core delta */ 981 retval = delta_thread(t, t2, c2); /* c2 is core delta */
982 if (retval)
983 return retval;
896 984
897 /* calculate package delta only for 1st core in package */ 985 /* calculate package delta only for 1st core in package */
898 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) 986 if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
899 delta_package(p, p2); 987 retval = delta_package(p, p2);
900 988
901 return 0; 989 return retval;
902} 990}
903 991
904void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 992void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
905{ 993{
994 int i;
995 struct msr_counter *mp;
996
906 t->tsc = 0; 997 t->tsc = 0;
907 t->aperf = 0; 998 t->aperf = 0;
908 t->mperf = 0; 999 t->mperf = 0;
909 t->c1 = 0; 1000 t->c1 = 0;
910 1001
911 t->extra_delta32 = 0;
912 t->extra_delta64 = 0;
913
914 t->irq_count = 0; 1002 t->irq_count = 0;
915 t->smi_count = 0; 1003 t->smi_count = 0;
916 1004
@@ -948,21 +1036,36 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
948 1036
949 p->gfx_rc6_ms = 0; 1037 p->gfx_rc6_ms = 0;
950 p->gfx_mhz = 0; 1038 p->gfx_mhz = 0;
1039
1040 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
1041 t->counter[i] = 0;
1042
1043 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
1044 c->counter[i] = 0;
1045
1046 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
1047 p->counter[i] = 0;
951} 1048}
952int sum_counters(struct thread_data *t, struct core_data *c, 1049int sum_counters(struct thread_data *t, struct core_data *c,
953 struct pkg_data *p) 1050 struct pkg_data *p)
954{ 1051{
1052 int i;
1053 struct msr_counter *mp;
1054
955 average.threads.tsc += t->tsc; 1055 average.threads.tsc += t->tsc;
956 average.threads.aperf += t->aperf; 1056 average.threads.aperf += t->aperf;
957 average.threads.mperf += t->mperf; 1057 average.threads.mperf += t->mperf;
958 average.threads.c1 += t->c1; 1058 average.threads.c1 += t->c1;
959 1059
960 average.threads.extra_delta32 += t->extra_delta32;
961 average.threads.extra_delta64 += t->extra_delta64;
962
963 average.threads.irq_count += t->irq_count; 1060 average.threads.irq_count += t->irq_count;
964 average.threads.smi_count += t->smi_count; 1061 average.threads.smi_count += t->smi_count;
965 1062
1063 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1064 if (mp->format == FORMAT_RAW)
1065 continue;
1066 average.threads.counter[i] += t->counter[i];
1067 }
1068
966 /* sum per-core values only for 1st thread in core */ 1069 /* sum per-core values only for 1st thread in core */
967 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1070 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
968 return 0; 1071 return 0;
@@ -973,6 +1076,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
973 1076
974 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); 1077 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
975 1078
1079 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1080 if (mp->format == FORMAT_RAW)
1081 continue;
1082 average.cores.counter[i] += c->counter[i];
1083 }
1084
976 /* sum per-pkg values only for 1st core in pkg */ 1085 /* sum per-pkg values only for 1st core in pkg */
977 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1086 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
978 return 0; 1087 return 0;
@@ -1007,6 +1116,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
1007 1116
1008 average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; 1117 average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
1009 average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; 1118 average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
1119
1120 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1121 if (mp->format == FORMAT_RAW)
1122 continue;
1123 average.packages.counter[i] += p->counter[i];
1124 }
1010 return 0; 1125 return 0;
1011} 1126}
1012/* 1127/*
@@ -1016,6 +1131,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
1016void compute_average(struct thread_data *t, struct core_data *c, 1131void compute_average(struct thread_data *t, struct core_data *c,
1017 struct pkg_data *p) 1132 struct pkg_data *p)
1018{ 1133{
1134 int i;
1135 struct msr_counter *mp;
1136
1019 clear_counters(&average.threads, &average.cores, &average.packages); 1137 clear_counters(&average.threads, &average.cores, &average.packages);
1020 1138
1021 for_all_cpus(sum_counters, t, c, p); 1139 for_all_cpus(sum_counters, t, c, p);
@@ -1025,11 +1143,6 @@ void compute_average(struct thread_data *t, struct core_data *c,
1025 average.threads.mperf /= topo.num_cpus; 1143 average.threads.mperf /= topo.num_cpus;
1026 average.threads.c1 /= topo.num_cpus; 1144 average.threads.c1 /= topo.num_cpus;
1027 1145
1028 average.threads.extra_delta32 /= topo.num_cpus;
1029 average.threads.extra_delta32 &= 0xFFFFFFFF;
1030
1031 average.threads.extra_delta64 /= topo.num_cpus;
1032
1033 average.cores.c3 /= topo.num_cores; 1146 average.cores.c3 /= topo.num_cores;
1034 average.cores.c6 /= topo.num_cores; 1147 average.cores.c6 /= topo.num_cores;
1035 average.cores.c7 /= topo.num_cores; 1148 average.cores.c7 /= topo.num_cores;
@@ -1052,6 +1165,22 @@ void compute_average(struct thread_data *t, struct core_data *c,
1052 average.packages.pc8 /= topo.num_packages; 1165 average.packages.pc8 /= topo.num_packages;
1053 average.packages.pc9 /= topo.num_packages; 1166 average.packages.pc9 /= topo.num_packages;
1054 average.packages.pc10 /= topo.num_packages; 1167 average.packages.pc10 /= topo.num_packages;
1168
1169 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1170 if (mp->format == FORMAT_RAW)
1171 continue;
1172 average.threads.counter[i] /= topo.num_cpus;
1173 }
1174 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1175 if (mp->format == FORMAT_RAW)
1176 continue;
1177 average.cores.counter[i] /= topo.num_cores;
1178 }
1179 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1180 if (mp->format == FORMAT_RAW)
1181 continue;
1182 average.packages.counter[i] /= topo.num_packages;
1183 }
1055} 1184}
1056 1185
1057static unsigned long long rdtsc(void) 1186static unsigned long long rdtsc(void)
@@ -1073,6 +1202,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1073 int cpu = t->cpu_id; 1202 int cpu = t->cpu_id;
1074 unsigned long long msr; 1203 unsigned long long msr;
1075 int aperf_mperf_retry_count = 0; 1204 int aperf_mperf_retry_count = 0;
1205 struct msr_counter *mp;
1206 int i;
1076 1207
1077 if (cpu_migrate(cpu)) { 1208 if (cpu_migrate(cpu)) {
1078 fprintf(outf, "Could not migrate to CPU %d\n", cpu); 1209 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
@@ -1145,31 +1276,18 @@ retry:
1145 return -5; 1276 return -5;
1146 t->smi_count = msr & 0xFFFFFFFF; 1277 t->smi_count = msr & 0xFFFFFFFF;
1147 } 1278 }
1148 if (extra_delta_offset32) {
1149 if (get_msr(cpu, extra_delta_offset32, &msr))
1150 return -5;
1151 t->extra_delta32 = msr & 0xFFFFFFFF;
1152 }
1153
1154 if (extra_delta_offset64)
1155 if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64))
1156 return -5;
1157
1158 if (extra_msr_offset32) {
1159 if (get_msr(cpu, extra_msr_offset32, &msr))
1160 return -5;
1161 t->extra_msr32 = msr & 0xFFFFFFFF;
1162 }
1163
1164 if (extra_msr_offset64)
1165 if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64))
1166 return -5;
1167 1279
1168 if (use_c1_residency_msr) { 1280 if (use_c1_residency_msr) {
1169 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) 1281 if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
1170 return -6; 1282 return -6;
1171 } 1283 }
1172 1284
1285 for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
1286 if (get_msr(cpu, mp->msr_num, &t->counter[i]))
1287 return -10;
1288 }
1289
1290
1173 /* collect core counters only for 1st thread in core */ 1291 /* collect core counters only for 1st thread in core */
1174 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 1292 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1175 return 0; 1293 return 0;
@@ -1197,6 +1315,10 @@ retry:
1197 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 1315 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1198 } 1316 }
1199 1317
1318 for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
1319 if (get_msr(cpu, mp->msr_num, &c->counter[i]))
1320 return -10;
1321 }
1200 1322
1201 /* collect package counters only for 1st core in package */ 1323 /* collect package counters only for 1st core in package */
1202 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1324 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
@@ -1237,7 +1359,7 @@ retry:
1237 return -13; 1359 return -13;
1238 p->energy_pkg = msr & 0xFFFFFFFF; 1360 p->energy_pkg = msr & 0xFFFFFFFF;
1239 } 1361 }
1240 if (do_rapl & RAPL_CORES) { 1362 if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
1241 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) 1363 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
1242 return -14; 1364 return -14;
1243 p->energy_cores = msr & 0xFFFFFFFF; 1365 p->energy_cores = msr & 0xFFFFFFFF;
@@ -1274,6 +1396,11 @@ retry:
1274 if (do_gfx_mhz) 1396 if (do_gfx_mhz)
1275 p->gfx_mhz = gfx_cur_mhz; 1397 p->gfx_mhz = gfx_cur_mhz;
1276 1398
1399 for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
1400 if (get_msr(cpu, mp->msr_num, &p->counter[i]))
1401 return -10;
1402 }
1403
1277 return 0; 1404 return 0;
1278} 1405}
1279 1406
@@ -1310,6 +1437,7 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV,
1310int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1437int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1311int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1438int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1312int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; 1439int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1440int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1313 1441
1314 1442
1315static void 1443static void
@@ -1638,7 +1766,7 @@ void free_fd_percpu(void)
1638{ 1766{
1639 int i; 1767 int i;
1640 1768
1641 for (i = 0; i < topo.max_cpu_num; ++i) { 1769 for (i = 0; i < topo.max_cpu_num + 1; ++i) {
1642 if (fd_percpu[i] != 0) 1770 if (fd_percpu[i] != 0)
1643 close(fd_percpu[i]); 1771 close(fd_percpu[i]);
1644 } 1772 }
@@ -2071,7 +2199,10 @@ restart:
2071 } 2199 }
2072 gettimeofday(&tv_odd, (struct timezone *)NULL); 2200 gettimeofday(&tv_odd, (struct timezone *)NULL);
2073 timersub(&tv_odd, &tv_even, &tv_delta); 2201 timersub(&tv_odd, &tv_even, &tv_delta);
2074 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 2202 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
2203 re_initialize();
2204 goto restart;
2205 }
2075 compute_average(EVEN_COUNTERS); 2206 compute_average(EVEN_COUNTERS);
2076 format_all_counters(EVEN_COUNTERS); 2207 format_all_counters(EVEN_COUNTERS);
2077 flush_output_stdout(); 2208 flush_output_stdout();
@@ -2087,7 +2218,10 @@ restart:
2087 } 2218 }
2088 gettimeofday(&tv_even, (struct timezone *)NULL); 2219 gettimeofday(&tv_even, (struct timezone *)NULL);
2089 timersub(&tv_even, &tv_odd, &tv_delta); 2220 timersub(&tv_even, &tv_odd, &tv_delta);
2090 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 2221 if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
2222 re_initialize();
2223 goto restart;
2224 }
2091 compute_average(ODD_COUNTERS); 2225 compute_average(ODD_COUNTERS);
2092 format_all_counters(ODD_COUNTERS); 2226 format_all_counters(ODD_COUNTERS);
2093 flush_output_stdout(); 2227 flush_output_stdout();
@@ -2174,47 +2308,51 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
2174 bclk = discover_bclk(family, model); 2308 bclk = discover_bclk(family, model);
2175 2309
2176 switch (model) { 2310 switch (model) {
2177 case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ 2311 case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
2178 case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ 2312 case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
2179 case 0x1F: /* Core i7 and i5 Processor - Nehalem */ 2313 case 0x1F: /* Core i7 and i5 Processor - Nehalem */
2180 case 0x25: /* Westmere Client - Clarkdale, Arrandale */ 2314 case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */
2181 case 0x2C: /* Westmere EP - Gulftown */ 2315 case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */
2182 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 2316 case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
2183 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 2317 case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */
2184 pkg_cstate_limits = nhm_pkg_cstate_limits; 2318 pkg_cstate_limits = nhm_pkg_cstate_limits;
2185 break; 2319 break;
2186 case 0x2A: /* SNB */ 2320 case INTEL_FAM6_SANDYBRIDGE: /* SNB */
2187 case 0x2D: /* SNB Xeon */ 2321 case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */
2188 case 0x3A: /* IVB */ 2322 case INTEL_FAM6_IVYBRIDGE: /* IVB */
2189 case 0x3E: /* IVB Xeon */ 2323 case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
2190 pkg_cstate_limits = snb_pkg_cstate_limits; 2324 pkg_cstate_limits = snb_pkg_cstate_limits;
2191 break; 2325 break;
2192 case 0x3C: /* HSW */ 2326 case INTEL_FAM6_HASWELL_CORE: /* HSW */
2193 case 0x3F: /* HSX */ 2327 case INTEL_FAM6_HASWELL_X: /* HSX */
2194 case 0x45: /* HSW */ 2328 case INTEL_FAM6_HASWELL_ULT: /* HSW */
2195 case 0x46: /* HSW */ 2329 case INTEL_FAM6_HASWELL_GT3E: /* HSW */
2196 case 0x3D: /* BDW */ 2330 case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2197 case 0x47: /* BDW */ 2331 case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2198 case 0x4F: /* BDX */ 2332 case INTEL_FAM6_BROADWELL_X: /* BDX */
2199 case 0x56: /* BDX-DE */ 2333 case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
2200 case 0x4E: /* SKL */ 2334 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2201 case 0x5E: /* SKL */ 2335 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2202 case 0x8E: /* KBL */ 2336 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2203 case 0x9E: /* KBL */ 2337 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
2204 case 0x55: /* SKX */
2205 pkg_cstate_limits = hsw_pkg_cstate_limits; 2338 pkg_cstate_limits = hsw_pkg_cstate_limits;
2206 break; 2339 break;
2207 case 0x37: /* BYT */ 2340 case INTEL_FAM6_SKYLAKE_X: /* SKX */
2208 case 0x4D: /* AVN */ 2341 pkg_cstate_limits = skx_pkg_cstate_limits;
2342 break;
2343 case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
2344 case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
2209 pkg_cstate_limits = slv_pkg_cstate_limits; 2345 pkg_cstate_limits = slv_pkg_cstate_limits;
2210 break; 2346 break;
2211 case 0x4C: /* AMT */ 2347 case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
2212 pkg_cstate_limits = amt_pkg_cstate_limits; 2348 pkg_cstate_limits = amt_pkg_cstate_limits;
2213 break; 2349 break;
2214 case 0x57: /* PHI */ 2350 case INTEL_FAM6_XEON_PHI_KNL: /* PHI */
2351 case INTEL_FAM6_XEON_PHI_KNM:
2215 pkg_cstate_limits = phi_pkg_cstate_limits; 2352 pkg_cstate_limits = phi_pkg_cstate_limits;
2216 break; 2353 break;
2217 case 0x5C: /* BXT */ 2354 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
2355 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2218 pkg_cstate_limits = bxt_pkg_cstate_limits; 2356 pkg_cstate_limits = bxt_pkg_cstate_limits;
2219 break; 2357 break;
2220 default: 2358 default:
@@ -2234,9 +2372,10 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
2234{ 2372{
2235 switch (model) { 2373 switch (model) {
2236 /* Nehalem compatible, but do not include turbo-ratio limit support */ 2374 /* Nehalem compatible, but do not include turbo-ratio limit support */
2237 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 2375 case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
2238 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 2376 case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */
2239 case 0x57: /* PHI - Knights Landing (different MSR definition) */ 2377 case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */
2378 case INTEL_FAM6_XEON_PHI_KNM:
2240 return 0; 2379 return 0;
2241 default: 2380 default:
2242 return 1; 2381 return 1;
@@ -2251,8 +2390,8 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
2251 return 0; 2390 return 0;
2252 2391
2253 switch (model) { 2392 switch (model) {
2254 case 0x3E: /* IVB Xeon */ 2393 case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
2255 case 0x3F: /* HSW Xeon */ 2394 case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
2256 return 1; 2395 return 1;
2257 default: 2396 default:
2258 return 0; 2397 return 0;
@@ -2267,7 +2406,7 @@ int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
2267 return 0; 2406 return 0;
2268 2407
2269 switch (model) { 2408 switch (model) {
2270 case 0x3F: /* HSW Xeon */ 2409 case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
2271 return 1; 2410 return 1;
2272 default: 2411 default:
2273 return 0; 2412 return 0;
@@ -2283,7 +2422,8 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
2283 return 0; 2422 return 0;
2284 2423
2285 switch (model) { 2424 switch (model) {
2286 case 0x57: /* Knights Landing */ 2425 case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
2426 case INTEL_FAM6_XEON_PHI_KNM:
2287 return 1; 2427 return 1;
2288 default: 2428 default:
2289 return 0; 2429 return 0;
@@ -2298,22 +2438,23 @@ int has_config_tdp(unsigned int family, unsigned int model)
2298 return 0; 2438 return 0;
2299 2439
2300 switch (model) { 2440 switch (model) {
2301 case 0x3A: /* IVB */ 2441 case INTEL_FAM6_IVYBRIDGE: /* IVB */
2302 case 0x3C: /* HSW */ 2442 case INTEL_FAM6_HASWELL_CORE: /* HSW */
2303 case 0x3F: /* HSX */ 2443 case INTEL_FAM6_HASWELL_X: /* HSX */
2304 case 0x45: /* HSW */ 2444 case INTEL_FAM6_HASWELL_ULT: /* HSW */
2305 case 0x46: /* HSW */ 2445 case INTEL_FAM6_HASWELL_GT3E: /* HSW */
2306 case 0x3D: /* BDW */ 2446 case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2307 case 0x47: /* BDW */ 2447 case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2308 case 0x4F: /* BDX */ 2448 case INTEL_FAM6_BROADWELL_X: /* BDX */
2309 case 0x56: /* BDX-DE */ 2449 case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
2310 case 0x4E: /* SKL */ 2450 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2311 case 0x5E: /* SKL */ 2451 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2312 case 0x8E: /* KBL */ 2452 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2313 case 0x9E: /* KBL */ 2453 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
2314 case 0x55: /* SKX */ 2454 case INTEL_FAM6_SKYLAKE_X: /* SKX */
2315 2455
2316 case 0x57: /* Knights Landing */ 2456 case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
2457 case INTEL_FAM6_XEON_PHI_KNM:
2317 return 1; 2458 return 1;
2318 default: 2459 default:
2319 return 0; 2460 return 0;
@@ -2593,8 +2734,8 @@ double get_tdp(unsigned int model)
2593 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; 2734 return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
2594 2735
2595 switch (model) { 2736 switch (model) {
2596 case 0x37: 2737 case INTEL_FAM6_ATOM_SILVERMONT1:
2597 case 0x4D: 2738 case INTEL_FAM6_ATOM_SILVERMONT2:
2598 return 30.0; 2739 return 30.0;
2599 default: 2740 default:
2600 return 135.0; 2741 return 135.0;
@@ -2611,10 +2752,11 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units)
2611 /* only called for genuine_intel, family 6 */ 2752 /* only called for genuine_intel, family 6 */
2612 2753
2613 switch (model) { 2754 switch (model) {
2614 case 0x3F: /* HSX */ 2755 case INTEL_FAM6_HASWELL_X: /* HSX */
2615 case 0x4F: /* BDX */ 2756 case INTEL_FAM6_BROADWELL_X: /* BDX */
2616 case 0x56: /* BDX-DE */ 2757 case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
2617 case 0x57: /* KNL */ 2758 case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
2759 case INTEL_FAM6_XEON_PHI_KNM:
2618 return (rapl_dram_energy_units = 15.3 / 1000000); 2760 return (rapl_dram_energy_units = 15.3 / 1000000);
2619 default: 2761 default:
2620 return (rapl_energy_units); 2762 return (rapl_energy_units);
@@ -2640,38 +2782,42 @@ void rapl_probe(unsigned int family, unsigned int model)
2640 return; 2782 return;
2641 2783
2642 switch (model) { 2784 switch (model) {
2643 case 0x2A: 2785 case INTEL_FAM6_SANDYBRIDGE:
2644 case 0x3A: 2786 case INTEL_FAM6_IVYBRIDGE:
2645 case 0x3C: /* HSW */ 2787 case INTEL_FAM6_HASWELL_CORE: /* HSW */
2646 case 0x45: /* HSW */ 2788 case INTEL_FAM6_HASWELL_ULT: /* HSW */
2647 case 0x46: /* HSW */ 2789 case INTEL_FAM6_HASWELL_GT3E: /* HSW */
2648 case 0x3D: /* BDW */ 2790 case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2649 case 0x47: /* BDW */ 2791 case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2650 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; 2792 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
2651 break; 2793 break;
2652 case 0x5C: /* BXT */ 2794 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
2653 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; 2795 do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
2654 break; 2796 break;
2655 case 0x4E: /* SKL */ 2797 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2656 case 0x5E: /* SKL */ 2798 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2657 case 0x8E: /* KBL */ 2799 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2658 case 0x9E: /* KBL */ 2800 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
2659 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; 2801 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2660 break; 2802 break;
2661 case 0x3F: /* HSX */ 2803 case INTEL_FAM6_HASWELL_X: /* HSX */
2662 case 0x4F: /* BDX */ 2804 case INTEL_FAM6_BROADWELL_X: /* BDX */
2663 case 0x56: /* BDX-DE */ 2805 case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
2664 case 0x55: /* SKX */ 2806 case INTEL_FAM6_SKYLAKE_X: /* SKX */
2665 case 0x57: /* KNL */ 2807 case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
2808 case INTEL_FAM6_XEON_PHI_KNM:
2666 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; 2809 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
2667 break; 2810 break;
2668 case 0x2D: 2811 case INTEL_FAM6_SANDYBRIDGE_X:
2669 case 0x3E: 2812 case INTEL_FAM6_IVYBRIDGE_X:
2670 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; 2813 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
2671 break; 2814 break;
2672 case 0x37: /* BYT */ 2815 case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
2673 case 0x4D: /* AVN */ 2816 case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
2674 do_rapl = RAPL_PKG | RAPL_CORES ; 2817 do_rapl = RAPL_PKG | RAPL_CORES;
2818 break;
2819 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2820 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
2675 break; 2821 break;
2676 default: 2822 default:
2677 return; 2823 return;
@@ -2682,7 +2828,7 @@ void rapl_probe(unsigned int family, unsigned int model)
2682 return; 2828 return;
2683 2829
2684 rapl_power_units = 1.0 / (1 << (msr & 0xF)); 2830 rapl_power_units = 1.0 / (1 << (msr & 0xF));
2685 if (model == 0x37) 2831 if (model == INTEL_FAM6_ATOM_SILVERMONT1)
2686 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; 2832 rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
2687 else 2833 else
2688 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 2834 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
@@ -2713,11 +2859,11 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model)
2713 return; 2859 return;
2714 2860
2715 switch (model) { 2861 switch (model) {
2716 case 0x3C: /* HSW */ 2862 case INTEL_FAM6_HASWELL_CORE: /* HSW */
2717 case 0x45: /* HSW */ 2863 case INTEL_FAM6_HASWELL_ULT: /* HSW */
2718 case 0x46: /* HSW */ 2864 case INTEL_FAM6_HASWELL_GT3E: /* HSW */
2719 do_gfx_perf_limit_reasons = 1; 2865 do_gfx_perf_limit_reasons = 1;
2720 case 0x3F: /* HSX */ 2866 case INTEL_FAM6_HASWELL_X: /* HSX */
2721 do_core_perf_limit_reasons = 1; 2867 do_core_perf_limit_reasons = 1;
2722 do_ring_perf_limit_reasons = 1; 2868 do_ring_perf_limit_reasons = 1;
2723 default: 2869 default:
@@ -2737,7 +2883,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
2737 cpu = t->cpu_id; 2883 cpu = t->cpu_id;
2738 2884
2739 /* DTS is per-core, no need to print for each thread */ 2885 /* DTS is per-core, no need to print for each thread */
2740 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 2886 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
2741 return 0; 2887 return 0;
2742 2888
2743 if (cpu_migrate(cpu)) { 2889 if (cpu_migrate(cpu)) {
@@ -2886,9 +3032,8 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2886 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 3032 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
2887 } 3033 }
2888 } 3034 }
2889 if (do_rapl & RAPL_CORES) { 3035 if (do_rapl & RAPL_CORES_POWER_LIMIT) {
2890 if (debug) { 3036 if (debug) {
2891
2892 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 3037 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
2893 return -9; 3038 return -9;
2894 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 3039 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -2927,24 +3072,25 @@ int has_snb_msrs(unsigned int family, unsigned int model)
2927 return 0; 3072 return 0;
2928 3073
2929 switch (model) { 3074 switch (model) {
2930 case 0x2A: 3075 case INTEL_FAM6_SANDYBRIDGE:
2931 case 0x2D: 3076 case INTEL_FAM6_SANDYBRIDGE_X:
2932 case 0x3A: /* IVB */ 3077 case INTEL_FAM6_IVYBRIDGE: /* IVB */
2933 case 0x3E: /* IVB Xeon */ 3078 case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
2934 case 0x3C: /* HSW */ 3079 case INTEL_FAM6_HASWELL_CORE: /* HSW */
2935 case 0x3F: /* HSW */ 3080 case INTEL_FAM6_HASWELL_X: /* HSW */
2936 case 0x45: /* HSW */ 3081 case INTEL_FAM6_HASWELL_ULT: /* HSW */
2937 case 0x46: /* HSW */ 3082 case INTEL_FAM6_HASWELL_GT3E: /* HSW */
2938 case 0x3D: /* BDW */ 3083 case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2939 case 0x47: /* BDW */ 3084 case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
2940 case 0x4F: /* BDX */ 3085 case INTEL_FAM6_BROADWELL_X: /* BDX */
2941 case 0x56: /* BDX-DE */ 3086 case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
2942 case 0x4E: /* SKL */ 3087 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2943 case 0x5E: /* SKL */ 3088 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2944 case 0x8E: /* KBL */ 3089 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2945 case 0x9E: /* KBL */ 3090 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
2946 case 0x55: /* SKX */ 3091 case INTEL_FAM6_SKYLAKE_X: /* SKX */
2947 case 0x5C: /* BXT */ 3092 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3093 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
2948 return 1; 3094 return 1;
2949 } 3095 }
2950 return 0; 3096 return 0;
@@ -2968,13 +3114,13 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
2968 return 0; 3114 return 0;
2969 3115
2970 switch (model) { 3116 switch (model) {
2971 case 0x45: /* HSW */ 3117 case INTEL_FAM6_HASWELL_ULT: /* HSW */
2972 case 0x3D: /* BDW */ 3118 case INTEL_FAM6_BROADWELL_CORE: /* BDW */
2973 case 0x4E: /* SKL */ 3119 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2974 case 0x5E: /* SKL */ 3120 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2975 case 0x8E: /* KBL */ 3121 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
2976 case 0x9E: /* KBL */ 3122 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
2977 case 0x5C: /* BXT */ 3123 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
2978 return 1; 3124 return 1;
2979 } 3125 }
2980 return 0; 3126 return 0;
@@ -2994,10 +3140,10 @@ int has_skl_msrs(unsigned int family, unsigned int model)
2994 return 0; 3140 return 0;
2995 3141
2996 switch (model) { 3142 switch (model) {
2997 case 0x4E: /* SKL */ 3143 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
2998 case 0x5E: /* SKL */ 3144 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
2999 case 0x8E: /* KBL */ 3145 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
3000 case 0x9E: /* KBL */ 3146 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3001 return 1; 3147 return 1;
3002 } 3148 }
3003 return 0; 3149 return 0;
@@ -3010,8 +3156,8 @@ int is_slm(unsigned int family, unsigned int model)
3010 if (!genuine_intel) 3156 if (!genuine_intel)
3011 return 0; 3157 return 0;
3012 switch (model) { 3158 switch (model) {
3013 case 0x37: /* BYT */ 3159 case INTEL_FAM6_ATOM_SILVERMONT1: /* BYT */
3014 case 0x4D: /* AVN */ 3160 case INTEL_FAM6_ATOM_SILVERMONT2: /* AVN */
3015 return 1; 3161 return 1;
3016 } 3162 }
3017 return 0; 3163 return 0;
@@ -3022,7 +3168,8 @@ int is_knl(unsigned int family, unsigned int model)
3022 if (!genuine_intel) 3168 if (!genuine_intel)
3023 return 0; 3169 return 0;
3024 switch (model) { 3170 switch (model) {
3025 case 0x57: /* KNL */ 3171 case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
3172 case INTEL_FAM6_XEON_PHI_KNM:
3026 return 1; 3173 return 1;
3027 } 3174 }
3028 return 0; 3175 return 0;
@@ -3050,7 +3197,7 @@ double slm_bclk(void)
3050 i = msr & 0xf; 3197 i = msr & 0xf;
3051 if (i >= SLM_BCLK_FREQS) { 3198 if (i >= SLM_BCLK_FREQS) {
3052 fprintf(outf, "SLM BCLK[%d] invalid\n", i); 3199 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
3053 msr = 3; 3200 i = 3;
3054 } 3201 }
3055 freq = slm_freq_table[i]; 3202 freq = slm_freq_table[i];
3056 3203
@@ -3174,10 +3321,11 @@ void decode_misc_pwr_mgmt_msr(void)
3174 return; 3321 return;
3175 3322
3176 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) 3323 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3177 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", 3324 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
3178 base_cpu, msr, 3325 base_cpu, msr,
3179 msr & (1 << 0) ? "DIS" : "EN", 3326 msr & (1 << 0) ? "DIS" : "EN",
3180 msr & (1 << 1) ? "EN" : "DIS"); 3327 msr & (1 << 1) ? "EN" : "DIS",
3328 msr & (1 << 8) ? "EN" : "DIS");
3181} 3329}
3182 3330
3183void process_cpuid() 3331void process_cpuid()
@@ -3303,16 +3451,17 @@ void process_cpuid()
3303 3451
3304 if (crystal_hz == 0) 3452 if (crystal_hz == 0)
3305 switch(model) { 3453 switch(model) {
3306 case 0x4E: /* SKL */ 3454 case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
3307 case 0x5E: /* SKL */ 3455 case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
3308 case 0x8E: /* KBL */ 3456 case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
3309 case 0x9E: /* KBL */ 3457 case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
3310 crystal_hz = 24000000; /* 24.0 MHz */ 3458 crystal_hz = 24000000; /* 24.0 MHz */
3311 break; 3459 break;
3312 case 0x55: /* SKX */ 3460 case INTEL_FAM6_SKYLAKE_X: /* SKX */
3461 case INTEL_FAM6_ATOM_DENVERTON: /* DNV */
3313 crystal_hz = 25000000; /* 25.0 MHz */ 3462 crystal_hz = 25000000; /* 25.0 MHz */
3314 break; 3463 break;
3315 case 0x5C: /* BXT */ 3464 case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
3316 crystal_hz = 19200000; /* 19.2 MHz */ 3465 crystal_hz = 19200000; /* 19.2 MHz */
3317 break; 3466 break;
3318 default: 3467 default:
@@ -3385,14 +3534,12 @@ void help()
3385 "when COMMAND completes.\n" 3534 "when COMMAND completes.\n"
3386 "If no COMMAND is specified, turbostat wakes every 5-seconds\n" 3535 "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
3387 "to print statistics, until interrupted.\n" 3536 "to print statistics, until interrupted.\n"
3537 "--add add a counter\n"
3538 " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
3388 "--debug run in \"debug\" mode\n" 3539 "--debug run in \"debug\" mode\n"
3389 "--interval sec Override default 5-second measurement interval\n" 3540 "--interval sec Override default 5-second measurement interval\n"
3390 "--help print this help message\n" 3541 "--help print this help message\n"
3391 "--counter msr print 32-bit counter at address \"msr\"\n"
3392 "--Counter msr print 64-bit Counter at address \"msr\"\n"
3393 "--out file create or truncate \"file\" for all output\n" 3542 "--out file create or truncate \"file\" for all output\n"
3394 "--msr msr print 32-bit value at address \"msr\"\n"
3395 "--MSR msr print 64-bit Value at address \"msr\"\n"
3396 "--version print version information\n" 3543 "--version print version information\n"
3397 "\n" 3544 "\n"
3398 "For more help, run \"man turbostat\"\n"); 3545 "For more help, run \"man turbostat\"\n");
@@ -3515,7 +3662,7 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data
3515 int i; 3662 int i;
3516 3663
3517 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * 3664 *t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg *
3518 topo.num_packages, sizeof(struct thread_data)); 3665 topo.num_packages, sizeof(struct thread_data) + sys.thread_counter_bytes);
3519 if (*t == NULL) 3666 if (*t == NULL)
3520 goto error; 3667 goto error;
3521 3668
@@ -3524,14 +3671,14 @@ allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data
3524 (*t)[i].cpu_id = -1; 3671 (*t)[i].cpu_id = -1;
3525 3672
3526 *c = calloc(topo.num_cores_per_pkg * topo.num_packages, 3673 *c = calloc(topo.num_cores_per_pkg * topo.num_packages,
3527 sizeof(struct core_data)); 3674 sizeof(struct core_data) + sys.core_counter_bytes);
3528 if (*c == NULL) 3675 if (*c == NULL)
3529 goto error; 3676 goto error;
3530 3677
3531 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) 3678 for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++)
3532 (*c)[i].core_id = -1; 3679 (*c)[i].core_id = -1;
3533 3680
3534 *p = calloc(topo.num_packages, sizeof(struct pkg_data)); 3681 *p = calloc(topo.num_packages, sizeof(struct pkg_data) + sys.package_counter_bytes);
3535 if (*p == NULL) 3682 if (*p == NULL)
3536 goto error; 3683 goto error;
3537 3684
@@ -3598,7 +3745,7 @@ void allocate_output_buffer()
3598} 3745}
3599void allocate_fd_percpu(void) 3746void allocate_fd_percpu(void)
3600{ 3747{
3601 fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); 3748 fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
3602 if (fd_percpu == NULL) 3749 if (fd_percpu == NULL)
3603 err(-1, "calloc fd_percpu"); 3750 err(-1, "calloc fd_percpu");
3604} 3751}
@@ -3608,9 +3755,9 @@ void allocate_irq_buffers(void)
3608 if (irq_column_2_cpu == NULL) 3755 if (irq_column_2_cpu == NULL)
3609 err(-1, "calloc %d", topo.num_cpus); 3756 err(-1, "calloc %d", topo.num_cpus);
3610 3757
3611 irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); 3758 irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
3612 if (irqs_per_cpu == NULL) 3759 if (irqs_per_cpu == NULL)
3613 err(-1, "calloc %d", topo.max_cpu_num); 3760 err(-1, "calloc %d", topo.max_cpu_num + 1);
3614} 3761}
3615void setup_all_buffers(void) 3762void setup_all_buffers(void)
3616{ 3763{
@@ -3697,9 +3844,12 @@ int fork_it(char **argv)
3697 for_all_cpus(get_counters, ODD_COUNTERS); 3844 for_all_cpus(get_counters, ODD_COUNTERS);
3698 gettimeofday(&tv_odd, (struct timezone *)NULL); 3845 gettimeofday(&tv_odd, (struct timezone *)NULL);
3699 timersub(&tv_odd, &tv_even, &tv_delta); 3846 timersub(&tv_odd, &tv_even, &tv_delta);
3700 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 3847 if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
3701 compute_average(EVEN_COUNTERS); 3848 fprintf(outf, "%s: Counter reset detected\n", progname);
3702 format_all_counters(EVEN_COUNTERS); 3849 else {
3850 compute_average(EVEN_COUNTERS);
3851 format_all_counters(EVEN_COUNTERS);
3852 }
3703 3853
3704 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 3854 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
3705 3855
@@ -3726,24 +3876,170 @@ int get_and_dump_counters(void)
3726} 3876}
3727 3877
3728void print_version() { 3878void print_version() {
3729 fprintf(outf, "turbostat version 4.12 5 Apr 2016" 3879 fprintf(outf, "turbostat version 4.16 24 Dec 2016"
3730 " - Len Brown <lenb@kernel.org>\n"); 3880 " - Len Brown <lenb@kernel.org>\n");
3731} 3881}
3732 3882
3883int add_counter(unsigned int msr_num, char *name, unsigned int width,
3884 enum counter_scope scope, enum counter_type type,
3885 enum counter_format format)
3886{
3887 struct msr_counter *msrp;
3888
3889 msrp = calloc(1, sizeof(struct msr_counter));
3890 if (msrp == NULL) {
3891 perror("calloc");
3892 exit(1);
3893 }
3894
3895 msrp->msr_num = msr_num;
3896 strncpy(msrp->name, name, NAME_BYTES);
3897 msrp->width = width;
3898 msrp->type = type;
3899 msrp->format = format;
3900
3901 switch (scope) {
3902
3903 case SCOPE_CPU:
3904 sys.thread_counter_bytes += 64;
3905 msrp->next = sys.tp;
3906 sys.tp = msrp;
3907 sys.thread_counter_bytes += sizeof(unsigned long long);
3908 break;
3909
3910 case SCOPE_CORE:
3911 sys.core_counter_bytes += 64;
3912 msrp->next = sys.cp;
3913 sys.cp = msrp;
3914 sys.core_counter_bytes += sizeof(unsigned long long);
3915 break;
3916
3917 case SCOPE_PACKAGE:
3918 sys.package_counter_bytes += 64;
3919 msrp->next = sys.pp;
3920 sys.pp = msrp;
3921 sys.package_counter_bytes += sizeof(unsigned long long);
3922 break;
3923 }
3924
3925 return 0;
3926}
3927
3928void parse_add_command(char *add_command)
3929{
3930 int msr_num = 0;
3931 char name_buffer[NAME_BYTES];
3932 int width = 64;
3933 int fail = 0;
3934 enum counter_scope scope = SCOPE_CPU;
3935 enum counter_type type = COUNTER_CYCLES;
3936 enum counter_format format = FORMAT_DELTA;
3937
3938 while (add_command) {
3939
3940 if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
3941 goto next;
3942
3943 if (sscanf(add_command, "msr%d", &msr_num) == 1)
3944 goto next;
3945
3946 if (sscanf(add_command, "u%d", &width) == 1) {
3947 if ((width == 32) || (width == 64))
3948 goto next;
3949 width = 64;
3950 }
3951 if (!strncmp(add_command, "cpu", strlen("cpu"))) {
3952 scope = SCOPE_CPU;
3953 goto next;
3954 }
3955 if (!strncmp(add_command, "core", strlen("core"))) {
3956 scope = SCOPE_CORE;
3957 goto next;
3958 }
3959 if (!strncmp(add_command, "package", strlen("package"))) {
3960 scope = SCOPE_PACKAGE;
3961 goto next;
3962 }
3963 if (!strncmp(add_command, "cycles", strlen("cycles"))) {
3964 type = COUNTER_CYCLES;
3965 goto next;
3966 }
3967 if (!strncmp(add_command, "seconds", strlen("seconds"))) {
3968 type = COUNTER_SECONDS;
3969 goto next;
3970 }
3971 if (!strncmp(add_command, "raw", strlen("raw"))) {
3972 format = FORMAT_RAW;
3973 goto next;
3974 }
3975 if (!strncmp(add_command, "delta", strlen("delta"))) {
3976 format = FORMAT_DELTA;
3977 goto next;
3978 }
3979 if (!strncmp(add_command, "percent", strlen("percent"))) {
3980 format = FORMAT_PERCENT;
3981 goto next;
3982 }
3983
3984 if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */
3985 char *eos;
3986
3987 eos = strchr(name_buffer, ',');
3988 if (eos)
3989 *eos = '\0';
3990 goto next;
3991 }
3992
3993next:
3994 add_command = strchr(add_command, ',');
3995 if (add_command)
3996 add_command++;
3997
3998 }
3999 if (msr_num == 0) {
4000 fprintf(stderr, "--add: (msrDDD | msr0xXXX) required\n");
4001 fail++;
4002 }
4003
4004 /* generate default column header */
4005 if (*name_buffer == '\0') {
4006 if (format == FORMAT_RAW) {
4007 if (width == 32)
4008 sprintf(name_buffer, "msr%d", msr_num);
4009 else
4010 sprintf(name_buffer, "MSR%d", msr_num);
4011 } else if (format == FORMAT_DELTA) {
4012 if (width == 32)
4013 sprintf(name_buffer, "cnt%d", msr_num);
4014 else
4015 sprintf(name_buffer, "CNT%d", msr_num);
4016 } else if (format == FORMAT_PERCENT) {
4017 if (width == 32)
4018 sprintf(name_buffer, "msr%d%%", msr_num);
4019 else
4020 sprintf(name_buffer, "MSR%d%%", msr_num);
4021 }
4022 }
4023
4024 if (add_counter(msr_num, name_buffer, width, scope, type, format))
4025 fail++;
4026
4027 if (fail) {
4028 help();
4029 exit(1);
4030 }
4031}
3733void cmdline(int argc, char **argv) 4032void cmdline(int argc, char **argv)
3734{ 4033{
3735 int opt; 4034 int opt;
3736 int option_index = 0; 4035 int option_index = 0;
3737 static struct option long_options[] = { 4036 static struct option long_options[] = {
3738 {"Counter", required_argument, 0, 'C'}, 4037 {"add", required_argument, 0, 'a'},
3739 {"counter", required_argument, 0, 'c'},
3740 {"Dump", no_argument, 0, 'D'}, 4038 {"Dump", no_argument, 0, 'D'},
3741 {"debug", no_argument, 0, 'd'}, 4039 {"debug", no_argument, 0, 'd'},
3742 {"interval", required_argument, 0, 'i'}, 4040 {"interval", required_argument, 0, 'i'},
3743 {"help", no_argument, 0, 'h'}, 4041 {"help", no_argument, 0, 'h'},
3744 {"Joules", no_argument, 0, 'J'}, 4042 {"Joules", no_argument, 0, 'J'},
3745 {"MSR", required_argument, 0, 'M'},
3746 {"msr", required_argument, 0, 'm'},
3747 {"out", required_argument, 0, 'o'}, 4043 {"out", required_argument, 0, 'o'},
3748 {"Package", no_argument, 0, 'p'}, 4044 {"Package", no_argument, 0, 'p'},
3749 {"processor", no_argument, 0, 'p'}, 4045 {"processor", no_argument, 0, 'p'},
@@ -3758,11 +4054,8 @@ void cmdline(int argc, char **argv)
3758 while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", 4054 while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
3759 long_options, &option_index)) != -1) { 4055 long_options, &option_index)) != -1) {
3760 switch (opt) { 4056 switch (opt) {
3761 case 'C': 4057 case 'a':
3762 sscanf(optarg, "%x", &extra_delta_offset64); 4058 parse_add_command(optarg);
3763 break;
3764 case 'c':
3765 sscanf(optarg, "%x", &extra_delta_offset32);
3766 break; 4059 break;
3767 case 'D': 4060 case 'D':
3768 dump_only++; 4061 dump_only++;
@@ -3791,12 +4084,6 @@ void cmdline(int argc, char **argv)
3791 case 'J': 4084 case 'J':
3792 rapl_joules++; 4085 rapl_joules++;
3793 break; 4086 break;
3794 case 'M':
3795 sscanf(optarg, "%x", &extra_msr_offset64);
3796 break;
3797 case 'm':
3798 sscanf(optarg, "%x", &extra_msr_offset32);
3799 break;
3800 case 'o': 4087 case 'o':
3801 outf = fopen_or_die(optarg, "w"); 4088 outf = fopen_or_die(optarg, "w");
3802 break; 4089 break;