aboutsummaryrefslogtreecommitdiffstats
path: root/tools/power
diff options
context:
space:
mode:
author Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2016-03-13 21:13:05 -0400
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com> 2016-03-13 21:13:05 -0400
commit 3fdb74649b4f18ccaa88766750b616dec6acb5b0 (patch)
tree 691e718a361a1db8fae3e87270af8d62adc4946a /tools/power
parent 5b3e7e0536bd6326798ab57d14a49b15ad7e3e3f (diff)
parent 685b535b2cdb9cdf354321f8af9ed17dcf19d19f (diff)
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux into pm-tools
Pull turbostat updates for 4.6 from Len Brown.

* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
  tools/power turbostat: bugfix: TDP MSRs print bits fixing
  tools/power turbostat: correct output for MSR_NHM_SNB_PKG_CST_CFG_CTL dump
  tools/power turbostat: call __cpuid() instead of __get_cpuid()
  tools/power turbostat: indicate SMX and SGX support
  tools/power turbostat: detect and work around syscall jitter
  tools/power turbostat: show GFX%rc6
  tools/power turbostat: show GFXMHz
  tools/power turbostat: show IRQs per CPU
  tools/power turbostat: make fewer systems calls
  tools/power turbostat: fix compiler warnings
  tools/power turbostat: add --out option for saving output in a file
  tools/power turbostat: re-name "%Busy" field to "Busy%"
  tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding
  tools/power turbostat: Intel Xeon x200: fix erroneous bclk value
  tools/power turbostat: allow sub-sec intervals
  tools/power turbostat: Decode MSR_MISC_PWR_MGMT
  tools/power turbostat: decode HWP registers
  x86 msr-index: Simplify syntax for HWP fields
  tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency
  tools/power turbostat: decode more CPUID fields
Diffstat (limited to 'tools/power')
-rw-r--r-- tools/power/x86/turbostat/turbostat.8 32
-rw-r--r-- tools/power/x86/turbostat/turbostat.c 889
2 files changed, 724 insertions, 197 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 622db685b4f9..89a55d5e32f3 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -34,7 +34,10 @@ name as necessary to disambiguate it from others is necessary. Note that option
34\fB--debug\fP displays additional system configuration information. Invoking this parameter 34\fB--debug\fP displays additional system configuration information. Invoking this parameter
35more than once may also enable internal turbostat debug information. 35more than once may also enable internal turbostat debug information.
36.PP 36.PP
37\fB--interval seconds\fP overrides the default 5-second measurement interval. 37\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
38.PP
39\fB--out output_file\fP turbostat output is written to the specified output_file.
40The file is truncated if it already exists, and it is created if it does not exist.
38.PP 41.PP
39\fB--help\fP displays usage for the most common parameters. 42\fB--help\fP displays usage for the most common parameters.
40.PP 43.PP
@@ -61,7 +64,7 @@ displays the statistics gathered since it was forked.
61.nf 64.nf
62\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. 65\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
63\fBAVG_MHz\fP number of cycles executed divided by time elapsed. 66\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
64\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. 67\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
65\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). 68\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
66\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. 69\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
67.fi 70.fi
@@ -83,13 +86,14 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
83\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. 86\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
84.fi 87.fi
85.PP 88.PP
86.SH EXAMPLE 89.SH PERIODIC EXAMPLE
87Without any parameters, turbostat displays statistics every 5 seconds. 90Without any parameters, turbostat displays statistics every 5 seconds.
88(override interval with "-i sec" option, or specify a command 91Periodic output goes to stdout, by default, unless --out is used to specify an output file.
89for turbostat to fork). 92The 5-second interval can be changed with the "-i sec" option.
93Or a command may be specified as in "FORK EXAMPLE" below.
90.nf 94.nf
91[root@hsw]# ./turbostat 95[root@hsw]# ./turbostat
92 CPU Avg_MHz %Busy Bzy_MHz TSC_MHz 96 CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
93 - 488 12.51 3898 3498 97 - 488 12.51 3898 3498
94 0 0 0.01 3885 3498 98 0 0 0.01 3885 3498
95 4 3897 99.99 3898 3498 99 4 3897 99.99 3898 3498
@@ -145,7 +149,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
145cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) 149cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
146cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) 150cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
147cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) 151cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
148 Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt 152 Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
149 - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 153 - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
150 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 154 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
151 0 4 3897 99.98 3898 3498 0 0.02 155 0 4 3897 99.98 3898 3498 0 0.02
@@ -171,14 +175,16 @@ The --debug option adds additional columns to the measurement output, including C
171See the field definitions above. 175See the field definitions above.
172.SH FORK EXAMPLE 176.SH FORK EXAMPLE
173If turbostat is invoked with a command, it will fork that command 177If turbostat is invoked with a command, it will fork that command
174and output the statistics gathered when the command exits. 178and output the statistics gathered after the command exits.
179In this case, turbostat output goes to stderr, by default.
180Output can instead be saved to a file using the --out option.
175eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds 181eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
176until ^C while the other CPUs are mostly idle: 182until ^C while the other CPUs are mostly idle:
177 183
178.nf 184.nf
179root@hsw: turbostat cat /dev/zero > /dev/null 185root@hsw: turbostat cat /dev/zero > /dev/null
180^C 186^C
181 CPU Avg_MHz %Busy Bzy_MHz TSC_MHz 187 CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
182 - 482 12.51 3854 3498 188 - 482 12.51 3854 3498
183 0 0 0.01 1960 3498 189 0 0 0.01 1960 3498
184 4 0 0.00 2128 3498 190 4 0 0.00 2128 3498
@@ -192,12 +198,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null
192 198
193.fi 199.fi
194Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. 200Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit.
195The first row shows the average MHz and %Busy across all the processors in the system. 201The first row shows the average MHz and Busy% across all the processors in the system.
196 202
197Note that the Avg_MHz column reflects the total number of cycles executed 203Note that the Avg_MHz column reflects the total number of cycles executed
198divided by the measurement interval. If the %Busy column is 100%, 204divided by the measurement interval. If the Busy% column is 100%,
199then the processor was running at that speed the entire interval. 205then the processor was running at that speed the entire interval.
200The Avg_MHz divided by the %Busy results in the Bzy_MHz -- 206The Avg_MHz divided by the Busy% results in the Bzy_MHz --
201which is the average frequency while the processor was executing -- 207which is the average frequency while the processor was executing --
202not including any non-busy idle time. 208not including any non-busy idle time.
203 209
@@ -233,7 +239,7 @@ in the brand string in /proc/cpuinfo. On a system where
233the TSC stops in idle, TSC_MHz will drop 239the TSC stops in idle, TSC_MHz will drop
234below the processor's base frequency. 240below the processor's base frequency.
235 241
236%Busy = MPERF_delta/TSC_delta 242Busy% = MPERF_delta/TSC_delta
237 243
238Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval 244Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
239 245
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 0dac7e05a6ac..ee1551b6fa01 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -38,12 +38,15 @@
38#include <string.h> 38#include <string.h>
39#include <ctype.h> 39#include <ctype.h>
40#include <sched.h> 40#include <sched.h>
41#include <time.h>
41#include <cpuid.h> 42#include <cpuid.h>
42#include <linux/capability.h> 43#include <linux/capability.h>
43#include <errno.h> 44#include <errno.h>
44 45
45char *proc_stat = "/proc/stat"; 46char *proc_stat = "/proc/stat";
46unsigned int interval_sec = 5; 47FILE *outf;
48int *fd_percpu;
49struct timespec interval_ts = {5, 0};
47unsigned int debug; 50unsigned int debug;
48unsigned int rapl_joules; 51unsigned int rapl_joules;
49unsigned int summary_only; 52unsigned int summary_only;
@@ -72,6 +75,7 @@ unsigned int extra_msr_offset64;
72unsigned int extra_delta_offset32; 75unsigned int extra_delta_offset32;
73unsigned int extra_delta_offset64; 76unsigned int extra_delta_offset64;
74unsigned int aperf_mperf_multiplier = 1; 77unsigned int aperf_mperf_multiplier = 1;
78int do_irq = 1;
75int do_smi; 79int do_smi;
76double bclk; 80double bclk;
77double base_hz; 81double base_hz;
@@ -86,6 +90,10 @@ char *output_buffer, *outp;
86unsigned int do_rapl; 90unsigned int do_rapl;
87unsigned int do_dts; 91unsigned int do_dts;
88unsigned int do_ptm; 92unsigned int do_ptm;
93unsigned int do_gfx_rc6_ms;
94unsigned long long gfx_cur_rc6_ms;
95unsigned int do_gfx_mhz;
96unsigned int gfx_cur_mhz;
89unsigned int tcc_activation_temp; 97unsigned int tcc_activation_temp;
90unsigned int tcc_activation_temp_override; 98unsigned int tcc_activation_temp_override;
91double rapl_power_units, rapl_time_units; 99double rapl_power_units, rapl_time_units;
@@ -98,6 +106,12 @@ unsigned int crystal_hz;
98unsigned long long tsc_hz; 106unsigned long long tsc_hz;
99int base_cpu; 107int base_cpu;
100double discover_bclk(unsigned int family, unsigned int model); 108double discover_bclk(unsigned int family, unsigned int model);
109unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
110 /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
111unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
112unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
113unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
114unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
101 115
102#define RAPL_PKG (1 << 0) 116#define RAPL_PKG (1 << 0)
103 /* 0x610 MSR_PKG_POWER_LIMIT */ 117 /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -145,6 +159,7 @@ struct thread_data {
145 unsigned long long extra_delta64; 159 unsigned long long extra_delta64;
146 unsigned long long extra_msr32; 160 unsigned long long extra_msr32;
147 unsigned long long extra_delta32; 161 unsigned long long extra_delta32;
162 unsigned int irq_count;
148 unsigned int smi_count; 163 unsigned int smi_count;
149 unsigned int cpu_id; 164 unsigned int cpu_id;
150 unsigned int flags; 165 unsigned int flags;
@@ -172,6 +187,8 @@ struct pkg_data {
172 unsigned long long pkg_any_core_c0; 187 unsigned long long pkg_any_core_c0;
173 unsigned long long pkg_any_gfxe_c0; 188 unsigned long long pkg_any_gfxe_c0;
174 unsigned long long pkg_both_core_gfxe_c0; 189 unsigned long long pkg_both_core_gfxe_c0;
190 unsigned long long gfx_rc6_ms;
191 unsigned int gfx_mhz;
175 unsigned int package_id; 192 unsigned int package_id;
176 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 193 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
177 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 194 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
@@ -212,6 +229,9 @@ struct topo_params {
212 229
213struct timeval tv_even, tv_odd, tv_delta; 230struct timeval tv_even, tv_odd, tv_delta;
214 231
232int *irq_column_2_cpu; /* /proc/interrupts column numbers */
233int *irqs_per_cpu; /* indexed by cpu_num */
234
215void setup_all_buffers(void); 235void setup_all_buffers(void);
216 236
217int cpu_is_not_present(int cpu) 237int cpu_is_not_present(int cpu)
@@ -262,23 +282,34 @@ int cpu_migrate(int cpu)
262 else 282 else
263 return 0; 283 return 0;
264} 284}
265 285int get_msr_fd(int cpu)
266int get_msr(int cpu, off_t offset, unsigned long long *msr)
267{ 286{
268 ssize_t retval;
269 char pathname[32]; 287 char pathname[32];
270 int fd; 288 int fd;
271 289
290 fd = fd_percpu[cpu];
291
292 if (fd)
293 return fd;
294
272 sprintf(pathname, "/dev/cpu/%d/msr", cpu); 295 sprintf(pathname, "/dev/cpu/%d/msr", cpu);
273 fd = open(pathname, O_RDONLY); 296 fd = open(pathname, O_RDONLY);
274 if (fd < 0) 297 if (fd < 0)
275 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); 298 err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
276 299
277 retval = pread(fd, msr, sizeof *msr, offset); 300 fd_percpu[cpu] = fd;
278 close(fd); 301
302 return fd;
303}
304
305int get_msr(int cpu, off_t offset, unsigned long long *msr)
306{
307 ssize_t retval;
308
309 retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
279 310
280 if (retval != sizeof *msr) 311 if (retval != sizeof *msr)
281 err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); 312 err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
282 313
283 return 0; 314 return 0;
284} 315}
@@ -286,8 +317,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
286/* 317/*
287 * Example Format w/ field column widths: 318 * Example Format w/ field column widths:
288 * 319 *
289 * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 320 * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
290 * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 321 * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
291 */ 322 */
292 323
293void print_header(void) 324void print_header(void)
@@ -301,7 +332,7 @@ void print_header(void)
301 if (has_aperf) 332 if (has_aperf)
302 outp += sprintf(outp, " Avg_MHz"); 333 outp += sprintf(outp, " Avg_MHz");
303 if (has_aperf) 334 if (has_aperf)
304 outp += sprintf(outp, " %%Busy"); 335 outp += sprintf(outp, " Busy%%");
305 if (has_aperf) 336 if (has_aperf)
306 outp += sprintf(outp, " Bzy_MHz"); 337 outp += sprintf(outp, " Bzy_MHz");
307 outp += sprintf(outp, " TSC_MHz"); 338 outp += sprintf(outp, " TSC_MHz");
@@ -318,6 +349,8 @@ void print_header(void)
318 if (!debug) 349 if (!debug)
319 goto done; 350 goto done;
320 351
352 if (do_irq)
353 outp += sprintf(outp, " IRQ");
321 if (do_smi) 354 if (do_smi)
322 outp += sprintf(outp, " SMI"); 355 outp += sprintf(outp, " SMI");
323 356
@@ -335,6 +368,12 @@ void print_header(void)
335 if (do_ptm) 368 if (do_ptm)
336 outp += sprintf(outp, " PkgTmp"); 369 outp += sprintf(outp, " PkgTmp");
337 370
371 if (do_gfx_rc6_ms)
372 outp += sprintf(outp, " GFX%%rc6");
373
374 if (do_gfx_mhz)
375 outp += sprintf(outp, " GFXMHz");
376
338 if (do_skl_residency) { 377 if (do_skl_residency) {
339 outp += sprintf(outp, " Totl%%C0"); 378 outp += sprintf(outp, " Totl%%C0");
340 outp += sprintf(outp, " Any%%C0"); 379 outp += sprintf(outp, " Any%%C0");
@@ -409,6 +448,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
409 extra_msr_offset32, t->extra_msr32); 448 extra_msr_offset32, t->extra_msr32);
410 outp += sprintf(outp, "msr0x%x: %016llX\n", 449 outp += sprintf(outp, "msr0x%x: %016llX\n",
411 extra_msr_offset64, t->extra_msr64); 450 extra_msr_offset64, t->extra_msr64);
451 if (do_irq)
452 outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
412 if (do_smi) 453 if (do_smi)
413 outp += sprintf(outp, "SMI: %08X\n", t->smi_count); 454 outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
414 } 455 }
@@ -504,7 +545,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
504 outp += sprintf(outp, "%8.0f", 545 outp += sprintf(outp, "%8.0f",
505 1.0 / units * t->aperf / interval_float); 546 1.0 / units * t->aperf / interval_float);
506 547
507 /* %Busy */ 548 /* Busy% */
508 if (has_aperf) { 549 if (has_aperf) {
509 if (!skip_c0) 550 if (!skip_c0)
510 outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); 551 outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
@@ -542,6 +583,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
542 if (!debug) 583 if (!debug)
543 goto done; 584 goto done;
544 585
586 /* IRQ */
587 if (do_irq)
588 outp += sprintf(outp, "%8d", t->irq_count);
589
545 /* SMI */ 590 /* SMI */
546 if (do_smi) 591 if (do_smi)
547 outp += sprintf(outp, "%8d", t->smi_count); 592 outp += sprintf(outp, "%8d", t->smi_count);
@@ -575,6 +620,14 @@ int format_counters(struct thread_data *t, struct core_data *c,
575 if (do_ptm) 620 if (do_ptm)
576 outp += sprintf(outp, "%8d", p->pkg_temp_c); 621 outp += sprintf(outp, "%8d", p->pkg_temp_c);
577 622
623 /* GFXrc6 */
624 if (do_gfx_rc6_ms)
625 outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float);
626
627 /* GFXMHz */
628 if (do_gfx_mhz)
629 outp += sprintf(outp, "%8d", p->gfx_mhz);
630
578 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ 631 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
579 if (do_skl_residency) { 632 if (do_skl_residency) {
580 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); 633 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
@@ -645,15 +698,24 @@ done:
645 return 0; 698 return 0;
646} 699}
647 700
648void flush_stdout() 701void flush_output_stdout(void)
649{ 702{
650 fputs(output_buffer, stdout); 703 FILE *filep;
651 fflush(stdout); 704
705 if (outf == stderr)
706 filep = stdout;
707 else
708 filep = outf;
709
710 fputs(output_buffer, filep);
711 fflush(filep);
712
652 outp = output_buffer; 713 outp = output_buffer;
653} 714}
654void flush_stderr() 715void flush_output_stderr(void)
655{ 716{
656 fputs(output_buffer, stderr); 717 fputs(output_buffer, outf);
718 fflush(outf);
657 outp = output_buffer; 719 outp = output_buffer;
658} 720}
659void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 721void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -704,6 +766,9 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
704 old->pc10 = new->pc10 - old->pc10; 766 old->pc10 = new->pc10 - old->pc10;
705 old->pkg_temp_c = new->pkg_temp_c; 767 old->pkg_temp_c = new->pkg_temp_c;
706 768
769 old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
770 old->gfx_mhz = new->gfx_mhz;
771
707 DELTA_WRAP32(new->energy_pkg, old->energy_pkg); 772 DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
708 DELTA_WRAP32(new->energy_cores, old->energy_cores); 773 DELTA_WRAP32(new->energy_cores, old->energy_cores);
709 DELTA_WRAP32(new->energy_gfx, old->energy_gfx); 774 DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
@@ -745,9 +810,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
745 } else { 810 } else {
746 811
747 if (!aperf_mperf_unstable) { 812 if (!aperf_mperf_unstable) {
748 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); 813 fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
749 fprintf(stderr, "* Frequency results do not cover entire interval *\n"); 814 fprintf(outf, "* Frequency results do not cover entire interval *\n");
750 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); 815 fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
751 816
752 aperf_mperf_unstable = 1; 817 aperf_mperf_unstable = 1;
753 } 818 }
@@ -782,7 +847,8 @@ delta_thread(struct thread_data *new, struct thread_data *old,
782 } 847 }
783 848
784 if (old->mperf == 0) { 849 if (old->mperf == 0) {
785 if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); 850 if (debug > 1)
851 fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
786 old->mperf = 1; /* divide by 0 protection */ 852 old->mperf = 1; /* divide by 0 protection */
787 } 853 }
788 854
@@ -797,6 +863,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
797 old->extra_msr32 = new->extra_msr32; 863 old->extra_msr32 = new->extra_msr32;
798 old->extra_msr64 = new->extra_msr64; 864 old->extra_msr64 = new->extra_msr64;
799 865
866 if (do_irq)
867 old->irq_count = new->irq_count - old->irq_count;
868
800 if (do_smi) 869 if (do_smi)
801 old->smi_count = new->smi_count - old->smi_count; 870 old->smi_count = new->smi_count - old->smi_count;
802} 871}
@@ -826,10 +895,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
826 t->mperf = 0; 895 t->mperf = 0;
827 t->c1 = 0; 896 t->c1 = 0;
828 897
829 t->smi_count = 0;
830 t->extra_delta32 = 0; 898 t->extra_delta32 = 0;
831 t->extra_delta64 = 0; 899 t->extra_delta64 = 0;
832 900
901 t->irq_count = 0;
902 t->smi_count = 0;
903
833 /* tells format_counters to dump all fields from this set */ 904 /* tells format_counters to dump all fields from this set */
834 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; 905 t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
835 906
@@ -861,6 +932,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
861 p->rapl_pkg_perf_status = 0; 932 p->rapl_pkg_perf_status = 0;
862 p->rapl_dram_perf_status = 0; 933 p->rapl_dram_perf_status = 0;
863 p->pkg_temp_c = 0; 934 p->pkg_temp_c = 0;
935
936 p->gfx_rc6_ms = 0;
937 p->gfx_mhz = 0;
864} 938}
865int sum_counters(struct thread_data *t, struct core_data *c, 939int sum_counters(struct thread_data *t, struct core_data *c,
866 struct pkg_data *p) 940 struct pkg_data *p)
@@ -873,6 +947,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
873 average.threads.extra_delta32 += t->extra_delta32; 947 average.threads.extra_delta32 += t->extra_delta32;
874 average.threads.extra_delta64 += t->extra_delta64; 948 average.threads.extra_delta64 += t->extra_delta64;
875 949
950 average.threads.irq_count += t->irq_count;
951 average.threads.smi_count += t->smi_count;
952
876 /* sum per-core values only for 1st thread in core */ 953 /* sum per-core values only for 1st thread in core */
877 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 954 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
878 return 0; 955 return 0;
@@ -910,6 +987,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
910 average.packages.energy_cores += p->energy_cores; 987 average.packages.energy_cores += p->energy_cores;
911 average.packages.energy_gfx += p->energy_gfx; 988 average.packages.energy_gfx += p->energy_gfx;
912 989
990 average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
991 average.packages.gfx_mhz = p->gfx_mhz;
992
913 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); 993 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
914 994
915 average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; 995 average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
@@ -970,7 +1050,6 @@ static unsigned long long rdtsc(void)
970 return low | ((unsigned long long)high) << 32; 1050 return low | ((unsigned long long)high) << 32;
971} 1051}
972 1052
973
974/* 1053/*
975 * get_counters(...) 1054 * get_counters(...)
976 * migrate to cpu 1055 * migrate to cpu
@@ -980,23 +1059,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
980{ 1059{
981 int cpu = t->cpu_id; 1060 int cpu = t->cpu_id;
982 unsigned long long msr; 1061 unsigned long long msr;
1062 int aperf_mperf_retry_count = 0;
983 1063
984 if (cpu_migrate(cpu)) { 1064 if (cpu_migrate(cpu)) {
985 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 1065 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
986 return -1; 1066 return -1;
987 } 1067 }
988 1068
1069retry:
989 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 1070 t->tsc = rdtsc(); /* we are running on local CPU of interest */
990 1071
991 if (has_aperf) { 1072 if (has_aperf) {
1073 unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
1074
1075 /*
1076 * The TSC, APERF and MPERF must be read together for
1077 * APERF/MPERF and MPERF/TSC to give accurate results.
1078 *
1079 * Unfortunately, APERF and MPERF are read by
1080 * individual system call, so delays may occur
1081 * between them. If the time to read them
1082 * varies by a large amount, we re-read them.
1083 */
1084
1085 /*
1086 * This initial dummy APERF read has been seen to
1087 * reduce jitter in the subsequent reads.
1088 */
1089
1090 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
1091 return -3;
1092
1093 t->tsc = rdtsc(); /* re-read close to APERF */
1094
1095 tsc_before = t->tsc;
1096
992 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) 1097 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
993 return -3; 1098 return -3;
1099
1100 tsc_between = rdtsc();
1101
994 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) 1102 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
995 return -4; 1103 return -4;
1104
1105 tsc_after = rdtsc();
1106
1107 aperf_time = tsc_between - tsc_before;
1108 mperf_time = tsc_after - tsc_between;
1109
1110 /*
1111 * If the system call latency to read APERF and MPERF
1112 * differ by more than 2x, then try again.
1113 */
1114 if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
1115 aperf_mperf_retry_count++;
1116 if (aperf_mperf_retry_count < 5)
1117 goto retry;
1118 else
1119 warnx("cpu%d jitter %lld %lld",
1120 cpu, aperf_time, mperf_time);
1121 }
1122 aperf_mperf_retry_count = 0;
1123
996 t->aperf = t->aperf * aperf_mperf_multiplier; 1124 t->aperf = t->aperf * aperf_mperf_multiplier;
997 t->mperf = t->mperf * aperf_mperf_multiplier; 1125 t->mperf = t->mperf * aperf_mperf_multiplier;
998 } 1126 }
999 1127
1128 if (do_irq)
1129 t->irq_count = irqs_per_cpu[cpu];
1000 if (do_smi) { 1130 if (do_smi) {
1001 if (get_msr(cpu, MSR_SMI_COUNT, &msr)) 1131 if (get_msr(cpu, MSR_SMI_COUNT, &msr))
1002 return -5; 1132 return -5;
@@ -1124,6 +1254,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1124 return -17; 1254 return -17;
1125 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); 1255 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
1126 } 1256 }
1257
1258 if (do_gfx_rc6_ms)
1259 p->gfx_rc6_ms = gfx_cur_rc6_ms;
1260
1261 if (do_gfx_mhz)
1262 p->gfx_mhz = gfx_cur_mhz;
1263
1127 return 0; 1264 return 0;
1128} 1265}
1129 1266
@@ -1175,18 +1312,18 @@ dump_nhm_platform_info(void)
1175 1312
1176 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); 1313 get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
1177 1314
1178 fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); 1315 fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
1179 1316
1180 ratio = (msr >> 40) & 0xFF; 1317 ratio = (msr >> 40) & 0xFF;
1181 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", 1318 fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
1182 ratio, bclk, ratio * bclk); 1319 ratio, bclk, ratio * bclk);
1183 1320
1184 ratio = (msr >> 8) & 0xFF; 1321 ratio = (msr >> 8) & 0xFF;
1185 fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", 1322 fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
1186 ratio, bclk, ratio * bclk); 1323 ratio, bclk, ratio * bclk);
1187 1324
1188 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); 1325 get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
1189 fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 1326 fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1190 base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); 1327 base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
1191 1328
1192 return; 1329 return;
@@ -1200,16 +1337,16 @@ dump_hsw_turbo_ratio_limits(void)
1200 1337
1201 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); 1338 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
1202 1339
1203 fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); 1340 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
1204 1341
1205 ratio = (msr >> 8) & 0xFF; 1342 ratio = (msr >> 8) & 0xFF;
1206 if (ratio) 1343 if (ratio)
1207 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", 1344 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
1208 ratio, bclk, ratio * bclk); 1345 ratio, bclk, ratio * bclk);
1209 1346
1210 ratio = (msr >> 0) & 0xFF; 1347 ratio = (msr >> 0) & 0xFF;
1211 if (ratio) 1348 if (ratio)
1212 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", 1349 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
1213 ratio, bclk, ratio * bclk); 1350 ratio, bclk, ratio * bclk);
1214 return; 1351 return;
1215} 1352}
@@ -1222,46 +1359,46 @@ dump_ivt_turbo_ratio_limits(void)
1222 1359
1223 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); 1360 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
1224 1361
1225 fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); 1362 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
1226 1363
1227 ratio = (msr >> 56) & 0xFF; 1364 ratio = (msr >> 56) & 0xFF;
1228 if (ratio) 1365 if (ratio)
1229 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", 1366 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
1230 ratio, bclk, ratio * bclk); 1367 ratio, bclk, ratio * bclk);
1231 1368
1232 ratio = (msr >> 48) & 0xFF; 1369 ratio = (msr >> 48) & 0xFF;
1233 if (ratio) 1370 if (ratio)
1234 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", 1371 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
1235 ratio, bclk, ratio * bclk); 1372 ratio, bclk, ratio * bclk);
1236 1373
1237 ratio = (msr >> 40) & 0xFF; 1374 ratio = (msr >> 40) & 0xFF;
1238 if (ratio) 1375 if (ratio)
1239 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", 1376 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
1240 ratio, bclk, ratio * bclk); 1377 ratio, bclk, ratio * bclk);
1241 1378
1242 ratio = (msr >> 32) & 0xFF; 1379 ratio = (msr >> 32) & 0xFF;
1243 if (ratio) 1380 if (ratio)
1244 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", 1381 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
1245 ratio, bclk, ratio * bclk); 1382 ratio, bclk, ratio * bclk);
1246 1383
1247 ratio = (msr >> 24) & 0xFF; 1384 ratio = (msr >> 24) & 0xFF;
1248 if (ratio) 1385 if (ratio)
1249 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", 1386 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
1250 ratio, bclk, ratio * bclk); 1387 ratio, bclk, ratio * bclk);
1251 1388
1252 ratio = (msr >> 16) & 0xFF; 1389 ratio = (msr >> 16) & 0xFF;
1253 if (ratio) 1390 if (ratio)
1254 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", 1391 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
1255 ratio, bclk, ratio * bclk); 1392 ratio, bclk, ratio * bclk);
1256 1393
1257 ratio = (msr >> 8) & 0xFF; 1394 ratio = (msr >> 8) & 0xFF;
1258 if (ratio) 1395 if (ratio)
1259 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", 1396 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
1260 ratio, bclk, ratio * bclk); 1397 ratio, bclk, ratio * bclk);
1261 1398
1262 ratio = (msr >> 0) & 0xFF; 1399 ratio = (msr >> 0) & 0xFF;
1263 if (ratio) 1400 if (ratio)
1264 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 1401 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1265 ratio, bclk, ratio * bclk); 1402 ratio, bclk, ratio * bclk);
1266 return; 1403 return;
1267} 1404}
@@ -1274,46 +1411,46 @@ dump_nhm_turbo_ratio_limits(void)
1274 1411
1275 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); 1412 get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
1276 1413
1277 fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); 1414 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
1278 1415
1279 ratio = (msr >> 56) & 0xFF; 1416 ratio = (msr >> 56) & 0xFF;
1280 if (ratio) 1417 if (ratio)
1281 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", 1418 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
1282 ratio, bclk, ratio * bclk); 1419 ratio, bclk, ratio * bclk);
1283 1420
1284 ratio = (msr >> 48) & 0xFF; 1421 ratio = (msr >> 48) & 0xFF;
1285 if (ratio) 1422 if (ratio)
1286 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", 1423 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
1287 ratio, bclk, ratio * bclk); 1424 ratio, bclk, ratio * bclk);
1288 1425
1289 ratio = (msr >> 40) & 0xFF; 1426 ratio = (msr >> 40) & 0xFF;
1290 if (ratio) 1427 if (ratio)
1291 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", 1428 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
1292 ratio, bclk, ratio * bclk); 1429 ratio, bclk, ratio * bclk);
1293 1430
1294 ratio = (msr >> 32) & 0xFF; 1431 ratio = (msr >> 32) & 0xFF;
1295 if (ratio) 1432 if (ratio)
1296 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", 1433 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
1297 ratio, bclk, ratio * bclk); 1434 ratio, bclk, ratio * bclk);
1298 1435
1299 ratio = (msr >> 24) & 0xFF; 1436 ratio = (msr >> 24) & 0xFF;
1300 if (ratio) 1437 if (ratio)
1301 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", 1438 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
1302 ratio, bclk, ratio * bclk); 1439 ratio, bclk, ratio * bclk);
1303 1440
1304 ratio = (msr >> 16) & 0xFF; 1441 ratio = (msr >> 16) & 0xFF;
1305 if (ratio) 1442 if (ratio)
1306 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", 1443 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
1307 ratio, bclk, ratio * bclk); 1444 ratio, bclk, ratio * bclk);
1308 1445
1309 ratio = (msr >> 8) & 0xFF; 1446 ratio = (msr >> 8) & 0xFF;
1310 if (ratio) 1447 if (ratio)
1311 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", 1448 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
1312 ratio, bclk, ratio * bclk); 1449 ratio, bclk, ratio * bclk);
1313 1450
1314 ratio = (msr >> 0) & 0xFF; 1451 ratio = (msr >> 0) & 0xFF;
1315 if (ratio) 1452 if (ratio)
1316 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 1453 fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1317 ratio, bclk, ratio * bclk); 1454 ratio, bclk, ratio * bclk);
1318 return; 1455 return;
1319} 1456}
@@ -1321,21 +1458,23 @@ dump_nhm_turbo_ratio_limits(void)
1321static void 1458static void
1322dump_knl_turbo_ratio_limits(void) 1459dump_knl_turbo_ratio_limits(void)
1323{ 1460{
1324 int cores; 1461 const unsigned int buckets_no = 7;
1325 unsigned int ratio; 1462
1326 unsigned long long msr; 1463 unsigned long long msr;
1327 int delta_cores; 1464 int delta_cores, delta_ratio;
1328 int delta_ratio; 1465 int i, b_nr;
1329 int i; 1466 unsigned int cores[buckets_no];
1467 unsigned int ratio[buckets_no];
1330 1468
1331 get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); 1469 get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
1332 1470
1333 fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", 1471 fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
1334 base_cpu, msr); 1472 base_cpu, msr);
1335 1473
1336 /** 1474 /**
1337 * Turbo encoding in KNL is as follows: 1475 * Turbo encoding in KNL is as follows:
1338 * [7:0] -- Base value of number of active cores of bucket 1. 1476 * [0] -- Reserved
1477 * [7:1] -- Base value of number of active cores of bucket 1.
1339 * [15:8] -- Base value of freq ratio of bucket 1. 1478 * [15:8] -- Base value of freq ratio of bucket 1.
1340 * [20:16] -- +ve delta of number of active cores of bucket 2. 1479 * [20:16] -- +ve delta of number of active cores of bucket 2.
1341 * i.e. active cores of bucket 2 = 1480 * i.e. active cores of bucket 2 =
@@ -1354,29 +1493,25 @@ dump_knl_turbo_ratio_limits(void)
1354 * [60:56]-- +ve delta of number of active cores of bucket 7. 1493 * [60:56]-- +ve delta of number of active cores of bucket 7.
1355 * [63:61]-- -ve delta of freq ratio of bucket 7. 1494 * [63:61]-- -ve delta of freq ratio of bucket 7.
1356 */ 1495 */
1357 cores = msr & 0xFF; 1496
1358 ratio = (msr >> 8) && 0xFF; 1497 b_nr = 0;
1359 if (ratio > 0) 1498 cores[b_nr] = (msr & 0xFF) >> 1;
1360 fprintf(stderr, 1499 ratio[b_nr] = (msr >> 8) & 0xFF;
1361 "%d * %.0f = %.0f MHz max turbo %d active cores\n", 1500
1362 ratio, bclk, ratio * bclk, cores); 1501 for (i = 16; i < 64; i += 8) {
1363
1364 for (i = 16; i < 64; i = i + 8) {
1365 delta_cores = (msr >> i) & 0x1F; 1502 delta_cores = (msr >> i) & 0x1F;
1366 delta_ratio = (msr >> (i + 5)) && 0x7; 1503 delta_ratio = (msr >> (i + 5)) & 0x7;
1367 if (!delta_cores || !delta_ratio) 1504
1368 return; 1505 cores[b_nr + 1] = cores[b_nr] + delta_cores;
1369 cores = cores + delta_cores; 1506 ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
1370 ratio = ratio - delta_ratio; 1507 b_nr++;
1371
1372 /** -ve ratios will make successive ratio calculations
1373 * negative. Hence return instead of carrying on.
1374 */
1375 if (ratio > 0)
1376 fprintf(stderr,
1377 "%d * %.0f = %.0f MHz max turbo %d active cores\n",
1378 ratio, bclk, ratio * bclk, cores);
1379 } 1508 }
1509
1510 for (i = buckets_no - 1; i >= 0; i--)
1511 if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
1512 fprintf(outf,
1513 "%d * %.0f = %.0f MHz max turbo %d active cores\n",
1514 ratio[i], bclk, ratio[i] * bclk, cores[i]);
1380} 1515}
1381 1516
1382static void 1517static void
@@ -1389,15 +1524,15 @@ dump_nhm_cst_cfg(void)
1389#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) 1524#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
1390#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) 1525#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
1391 1526
1392 fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); 1527 fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
1393 1528
1394 fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", 1529 fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1395 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", 1530 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1396 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", 1531 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1397 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", 1532 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1398 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", 1533 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1399 (msr & (1 << 15)) ? "" : "UN", 1534 (msr & (1 << 15)) ? "" : "UN",
1400 (unsigned int)msr & 7, 1535 (unsigned int)msr & 0xF,
1401 pkg_cstate_limit_strings[pkg_cstate_limit]); 1536 pkg_cstate_limit_strings[pkg_cstate_limit]);
1402 return; 1537 return;
1403} 1538}
@@ -1408,48 +1543,59 @@ dump_config_tdp(void)
1408 unsigned long long msr; 1543 unsigned long long msr;
1409 1544
1410 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); 1545 get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
1411 fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); 1546 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
1412 fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); 1547 fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
1413 1548
1414 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); 1549 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
1415 fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); 1550 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
1416 if (msr) { 1551 if (msr) {
1417 fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); 1552 fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1418 fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); 1553 fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1419 fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); 1554 fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1420 fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); 1555 fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
1421 } 1556 }
1422 fprintf(stderr, ")\n"); 1557 fprintf(outf, ")\n");
1423 1558
1424 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); 1559 get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
1425 fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); 1560 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
1426 if (msr) { 1561 if (msr) {
1427 fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); 1562 fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
1428 fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); 1563 fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
1429 fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); 1564 fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
1430 fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); 1565 fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
1431 } 1566 }
1432 fprintf(stderr, ")\n"); 1567 fprintf(outf, ")\n");
1433 1568
1434 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); 1569 get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
1435 fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); 1570 fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
1436 if ((msr) & 0x3) 1571 if ((msr) & 0x3)
1437 fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); 1572 fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
1438 fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); 1573 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1439 fprintf(stderr, ")\n"); 1574 fprintf(outf, ")\n");
1440 1575
1441 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); 1576 get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
1442 fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); 1577 fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
1443 fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); 1578 fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
1444 fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); 1579 fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
1445 fprintf(stderr, ")\n"); 1580 fprintf(outf, ")\n");
1581}
1582void free_fd_percpu(void)
1583{
1584 int i;
1585
1586 for (i = 0; i < topo.max_cpu_num; ++i) {
1587 if (fd_percpu[i] != 0)
1588 close(fd_percpu[i]);
1589 }
1590
1591 free(fd_percpu);
1446} 1592}
1447 1593
1448void free_all_buffers(void) 1594void free_all_buffers(void)
1449{ 1595{
1450 CPU_FREE(cpu_present_set); 1596 CPU_FREE(cpu_present_set);
1451 cpu_present_set = NULL; 1597 cpu_present_set = NULL;
1452 cpu_present_set = 0; 1598 cpu_present_setsize = 0;
1453 1599
1454 CPU_FREE(cpu_affinity_set); 1600 CPU_FREE(cpu_affinity_set);
1455 cpu_affinity_set = NULL; 1601 cpu_affinity_set = NULL;
@@ -1474,6 +1620,11 @@ void free_all_buffers(void)
1474 free(output_buffer); 1620 free(output_buffer);
1475 output_buffer = NULL; 1621 output_buffer = NULL;
1476 outp = NULL; 1622 outp = NULL;
1623
1624 free_fd_percpu();
1625
1626 free(irq_column_2_cpu);
1627 free(irqs_per_cpu);
1477} 1628}
1478 1629
1479/* 1630/*
@@ -1481,7 +1632,7 @@ void free_all_buffers(void)
1481 */ 1632 */
1482FILE *fopen_or_die(const char *path, const char *mode) 1633FILE *fopen_or_die(const char *path, const char *mode)
1483{ 1634{
1484 FILE *filep = fopen(path, "r"); 1635 FILE *filep = fopen(path, mode);
1485 if (!filep) 1636 if (!filep)
1486 err(1, "%s: open failed", path); 1637 err(1, "%s: open failed", path);
1487 return filep; 1638 return filep;
@@ -1696,6 +1847,136 @@ int mark_cpu_present(int cpu)
1696 return 0; 1847 return 0;
1697} 1848}
1698 1849
1850/*
1851 * snapshot_proc_interrupts()
1852 *
1853 * read and record summary of /proc/interrupts
1854 *
1855 * return 1 if config change requires a restart, else return 0
1856 */
1857int snapshot_proc_interrupts(void)
1858{
1859 static FILE *fp;
1860 int column, retval;
1861
1862 if (fp == NULL)
1863 fp = fopen_or_die("/proc/interrupts", "r");
1864 else
1865 rewind(fp);
1866
1867 /* read 1st line of /proc/interrupts to get cpu* name for each column */
1868 for (column = 0; column < topo.num_cpus; ++column) {
1869 int cpu_number;
1870
1871 retval = fscanf(fp, " CPU%d", &cpu_number);
1872 if (retval != 1)
1873 break;
1874
1875 if (cpu_number > topo.max_cpu_num) {
1876 warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
1877 return 1;
1878 }
1879
1880 irq_column_2_cpu[column] = cpu_number;
1881 irqs_per_cpu[cpu_number] = 0;
1882 }
1883
1884 /* read /proc/interrupt count lines and sum up irqs per cpu */
1885 while (1) {
1886 int column;
1887 char buf[64];
1888
1889 retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */
1890 if (retval != 1)
1891 break;
1892
1893 /* read the count per cpu */
1894 for (column = 0; column < topo.num_cpus; ++column) {
1895
1896 int cpu_number, irq_count;
1897
1898 retval = fscanf(fp, " %d", &irq_count);
1899 if (retval != 1)
1900 break;
1901
1902 cpu_number = irq_column_2_cpu[column];
1903 irqs_per_cpu[cpu_number] += irq_count;
1904
1905 }
1906
1907 while (getc(fp) != '\n')
1908 ; /* flush interrupt description */
1909
1910 }
1911 return 0;
1912}
1913/*
1914 * snapshot_gfx_rc6_ms()
1915 *
1916 * record snapshot of
1917 * /sys/class/drm/card0/power/rc6_residency_ms
1918 *
1919 * return 1 if config change requires a restart, else return 0
1920 */
1921int snapshot_gfx_rc6_ms(void)
1922{
1923 FILE *fp;
1924 int retval;
1925
1926 fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
1927
1928 retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
1929 if (retval != 1)
1930 err(1, "GFX rc6");
1931
1932 fclose(fp);
1933
1934 return 0;
1935}
1936/*
1937 * snapshot_gfx_mhz()
1938 *
1939 * record snapshot of
1940 * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
1941 *
1942 * return 1 if config change requires a restart, else return 0
1943 */
1944int snapshot_gfx_mhz(void)
1945{
1946 static FILE *fp;
1947 int retval;
1948
1949 if (fp == NULL)
1950 fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
1951 else
1952 rewind(fp);
1953
1954 retval = fscanf(fp, "%d", &gfx_cur_mhz);
1955 if (retval != 1)
1956 err(1, "GFX MHz");
1957
1958 return 0;
1959}
1960
1961/*
1962 * snapshot /proc and /sys files
1963 *
1964 * return 1 if configuration restart needed, else return 0
1965 */
1966int snapshot_proc_sysfs_files(void)
1967{
1968 if (snapshot_proc_interrupts())
1969 return 1;
1970
1971 if (do_gfx_rc6_ms)
1972 snapshot_gfx_rc6_ms();
1973
1974 if (do_gfx_mhz)
1975 snapshot_gfx_mhz();
1976
1977 return 0;
1978}
1979
1699void turbostat_loop() 1980void turbostat_loop()
1700{ 1981{
1701 int retval; 1982 int retval;
@@ -1704,6 +1985,7 @@ void turbostat_loop()
1704restart: 1985restart:
1705 restarted++; 1986 restarted++;
1706 1987
1988 snapshot_proc_sysfs_files();
1707 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1989 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1708 if (retval < -1) { 1990 if (retval < -1) {
1709 exit(retval); 1991 exit(retval);
@@ -1722,7 +2004,9 @@ restart:
1722 re_initialize(); 2004 re_initialize();
1723 goto restart; 2005 goto restart;
1724 } 2006 }
1725 sleep(interval_sec); 2007 nanosleep(&interval_ts, NULL);
2008 if (snapshot_proc_sysfs_files())
2009 goto restart;
1726 retval = for_all_cpus(get_counters, ODD_COUNTERS); 2010 retval = for_all_cpus(get_counters, ODD_COUNTERS);
1727 if (retval < -1) { 2011 if (retval < -1) {
1728 exit(retval); 2012 exit(retval);
@@ -1735,8 +2019,10 @@ restart:
1735 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 2019 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
1736 compute_average(EVEN_COUNTERS); 2020 compute_average(EVEN_COUNTERS);
1737 format_all_counters(EVEN_COUNTERS); 2021 format_all_counters(EVEN_COUNTERS);
1738 flush_stdout(); 2022 flush_output_stdout();
1739 sleep(interval_sec); 2023 nanosleep(&interval_ts, NULL);
2024 if (snapshot_proc_sysfs_files())
2025 goto restart;
1740 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 2026 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1741 if (retval < -1) { 2027 if (retval < -1) {
1742 exit(retval); 2028 exit(retval);
@@ -1749,7 +2035,7 @@ restart:
1749 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); 2035 for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
1750 compute_average(ODD_COUNTERS); 2036 compute_average(ODD_COUNTERS);
1751 format_all_counters(ODD_COUNTERS); 2037 format_all_counters(ODD_COUNTERS);
1752 flush_stdout(); 2038 flush_output_stdout();
1753 } 2039 }
1754} 2040}
1755 2041
@@ -1889,6 +2175,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
1889 /* Nehalem compatible, but do not include turbo-ratio limit support */ 2175 /* Nehalem compatible, but do not include turbo-ratio limit support */
1890 case 0x2E: /* Nehalem-EX Xeon - Beckton */ 2176 case 0x2E: /* Nehalem-EX Xeon - Beckton */
1891 case 0x2F: /* Westmere-EX Xeon - Eagleton */ 2177 case 0x2F: /* Westmere-EX Xeon - Eagleton */
2178 case 0x57: /* PHI - Knights Landing (different MSR definition) */
1892 return 0; 2179 return 0;
1893 default: 2180 default:
1894 return 1; 2181 return 1;
@@ -1970,7 +2257,7 @@ int has_config_tdp(unsigned int family, unsigned int model)
1970} 2257}
1971 2258
1972static void 2259static void
1973dump_cstate_pstate_config_info(family, model) 2260dump_cstate_pstate_config_info(int family, int model)
1974{ 2261{
1975 if (!do_nhm_platform_info) 2262 if (!do_nhm_platform_info)
1976 return; 2263 return;
@@ -2016,7 +2303,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2016 return 0; 2303 return 0;
2017 2304
2018 if (cpu_migrate(cpu)) { 2305 if (cpu_migrate(cpu)) {
2019 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2306 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2020 return -1; 2307 return -1;
2021 } 2308 }
2022 2309
@@ -2037,7 +2324,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2037 epb_string = "custom"; 2324 epb_string = "custom";
2038 break; 2325 break;
2039 } 2326 }
2040 fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); 2327 fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
2328
2329 return 0;
2330}
2331/*
2332 * print_hwp()
2333 * Decode the MSR_HWP_CAPABILITIES
2334 */
2335int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2336{
2337 unsigned long long msr;
2338 int cpu;
2339
2340 if (!has_hwp)
2341 return 0;
2342
2343 cpu = t->cpu_id;
2344
2345 /* MSR_HWP_CAPABILITIES is per-package */
2346 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
2347 return 0;
2348
2349 if (cpu_migrate(cpu)) {
2350 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2351 return -1;
2352 }
2353
2354 if (get_msr(cpu, MSR_PM_ENABLE, &msr))
2355 return 0;
2356
2357 fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
2358 cpu, msr, (msr & (1 << 0)) ? "" : "No-");
2359
2360 /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
2361 if ((msr & (1 << 0)) == 0)
2362 return 0;
2363
2364 if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
2365 return 0;
2366
2367 fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
2368 "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
2369 cpu, msr,
2370 (unsigned int)HWP_HIGHEST_PERF(msr),
2371 (unsigned int)HWP_GUARANTEED_PERF(msr),
2372 (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
2373 (unsigned int)HWP_LOWEST_PERF(msr));
2374
2375 if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
2376 return 0;
2377
2378 fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
2379 "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
2380 cpu, msr,
2381 (unsigned int)(((msr) >> 0) & 0xff),
2382 (unsigned int)(((msr) >> 8) & 0xff),
2383 (unsigned int)(((msr) >> 16) & 0xff),
2384 (unsigned int)(((msr) >> 24) & 0xff),
2385 (unsigned int)(((msr) >> 32) & 0xff3),
2386 (unsigned int)(((msr) >> 42) & 0x1));
2387
2388 if (has_hwp_pkg) {
2389 if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
2390 return 0;
2391
2392 fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
2393 "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
2394 cpu, msr,
2395 (unsigned int)(((msr) >> 0) & 0xff),
2396 (unsigned int)(((msr) >> 8) & 0xff),
2397 (unsigned int)(((msr) >> 16) & 0xff),
2398 (unsigned int)(((msr) >> 24) & 0xff),
2399 (unsigned int)(((msr) >> 32) & 0xff3));
2400 }
2401 if (has_hwp_notify) {
2402 if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
2403 return 0;
2404
2405 fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
2406 "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
2407 cpu, msr,
2408 ((msr) & 0x1) ? "EN" : "Dis",
2409 ((msr) & 0x2) ? "EN" : "Dis");
2410 }
2411 if (get_msr(cpu, MSR_HWP_STATUS, &msr))
2412 return 0;
2413
2414 fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
2415 "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
2416 cpu, msr,
2417 ((msr) & 0x1) ? "" : "No-",
2418 ((msr) & 0x2) ? "" : "No-");
2041 2419
2042 return 0; 2420 return 0;
2043} 2421}
@@ -2057,14 +2435,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
2057 return 0; 2435 return 0;
2058 2436
2059 if (cpu_migrate(cpu)) { 2437 if (cpu_migrate(cpu)) {
2060 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2438 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2061 return -1; 2439 return -1;
2062 } 2440 }
2063 2441
2064 if (do_core_perf_limit_reasons) { 2442 if (do_core_perf_limit_reasons) {
2065 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 2443 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
2066 fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2444 fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2067 fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 2445 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
2068 (msr & 1 << 15) ? "bit15, " : "", 2446 (msr & 1 << 15) ? "bit15, " : "",
2069 (msr & 1 << 14) ? "bit14, " : "", 2447 (msr & 1 << 14) ? "bit14, " : "",
2070 (msr & 1 << 13) ? "Transitions, " : "", 2448 (msr & 1 << 13) ? "Transitions, " : "",
@@ -2079,7 +2457,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
2079 (msr & 1 << 2) ? "bit2, " : "", 2457 (msr & 1 << 2) ? "bit2, " : "",
2080 (msr & 1 << 1) ? "ThermStatus, " : "", 2458 (msr & 1 << 1) ? "ThermStatus, " : "",
2081 (msr & 1 << 0) ? "PROCHOT, " : ""); 2459 (msr & 1 << 0) ? "PROCHOT, " : "");
2082 fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 2460 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
2083 (msr & 1 << 31) ? "bit31, " : "", 2461 (msr & 1 << 31) ? "bit31, " : "",
2084 (msr & 1 << 30) ? "bit30, " : "", 2462 (msr & 1 << 30) ? "bit30, " : "",
2085 (msr & 1 << 29) ? "Transitions, " : "", 2463 (msr & 1 << 29) ? "Transitions, " : "",
@@ -2098,8 +2476,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
2098 } 2476 }
2099 if (do_gfx_perf_limit_reasons) { 2477 if (do_gfx_perf_limit_reasons) {
2100 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); 2478 get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
2101 fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2479 fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2102 fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", 2480 fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
2103 (msr & 1 << 0) ? "PROCHOT, " : "", 2481 (msr & 1 << 0) ? "PROCHOT, " : "",
2104 (msr & 1 << 1) ? "ThermStatus, " : "", 2482 (msr & 1 << 1) ? "ThermStatus, " : "",
2105 (msr & 1 << 4) ? "Graphics, " : "", 2483 (msr & 1 << 4) ? "Graphics, " : "",
@@ -2108,7 +2486,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
2108 (msr & 1 << 9) ? "GFXPwr, " : "", 2486 (msr & 1 << 9) ? "GFXPwr, " : "",
2109 (msr & 1 << 10) ? "PkgPwrL1, " : "", 2487 (msr & 1 << 10) ? "PkgPwrL1, " : "",
2110 (msr & 1 << 11) ? "PkgPwrL2, " : ""); 2488 (msr & 1 << 11) ? "PkgPwrL2, " : "");
2111 fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", 2489 fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
2112 (msr & 1 << 16) ? "PROCHOT, " : "", 2490 (msr & 1 << 16) ? "PROCHOT, " : "",
2113 (msr & 1 << 17) ? "ThermStatus, " : "", 2491 (msr & 1 << 17) ? "ThermStatus, " : "",
2114 (msr & 1 << 20) ? "Graphics, " : "", 2492 (msr & 1 << 20) ? "Graphics, " : "",
@@ -2120,15 +2498,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
2120 } 2498 }
2121 if (do_ring_perf_limit_reasons) { 2499 if (do_ring_perf_limit_reasons) {
2122 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); 2500 get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
2123 fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 2501 fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
2124 fprintf(stderr, " (Active: %s%s%s%s%s%s)", 2502 fprintf(outf, " (Active: %s%s%s%s%s%s)",
2125 (msr & 1 << 0) ? "PROCHOT, " : "", 2503 (msr & 1 << 0) ? "PROCHOT, " : "",
2126 (msr & 1 << 1) ? "ThermStatus, " : "", 2504 (msr & 1 << 1) ? "ThermStatus, " : "",
2127 (msr & 1 << 6) ? "VR-Therm, " : "", 2505 (msr & 1 << 6) ? "VR-Therm, " : "",
2128 (msr & 1 << 8) ? "Amps, " : "", 2506 (msr & 1 << 8) ? "Amps, " : "",
2129 (msr & 1 << 10) ? "PkgPwrL1, " : "", 2507 (msr & 1 << 10) ? "PkgPwrL1, " : "",
2130 (msr & 1 << 11) ? "PkgPwrL2, " : ""); 2508 (msr & 1 << 11) ? "PkgPwrL2, " : "");
2131 fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", 2509 fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
2132 (msr & 1 << 16) ? "PROCHOT, " : "", 2510 (msr & 1 << 16) ? "PROCHOT, " : "",
2133 (msr & 1 << 17) ? "ThermStatus, " : "", 2511 (msr & 1 << 17) ? "ThermStatus, " : "",
2134 (msr & 1 << 22) ? "VR-Therm, " : "", 2512 (msr & 1 << 22) ? "VR-Therm, " : "",
@@ -2142,7 +2520,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
2142#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ 2520#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
2143#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ 2521#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
2144 2522
2145double get_tdp(model) 2523double get_tdp(int model)
2146{ 2524{
2147 unsigned long long msr; 2525 unsigned long long msr;
2148 2526
@@ -2251,12 +2629,12 @@ void rapl_probe(unsigned int family, unsigned int model)
2251 2629
2252 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; 2630 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
2253 if (debug) 2631 if (debug)
2254 fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); 2632 fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
2255 2633
2256 return; 2634 return;
2257} 2635}
2258 2636
2259void perf_limit_reasons_probe(family, model) 2637void perf_limit_reasons_probe(int family, int model)
2260{ 2638{
2261 if (!genuine_intel) 2639 if (!genuine_intel)
2262 return; 2640 return;
@@ -2293,7 +2671,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
2293 return 0; 2671 return 0;
2294 2672
2295 if (cpu_migrate(cpu)) { 2673 if (cpu_migrate(cpu)) {
2296 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2674 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2297 return -1; 2675 return -1;
2298 } 2676 }
2299 2677
@@ -2302,7 +2680,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
2302 return 0; 2680 return 0;
2303 2681
2304 dts = (msr >> 16) & 0x7F; 2682 dts = (msr >> 16) & 0x7F;
2305 fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", 2683 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
2306 cpu, msr, tcc_activation_temp - dts); 2684 cpu, msr, tcc_activation_temp - dts);
2307 2685
2308#ifdef THERM_DEBUG 2686#ifdef THERM_DEBUG
@@ -2311,7 +2689,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
2311 2689
2312 dts = (msr >> 16) & 0x7F; 2690 dts = (msr >> 16) & 0x7F;
2313 dts2 = (msr >> 8) & 0x7F; 2691 dts2 = (msr >> 8) & 0x7F;
2314 fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 2692 fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2315 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 2693 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2316#endif 2694#endif
2317 } 2695 }
@@ -2325,7 +2703,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
2325 2703
2326 dts = (msr >> 16) & 0x7F; 2704 dts = (msr >> 16) & 0x7F;
2327 resolution = (msr >> 27) & 0xF; 2705 resolution = (msr >> 27) & 0xF;
2328 fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", 2706 fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
2329 cpu, msr, tcc_activation_temp - dts, resolution); 2707 cpu, msr, tcc_activation_temp - dts, resolution);
2330 2708
2331#ifdef THERM_DEBUG 2709#ifdef THERM_DEBUG
@@ -2334,17 +2712,17 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
2334 2712
2335 dts = (msr >> 16) & 0x7F; 2713 dts = (msr >> 16) & 0x7F;
2336 dts2 = (msr >> 8) & 0x7F; 2714 dts2 = (msr >> 8) & 0x7F;
2337 fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", 2715 fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
2338 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); 2716 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
2339#endif 2717#endif
2340 } 2718 }
2341 2719
2342 return 0; 2720 return 0;
2343} 2721}
2344 2722
2345void print_power_limit_msr(int cpu, unsigned long long msr, char *label) 2723void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
2346{ 2724{
2347 fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", 2725 fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
2348 cpu, label, 2726 cpu, label,
2349 ((msr >> 15) & 1) ? "EN" : "DIS", 2727 ((msr >> 15) & 1) ? "EN" : "DIS",
2350 ((msr >> 0) & 0x7FFF) * rapl_power_units, 2728 ((msr >> 0) & 0x7FFF) * rapl_power_units,
@@ -2368,7 +2746,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2368 2746
2369 cpu = t->cpu_id; 2747 cpu = t->cpu_id;
2370 if (cpu_migrate(cpu)) { 2748 if (cpu_migrate(cpu)) {
2371 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 2749 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2372 return -1; 2750 return -1;
2373 } 2751 }
2374 2752
@@ -2376,7 +2754,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2376 return -1; 2754 return -1;
2377 2755
2378 if (debug) { 2756 if (debug) {
2379 fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " 2757 fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
2380 "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, 2758 "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
2381 rapl_power_units, rapl_energy_units, rapl_time_units); 2759 rapl_power_units, rapl_energy_units, rapl_time_units);
2382 } 2760 }
@@ -2386,7 +2764,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2386 return -5; 2764 return -5;
2387 2765
2388 2766
2389 fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2767 fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2390 cpu, msr, 2768 cpu, msr,
2391 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2769 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2392 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2770 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
@@ -2399,11 +2777,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2399 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) 2777 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
2400 return -9; 2778 return -9;
2401 2779
2402 fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", 2780 fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
2403 cpu, msr, (msr >> 63) & 1 ? "": "UN"); 2781 cpu, msr, (msr >> 63) & 1 ? "": "UN");
2404 2782
2405 print_power_limit_msr(cpu, msr, "PKG Limit #1"); 2783 print_power_limit_msr(cpu, msr, "PKG Limit #1");
2406 fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", 2784 fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
2407 cpu, 2785 cpu,
2408 ((msr >> 47) & 1) ? "EN" : "DIS", 2786 ((msr >> 47) & 1) ? "EN" : "DIS",
2409 ((msr >> 32) & 0x7FFF) * rapl_power_units, 2787 ((msr >> 32) & 0x7FFF) * rapl_power_units,
@@ -2415,7 +2793,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2415 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 2793 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
2416 return -6; 2794 return -6;
2417 2795
2418 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2796 fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2419 cpu, msr, 2797 cpu, msr,
2420 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2798 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2421 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2799 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
@@ -2425,7 +2803,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2425 if (do_rapl & RAPL_DRAM) { 2803 if (do_rapl & RAPL_DRAM) {
2426 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 2804 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
2427 return -9; 2805 return -9;
2428 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 2806 fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
2429 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 2807 cpu, msr, (msr >> 31) & 1 ? "": "UN");
2430 2808
2431 print_power_limit_msr(cpu, msr, "DRAM Limit"); 2809 print_power_limit_msr(cpu, msr, "DRAM Limit");
@@ -2435,7 +2813,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2435 if (get_msr(cpu, MSR_PP0_POLICY, &msr)) 2813 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
2436 return -7; 2814 return -7;
2437 2815
2438 fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); 2816 fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
2439 } 2817 }
2440 } 2818 }
2441 if (do_rapl & RAPL_CORES) { 2819 if (do_rapl & RAPL_CORES) {
@@ -2443,7 +2821,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2443 2821
2444 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) 2822 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
2445 return -9; 2823 return -9;
2446 fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", 2824 fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
2447 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 2825 cpu, msr, (msr >> 31) & 1 ? "": "UN");
2448 print_power_limit_msr(cpu, msr, "Cores Limit"); 2826 print_power_limit_msr(cpu, msr, "Cores Limit");
2449 } 2827 }
@@ -2453,11 +2831,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2453 if (get_msr(cpu, MSR_PP1_POLICY, &msr)) 2831 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
2454 return -8; 2832 return -8;
2455 2833
2456 fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); 2834 fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
2457 2835
2458 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) 2836 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
2459 return -9; 2837 return -9;
2460 fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", 2838 fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
2461 cpu, msr, (msr >> 31) & 1 ? "": "UN"); 2839 cpu, msr, (msr >> 31) & 1 ? "": "UN");
2462 print_power_limit_msr(cpu, msr, "GFX Limit"); 2840 print_power_limit_msr(cpu, msr, "GFX Limit");
2463 } 2841 }
@@ -2583,23 +2961,23 @@ double slm_bclk(void)
2583 double freq; 2961 double freq;
2584 2962
2585 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) 2963 if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
2586 fprintf(stderr, "SLM BCLK: unknown\n"); 2964 fprintf(outf, "SLM BCLK: unknown\n");
2587 2965
2588 i = msr & 0xf; 2966 i = msr & 0xf;
2589 if (i >= SLM_BCLK_FREQS) { 2967 if (i >= SLM_BCLK_FREQS) {
2590 fprintf(stderr, "SLM BCLK[%d] invalid\n", i); 2968 fprintf(outf, "SLM BCLK[%d] invalid\n", i);
2591 msr = 3; 2969 msr = 3;
2592 } 2970 }
2593 freq = slm_freq_table[i]; 2971 freq = slm_freq_table[i];
2594 2972
2595 fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); 2973 fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
2596 2974
2597 return freq; 2975 return freq;
2598} 2976}
2599 2977
2600double discover_bclk(unsigned int family, unsigned int model) 2978double discover_bclk(unsigned int family, unsigned int model)
2601{ 2979{
2602 if (has_snb_msrs(family, model)) 2980 if (has_snb_msrs(family, model) || is_knl(family, model))
2603 return 100.00; 2981 return 100.00;
2604 else if (is_slm(family, model)) 2982 else if (is_slm(family, model))
2605 return slm_bclk(); 2983 return slm_bclk();
@@ -2635,13 +3013,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
2635 3013
2636 cpu = t->cpu_id; 3014 cpu = t->cpu_id;
2637 if (cpu_migrate(cpu)) { 3015 if (cpu_migrate(cpu)) {
2638 fprintf(stderr, "Could not migrate to CPU %d\n", cpu); 3016 fprintf(outf, "Could not migrate to CPU %d\n", cpu);
2639 return -1; 3017 return -1;
2640 } 3018 }
2641 3019
2642 if (tcc_activation_temp_override != 0) { 3020 if (tcc_activation_temp_override != 0) {
2643 tcc_activation_temp = tcc_activation_temp_override; 3021 tcc_activation_temp = tcc_activation_temp_override;
2644 fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", 3022 fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
2645 cpu, tcc_activation_temp); 3023 cpu, tcc_activation_temp);
2646 return 0; 3024 return 0;
2647 } 3025 }
@@ -2656,7 +3034,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
2656 target_c_local = (msr >> 16) & 0xFF; 3034 target_c_local = (msr >> 16) & 0xFF;
2657 3035
2658 if (debug) 3036 if (debug)
2659 fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", 3037 fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
2660 cpu, msr, target_c_local); 3038 cpu, msr, target_c_local);
2661 3039
2662 if (!target_c_local) 3040 if (!target_c_local)
@@ -2668,37 +3046,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
2668 3046
2669guess: 3047guess:
2670 tcc_activation_temp = TJMAX_DEFAULT; 3048 tcc_activation_temp = TJMAX_DEFAULT;
2671 fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", 3049 fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
2672 cpu, tcc_activation_temp); 3050 cpu, tcc_activation_temp);
2673 3051
2674 return 0; 3052 return 0;
2675} 3053}
3054
3055void decode_feature_control_msr(void)
3056{
3057 unsigned long long msr;
3058
3059 if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
3060 fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
3061 base_cpu, msr,
3062 msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
3063 msr & (1 << 18) ? "SGX" : "");
3064}
3065
3066void decode_misc_enable_msr(void)
3067{
3068 unsigned long long msr;
3069
3070 if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
3071 fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
3072 base_cpu, msr,
3073 msr & (1 << 3) ? "TCC" : "",
3074 msr & (1 << 16) ? "EIST" : "",
3075 msr & (1 << 18) ? "MONITOR" : "");
3076}
3077
3078/*
3079 * Decode MSR_MISC_PWR_MGMT
3080 *
3081 * Decode the bits according to the Nehalem documentation
3082 * bit[0] seems to continue to have same meaning going forward
3083 * bit[1] less so...
3084 */
3085void decode_misc_pwr_mgmt_msr(void)
3086{
3087 unsigned long long msr;
3088
3089 if (!do_nhm_platform_info)
3090 return;
3091
3092 if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
3093 fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
3094 base_cpu, msr,
3095 msr & (1 << 0) ? "DIS" : "EN",
3096 msr & (1 << 1) ? "EN" : "DIS");
3097}
3098
2676void process_cpuid() 3099void process_cpuid()
2677{ 3100{
2678 unsigned int eax, ebx, ecx, edx, max_level; 3101 unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
2679 unsigned int fms, family, model, stepping; 3102 unsigned int fms, family, model, stepping;
2680 3103
2681 eax = ebx = ecx = edx = 0; 3104 eax = ebx = ecx = edx = 0;
2682 3105
2683 __get_cpuid(0, &max_level, &ebx, &ecx, &edx); 3106 __cpuid(0, max_level, ebx, ecx, edx);
2684 3107
2685 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) 3108 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
2686 genuine_intel = 1; 3109 genuine_intel = 1;
2687 3110
2688 if (debug) 3111 if (debug)
2689 fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", 3112 fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
2690 (char *)&ebx, (char *)&edx, (char *)&ecx); 3113 (char *)&ebx, (char *)&edx, (char *)&ecx);
2691 3114
2692 __get_cpuid(1, &fms, &ebx, &ecx, &edx); 3115 __cpuid(1, fms, ebx, ecx, edx);
2693 family = (fms >> 8) & 0xf; 3116 family = (fms >> 8) & 0xf;
2694 model = (fms >> 4) & 0xf; 3117 model = (fms >> 4) & 0xf;
2695 stepping = fms & 0xf; 3118 stepping = fms & 0xf;
2696 if (family == 6 || family == 0xf) 3119 if (family == 6 || family == 0xf)
2697 model += ((fms >> 16) & 0xf) << 4; 3120 model += ((fms >> 16) & 0xf) << 4;
2698 3121
2699 if (debug) 3122 if (debug) {
2700 fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", 3123 fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
2701 max_level, family, model, stepping, family, model, stepping); 3124 max_level, family, model, stepping, family, model, stepping);
3125 fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
3126 ecx & (1 << 0) ? "SSE3" : "-",
3127 ecx & (1 << 3) ? "MONITOR" : "-",
3128 ecx & (1 << 6) ? "SMX" : "-",
3129 ecx & (1 << 7) ? "EIST" : "-",
3130 ecx & (1 << 8) ? "TM2" : "-",
3131 edx & (1 << 4) ? "TSC" : "-",
3132 edx & (1 << 5) ? "MSR" : "-",
3133 edx & (1 << 22) ? "ACPI-TM" : "-",
3134 edx & (1 << 29) ? "TM" : "-");
3135 }
2702 3136
2703 if (!(edx & (1 << 5))) 3137 if (!(edx & (1 << 5)))
2704 errx(1, "CPUID: no MSR"); 3138 errx(1, "CPUID: no MSR");
@@ -2709,15 +3143,15 @@ void process_cpuid()
2709 * This check is valid for both Intel and AMD. 3143 * This check is valid for both Intel and AMD.
2710 */ 3144 */
2711 ebx = ecx = edx = 0; 3145 ebx = ecx = edx = 0;
2712 __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); 3146 __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
2713 3147
2714 if (max_level >= 0x80000007) { 3148 if (max_extended_level >= 0x80000007) {
2715 3149
2716 /* 3150 /*
2717 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 3151 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
2718 * this check is valid for both Intel and AMD 3152 * this check is valid for both Intel and AMD
2719 */ 3153 */
2720 __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); 3154 __cpuid(0x80000007, eax, ebx, ecx, edx);
2721 has_invariant_tsc = edx & (1 << 8); 3155 has_invariant_tsc = edx & (1 << 8);
2722 } 3156 }
2723 3157
@@ -2726,20 +3160,48 @@ void process_cpuid()
2726 * this check is valid for both Intel and AMD 3160 * this check is valid for both Intel and AMD
2727 */ 3161 */
2728 3162
2729 __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); 3163 __cpuid(0x6, eax, ebx, ecx, edx);
2730 has_aperf = ecx & (1 << 0); 3164 has_aperf = ecx & (1 << 0);
2731 do_dts = eax & (1 << 0); 3165 do_dts = eax & (1 << 0);
2732 do_ptm = eax & (1 << 6); 3166 do_ptm = eax & (1 << 6);
3167 has_hwp = eax & (1 << 7);
3168 has_hwp_notify = eax & (1 << 8);
3169 has_hwp_activity_window = eax & (1 << 9);
3170 has_hwp_epp = eax & (1 << 10);
3171 has_hwp_pkg = eax & (1 << 11);
2733 has_epb = ecx & (1 << 3); 3172 has_epb = ecx & (1 << 3);
2734 3173
2735 if (debug) 3174 if (debug)
2736 fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", 3175 fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
2737 has_aperf ? "" : "No ", 3176 "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
2738 do_dts ? "" : "No ", 3177 has_aperf ? "" : "No-",
2739 do_ptm ? "" : "No ", 3178 do_dts ? "" : "No-",
2740 has_epb ? "" : "No "); 3179 do_ptm ? "" : "No-",
3180 has_hwp ? "" : "No-",
3181 has_hwp_notify ? "" : "No-",
3182 has_hwp_activity_window ? "" : "No-",
3183 has_hwp_epp ? "" : "No-",
3184 has_hwp_pkg ? "" : "No-",
3185 has_epb ? "" : "No-");
3186
3187 if (debug)
3188 decode_misc_enable_msr();
3189
3190 if (max_level >= 0x7) {
3191 int has_sgx;
2741 3192
2742 if (max_level > 0x15) { 3193 ecx = 0;
3194
3195 __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
3196
3197 has_sgx = ebx & (1 << 2);
3198 fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
3199
3200 if (has_sgx)
3201 decode_feature_control_msr();
3202 }
3203
3204 if (max_level >= 0x15) {
2743 unsigned int eax_crystal; 3205 unsigned int eax_crystal;
2744 unsigned int ebx_tsc; 3206 unsigned int ebx_tsc;
2745 3207
@@ -2747,12 +3209,12 @@ void process_cpuid()
2747 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz 3209 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
2748 */ 3210 */
2749 eax_crystal = ebx_tsc = crystal_hz = edx = 0; 3211 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
2750 __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); 3212 __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
2751 3213
2752 if (ebx_tsc != 0) { 3214 if (ebx_tsc != 0) {
2753 3215
2754 if (debug && (ebx != 0)) 3216 if (debug && (ebx != 0))
2755 fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", 3217 fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
2756 eax_crystal, ebx_tsc, crystal_hz); 3218 eax_crystal, ebx_tsc, crystal_hz);
2757 3219
2758 if (crystal_hz == 0) 3220 if (crystal_hz == 0)
@@ -2768,11 +3230,24 @@ void process_cpuid()
2768 if (crystal_hz) { 3230 if (crystal_hz) {
2769 tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; 3231 tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
2770 if (debug) 3232 if (debug)
2771 fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", 3233 fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
2772 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); 3234 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
2773 } 3235 }
2774 } 3236 }
2775 } 3237 }
3238 if (max_level >= 0x16) {
3239 unsigned int base_mhz, max_mhz, bus_mhz, edx;
3240
3241 /*
3242 * CPUID 16H Base MHz, Max MHz, Bus MHz
3243 */
3244 base_mhz = max_mhz = bus_mhz = edx = 0;
3245
3246 __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
3247 if (debug)
3248 fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
3249 base_mhz, max_mhz, bus_mhz);
3250 }
2776 3251
2777 if (has_aperf) 3252 if (has_aperf)
2778 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); 3253 aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
@@ -2788,21 +3263,28 @@ void process_cpuid()
2788 do_slm_cstates = is_slm(family, model); 3263 do_slm_cstates = is_slm(family, model);
2789 do_knl_cstates = is_knl(family, model); 3264 do_knl_cstates = is_knl(family, model);
2790 3265
3266 if (debug)
3267 decode_misc_pwr_mgmt_msr();
3268
2791 rapl_probe(family, model); 3269 rapl_probe(family, model);
2792 perf_limit_reasons_probe(family, model); 3270 perf_limit_reasons_probe(family, model);
2793 3271
2794 if (debug) 3272 if (debug)
2795 dump_cstate_pstate_config_info(); 3273 dump_cstate_pstate_config_info(family, model);
2796 3274
2797 if (has_skl_msrs(family, model)) 3275 if (has_skl_msrs(family, model))
2798 calculate_tsc_tweak(); 3276 calculate_tsc_tweak();
2799 3277
3278 do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
3279
3280 do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
3281
2800 return; 3282 return;
2801} 3283}
2802 3284
2803void help() 3285void help()
2804{ 3286{
2805 fprintf(stderr, 3287 fprintf(outf,
2806 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" 3288 "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
2807 "\n" 3289 "\n"
2808 "Turbostat forks the specified COMMAND and prints statistics\n" 3290 "Turbostat forks the specified COMMAND and prints statistics\n"
@@ -2814,6 +3296,7 @@ void help()
2814 "--help print this help message\n" 3296 "--help print this help message\n"
2815 "--counter msr print 32-bit counter at address \"msr\"\n" 3297 "--counter msr print 32-bit counter at address \"msr\"\n"
2816 "--Counter msr print 64-bit Counter at address \"msr\"\n" 3298 "--Counter msr print 64-bit Counter at address \"msr\"\n"
3299 "--out file create or truncate \"file\" for all output\n"
2817 "--msr msr print 32-bit value at address \"msr\"\n" 3300 "--msr msr print 32-bit value at address \"msr\"\n"
2818 "--MSR msr print 64-bit Value at address \"msr\"\n" 3301 "--MSR msr print 64-bit Value at address \"msr\"\n"
2819 "--version print version information\n" 3302 "--version print version information\n"
@@ -2858,7 +3341,7 @@ void topology_probe()
2858 show_cpu = 1; 3341 show_cpu = 1;
2859 3342
2860 if (debug > 1) 3343 if (debug > 1)
2861 fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); 3344 fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
2862 3345
2863 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); 3346 cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
2864 if (cpus == NULL) 3347 if (cpus == NULL)
@@ -2893,7 +3376,7 @@ void topology_probe()
2893 3376
2894 if (cpu_is_not_present(i)) { 3377 if (cpu_is_not_present(i)) {
2895 if (debug > 1) 3378 if (debug > 1)
2896 fprintf(stderr, "cpu%d NOT PRESENT\n", i); 3379 fprintf(outf, "cpu%d NOT PRESENT\n", i);
2897 continue; 3380 continue;
2898 } 3381 }
2899 cpus[i].core_id = get_core_id(i); 3382 cpus[i].core_id = get_core_id(i);
@@ -2908,26 +3391,26 @@ void topology_probe()
2908 if (siblings > max_siblings) 3391 if (siblings > max_siblings)
2909 max_siblings = siblings; 3392 max_siblings = siblings;
2910 if (debug > 1) 3393 if (debug > 1)
2911 fprintf(stderr, "cpu %d pkg %d core %d\n", 3394 fprintf(outf, "cpu %d pkg %d core %d\n",
2912 i, cpus[i].physical_package_id, cpus[i].core_id); 3395 i, cpus[i].physical_package_id, cpus[i].core_id);
2913 } 3396 }
2914 topo.num_cores_per_pkg = max_core_id + 1; 3397 topo.num_cores_per_pkg = max_core_id + 1;
2915 if (debug > 1) 3398 if (debug > 1)
2916 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 3399 fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
2917 max_core_id, topo.num_cores_per_pkg); 3400 max_core_id, topo.num_cores_per_pkg);
2918 if (debug && !summary_only && topo.num_cores_per_pkg > 1) 3401 if (debug && !summary_only && topo.num_cores_per_pkg > 1)
2919 show_core = 1; 3402 show_core = 1;
2920 3403
2921 topo.num_packages = max_package_id + 1; 3404 topo.num_packages = max_package_id + 1;
2922 if (debug > 1) 3405 if (debug > 1)
2923 fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 3406 fprintf(outf, "max_package_id %d, sizing for %d packages\n",
2924 max_package_id, topo.num_packages); 3407 max_package_id, topo.num_packages);
2925 if (debug && !summary_only && topo.num_packages > 1) 3408 if (debug && !summary_only && topo.num_packages > 1)
2926 show_pkg = 1; 3409 show_pkg = 1;
2927 3410
2928 topo.num_threads_per_core = max_siblings; 3411 topo.num_threads_per_core = max_siblings;
2929 if (debug > 1) 3412 if (debug > 1)
2930 fprintf(stderr, "max_siblings %d\n", max_siblings); 3413 fprintf(outf, "max_siblings %d\n", max_siblings);
2931 3414
2932 free(cpus); 3415 free(cpus);
2933} 3416}
@@ -3019,10 +3502,27 @@ void allocate_output_buffer()
3019 if (outp == NULL) 3502 if (outp == NULL)
3020 err(-1, "calloc output buffer"); 3503 err(-1, "calloc output buffer");
3021} 3504}
3505void allocate_fd_percpu(void)
3506{
3507 fd_percpu = calloc(topo.max_cpu_num, sizeof(int));
3508 if (fd_percpu == NULL)
3509 err(-1, "calloc fd_percpu");
3510}
3511void allocate_irq_buffers(void)
3512{
3513 irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
3514 if (irq_column_2_cpu == NULL)
3515 err(-1, "calloc %d", topo.num_cpus);
3022 3516
3517 irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int));
3518 if (irqs_per_cpu == NULL)
3519 err(-1, "calloc %d", topo.max_cpu_num);
3520}
3023void setup_all_buffers(void) 3521void setup_all_buffers(void)
3024{ 3522{
3025 topology_probe(); 3523 topology_probe();
3524 allocate_irq_buffers();
3525 allocate_fd_percpu();
3026 allocate_counters(&thread_even, &core_even, &package_even); 3526 allocate_counters(&thread_even, &core_even, &package_even);
3027 allocate_counters(&thread_odd, &core_odd, &package_odd); 3527 allocate_counters(&thread_odd, &core_odd, &package_odd);
3028 allocate_output_buffer(); 3528 allocate_output_buffer();
@@ -3036,7 +3536,7 @@ void set_base_cpu(void)
3036 err(-ENODEV, "No valid cpus found"); 3536 err(-ENODEV, "No valid cpus found");
3037 3537
3038 if (debug > 1) 3538 if (debug > 1)
3039 fprintf(stderr, "base_cpu = %d\n", base_cpu); 3539 fprintf(outf, "base_cpu = %d\n", base_cpu);
3040} 3540}
3041 3541
3042void turbostat_init() 3542void turbostat_init()
@@ -3049,6 +3549,9 @@ void turbostat_init()
3049 3549
3050 3550
3051 if (debug) 3551 if (debug)
3552 for_all_cpus(print_hwp, ODD_COUNTERS);
3553
3554 if (debug)
3052 for_all_cpus(print_epb, ODD_COUNTERS); 3555 for_all_cpus(print_epb, ODD_COUNTERS);
3053 3556
3054 if (debug) 3557 if (debug)
@@ -3100,9 +3603,10 @@ int fork_it(char **argv)
3100 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); 3603 for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
3101 compute_average(EVEN_COUNTERS); 3604 compute_average(EVEN_COUNTERS);
3102 format_all_counters(EVEN_COUNTERS); 3605 format_all_counters(EVEN_COUNTERS);
3103 flush_stderr();
3104 3606
3105 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); 3607 fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
3608
3609 flush_output_stderr();
3106 3610
3107 return status; 3611 return status;
3108} 3612}
@@ -3119,13 +3623,13 @@ int get_and_dump_counters(void)
3119 if (status) 3623 if (status)
3120 return status; 3624 return status;
3121 3625
3122 flush_stdout(); 3626 flush_output_stdout();
3123 3627
3124 return status; 3628 return status;
3125} 3629}
3126 3630
3127void print_version() { 3631void print_version() {
3128 fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" 3632 fprintf(outf, "turbostat version 4.11 27 Feb 2016"
3129 " - Len Brown <lenb@kernel.org>\n"); 3633 " - Len Brown <lenb@kernel.org>\n");
3130} 3634}
3131 3635
@@ -3143,6 +3647,7 @@ void cmdline(int argc, char **argv)
3143 {"Joules", no_argument, 0, 'J'}, 3647 {"Joules", no_argument, 0, 'J'},
3144 {"MSR", required_argument, 0, 'M'}, 3648 {"MSR", required_argument, 0, 'M'},
3145 {"msr", required_argument, 0, 'm'}, 3649 {"msr", required_argument, 0, 'm'},
3650 {"out", required_argument, 0, 'o'},
3146 {"Package", no_argument, 0, 'p'}, 3651 {"Package", no_argument, 0, 'p'},
3147 {"processor", no_argument, 0, 'p'}, 3652 {"processor", no_argument, 0, 'p'},
3148 {"Summary", no_argument, 0, 'S'}, 3653 {"Summary", no_argument, 0, 'S'},
@@ -3153,7 +3658,7 @@ void cmdline(int argc, char **argv)
3153 3658
3154 progname = argv[0]; 3659 progname = argv[0];
3155 3660
3156 while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", 3661 while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
3157 long_options, &option_index)) != -1) { 3662 long_options, &option_index)) != -1) {
3158 switch (opt) { 3663 switch (opt) {
3159 case 'C': 3664 case 'C':
@@ -3173,7 +3678,18 @@ void cmdline(int argc, char **argv)
3173 help(); 3678 help();
3174 exit(1); 3679 exit(1);
3175 case 'i': 3680 case 'i':
3176 interval_sec = atoi(optarg); 3681 {
3682 double interval = strtod(optarg, NULL);
3683
3684 if (interval < 0.001) {
3685 fprintf(outf, "interval %f seconds is too small\n",
3686 interval);
3687 exit(2);
3688 }
3689
3690 interval_ts.tv_sec = interval;
3691 interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
3692 }
3177 break; 3693 break;
3178 case 'J': 3694 case 'J':
3179 rapl_joules++; 3695 rapl_joules++;
@@ -3184,6 +3700,9 @@ void cmdline(int argc, char **argv)
3184 case 'm': 3700 case 'm':
3185 sscanf(optarg, "%x", &extra_msr_offset32); 3701 sscanf(optarg, "%x", &extra_msr_offset32);
3186 break; 3702 break;
3703 case 'o':
3704 outf = fopen_or_die(optarg, "w");
3705 break;
3187 case 'P': 3706 case 'P':
3188 show_pkg_only++; 3707 show_pkg_only++;
3189 break; 3708 break;
@@ -3206,6 +3725,8 @@ void cmdline(int argc, char **argv)
3206 3725
3207int main(int argc, char **argv) 3726int main(int argc, char **argv)
3208{ 3727{
3728 outf = stderr;
3729
3209 cmdline(argc, argv); 3730 cmdline(argc, argv);
3210 3731
3211 if (debug) 3732 if (debug)