diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-03-13 21:13:05 -0400 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-03-13 21:13:05 -0400 |
commit | 3fdb74649b4f18ccaa88766750b616dec6acb5b0 (patch) | |
tree | 691e718a361a1db8fae3e87270af8d62adc4946a /tools/power | |
parent | 5b3e7e0536bd6326798ab57d14a49b15ad7e3e3f (diff) | |
parent | 685b535b2cdb9cdf354321f8af9ed17dcf19d19f (diff) |
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux into pm-tools
Pull turbostat updates for 4.6 from Len Brown.
* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
tools/power turbostat: bugfix: TDP MSRs print bits fixing
tools/power turbostat: correct output for MSR_NHM_SNB_PKG_CST_CFG_CTL dump
tools/power turbostat: call __cpuid() instead of __get_cpuid()
tools/power turbostat: indicate SMX and SGX support
tools/power turbostat: detect and work around syscall jitter
tools/power turbostat: show GFX%rc6
tools/power turbostat: show GFXMHz
tools/power turbostat: show IRQs per CPU
tools/power turbostat: make fewer systems calls
tools/power turbostat: fix compiler warnings
tools/power turbostat: add --out option for saving output in a file
tools/power turbostat: re-name "%Busy" field to "Busy%"
tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding
tools/power turbostat: Intel Xeon x200: fix erroneous bclk value
tools/power turbostat: allow sub-sec intervals
tools/power turbostat: Decode MSR_MISC_PWR_MGMT
tools/power turbostat: decode HWP registers
x86 msr-index: Simplify syntax for HWP fields
tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency
tools/power turbostat: decode more CPUID fields
Diffstat (limited to 'tools/power')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 32 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 889 |
2 files changed, 724 insertions, 197 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 622db685b4f9..89a55d5e32f3 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -34,7 +34,10 @@ name as necessary to disambiguate it from others is necessary. Note that option | |||
34 | \fB--debug\fP displays additional system configuration information. Invoking this parameter | 34 | \fB--debug\fP displays additional system configuration information. Invoking this parameter |
35 | more than once may also enable internal turbostat debug information. | 35 | more than once may also enable internal turbostat debug information. |
36 | .PP | 36 | .PP |
37 | \fB--interval seconds\fP overrides the default 5-second measurement interval. | 37 | \fB--interval seconds\fP overrides the default 5.0 second measurement interval. |
38 | .PP | ||
39 | \fB--out output_file\fP turbostat output is written to the specified output_file. | ||
40 | The file is truncated if it already exists, and it is created if it does not exist. | ||
38 | .PP | 41 | .PP |
39 | \fB--help\fP displays usage for the most common parameters. | 42 | \fB--help\fP displays usage for the most common parameters. |
40 | .PP | 43 | .PP |
@@ -61,7 +64,7 @@ displays the statistics gathered since it was forked. | |||
61 | .nf | 64 | .nf |
62 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. | 65 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. |
63 | \fBAVG_MHz\fP number of cycles executed divided by time elapsed. | 66 | \fBAVG_MHz\fP number of cycles executed divided by time elapsed. |
64 | \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. | 67 | \fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. |
65 | \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). | 68 | \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). |
66 | \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. | 69 | \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. |
67 | .fi | 70 | .fi |
@@ -83,13 +86,14 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T | |||
83 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. | 86 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. |
84 | .fi | 87 | .fi |
85 | .PP | 88 | .PP |
86 | .SH EXAMPLE | 89 | .SH PERIODIC EXAMPLE |
87 | Without any parameters, turbostat displays statistics every 5 seconds. | 90 | Without any parameters, turbostat displays statistics every 5 seconds. |
88 | (override interval with "-i sec" option, or specify a command | 91 | Periodic output goes to stdout, by default, unless --out is used to specify an output file. |
89 | for turbostat to fork). | 92 | The 5-second interval can be changed with the "-i sec" option. |
93 | Or a command may be specified as in "FORK EXAMPLE" below. | ||
90 | .nf | 94 | .nf |
91 | [root@hsw]# ./turbostat | 95 | [root@hsw]# ./turbostat |
92 | CPU Avg_MHz %Busy Bzy_MHz TSC_MHz | 96 | CPU Avg_MHz Busy% Bzy_MHz TSC_MHz |
93 | - 488 12.51 3898 3498 | 97 | - 488 12.51 3898 3498 |
94 | 0 0 0.01 3885 3498 | 98 | 0 0 0.01 3885 3498 |
95 | 4 3897 99.99 3898 3498 | 99 | 4 3897 99.99 3898 3498 |
@@ -145,7 +149,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) | |||
145 | cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) | 149 | cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) |
146 | cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) | 150 | cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) |
147 | cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) | 151 | cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) |
148 | Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt | 152 | Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt |
149 | - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 | 153 | - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 |
150 | 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 | 154 | 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 |
151 | 0 4 3897 99.98 3898 3498 0 0.02 | 155 | 0 4 3897 99.98 3898 3498 0 0.02 |
@@ -171,14 +175,16 @@ The --debug option adds additional columns to the measurement output, including C | |||
171 | See the field definitions above. | 175 | See the field definitions above. |
172 | .SH FORK EXAMPLE | 176 | .SH FORK EXAMPLE |
173 | If turbostat is invoked with a command, it will fork that command | 177 | If turbostat is invoked with a command, it will fork that command |
174 | and output the statistics gathered when the command exits. | 178 | and output the statistics gathered after the command exits. |
179 | In this case, turbostat output goes to stderr, by default. | ||
180 | Output can instead be saved to a file using the --out option. | ||
175 | eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds | 181 | eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds |
176 | until ^C while the other CPUs are mostly idle: | 182 | until ^C while the other CPUs are mostly idle: |
177 | 183 | ||
178 | .nf | 184 | .nf |
179 | root@hsw: turbostat cat /dev/zero > /dev/null | 185 | root@hsw: turbostat cat /dev/zero > /dev/null |
180 | ^C | 186 | ^C |
181 | CPU Avg_MHz %Busy Bzy_MHz TSC_MHz | 187 | CPU Avg_MHz Busy% Bzy_MHz TSC_MHz |
182 | - 482 12.51 3854 3498 | 188 | - 482 12.51 3854 3498 |
183 | 0 0 0.01 1960 3498 | 189 | 0 0 0.01 1960 3498 |
184 | 4 0 0.00 2128 3498 | 190 | 4 0 0.00 2128 3498 |
@@ -192,12 +198,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null | |||
192 | 198 | ||
193 | .fi | 199 | .fi |
194 | Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. | 200 | Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. |
195 | The first row shows the average MHz and %Busy across all the processors in the system. | 201 | The first row shows the average MHz and Busy% across all the processors in the system. |
196 | 202 | ||
197 | Note that the Avg_MHz column reflects the total number of cycles executed | 203 | Note that the Avg_MHz column reflects the total number of cycles executed |
198 | divided by the measurement interval. If the %Busy column is 100%, | 204 | divided by the measurement interval. If the Busy% column is 100%, |
199 | then the processor was running at that speed the entire interval. | 205 | then the processor was running at that speed the entire interval. |
200 | The Avg_MHz divided by the %Busy results in the Bzy_MHz -- | 206 | The Avg_MHz divided by the Busy% results in the Bzy_MHz -- |
201 | which is the average frequency while the processor was executing -- | 207 | which is the average frequency while the processor was executing -- |
202 | not including any non-busy idle time. | 208 | not including any non-busy idle time. |
203 | 209 | ||
@@ -233,7 +239,7 @@ in the brand string in /proc/cpuinfo. On a system where | |||
233 | the TSC stops in idle, TSC_MHz will drop | 239 | the TSC stops in idle, TSC_MHz will drop |
234 | below the processor's base frequency. | 240 | below the processor's base frequency. |
235 | 241 | ||
236 | %Busy = MPERF_delta/TSC_delta | 242 | Busy% = MPERF_delta/TSC_delta |
237 | 243 | ||
238 | Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval | 244 | Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval |
239 | 245 | ||
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..ee1551b6fa01 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -38,12 +38,15 @@ | |||
38 | #include <string.h> | 38 | #include <string.h> |
39 | #include <ctype.h> | 39 | #include <ctype.h> |
40 | #include <sched.h> | 40 | #include <sched.h> |
41 | #include <time.h> | ||
41 | #include <cpuid.h> | 42 | #include <cpuid.h> |
42 | #include <linux/capability.h> | 43 | #include <linux/capability.h> |
43 | #include <errno.h> | 44 | #include <errno.h> |
44 | 45 | ||
45 | char *proc_stat = "/proc/stat"; | 46 | char *proc_stat = "/proc/stat"; |
46 | unsigned int interval_sec = 5; | 47 | FILE *outf; |
48 | int *fd_percpu; | ||
49 | struct timespec interval_ts = {5, 0}; | ||
47 | unsigned int debug; | 50 | unsigned int debug; |
48 | unsigned int rapl_joules; | 51 | unsigned int rapl_joules; |
49 | unsigned int summary_only; | 52 | unsigned int summary_only; |
@@ -72,6 +75,7 @@ unsigned int extra_msr_offset64; | |||
72 | unsigned int extra_delta_offset32; | 75 | unsigned int extra_delta_offset32; |
73 | unsigned int extra_delta_offset64; | 76 | unsigned int extra_delta_offset64; |
74 | unsigned int aperf_mperf_multiplier = 1; | 77 | unsigned int aperf_mperf_multiplier = 1; |
78 | int do_irq = 1; | ||
75 | int do_smi; | 79 | int do_smi; |
76 | double bclk; | 80 | double bclk; |
77 | double base_hz; | 81 | double base_hz; |
@@ -86,6 +90,10 @@ char *output_buffer, *outp; | |||
86 | unsigned int do_rapl; | 90 | unsigned int do_rapl; |
87 | unsigned int do_dts; | 91 | unsigned int do_dts; |
88 | unsigned int do_ptm; | 92 | unsigned int do_ptm; |
93 | unsigned int do_gfx_rc6_ms; | ||
94 | unsigned long long gfx_cur_rc6_ms; | ||
95 | unsigned int do_gfx_mhz; | ||
96 | unsigned int gfx_cur_mhz; | ||
89 | unsigned int tcc_activation_temp; | 97 | unsigned int tcc_activation_temp; |
90 | unsigned int tcc_activation_temp_override; | 98 | unsigned int tcc_activation_temp_override; |
91 | double rapl_power_units, rapl_time_units; | 99 | double rapl_power_units, rapl_time_units; |
@@ -98,6 +106,12 @@ unsigned int crystal_hz; | |||
98 | unsigned long long tsc_hz; | 106 | unsigned long long tsc_hz; |
99 | int base_cpu; | 107 | int base_cpu; |
100 | double discover_bclk(unsigned int family, unsigned int model); | 108 | double discover_bclk(unsigned int family, unsigned int model); |
109 | unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ | ||
110 | /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ | ||
111 | unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ | ||
112 | unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ | ||
113 | unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ | ||
114 | unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ | ||
101 | 115 | ||
102 | #define RAPL_PKG (1 << 0) | 116 | #define RAPL_PKG (1 << 0) |
103 | /* 0x610 MSR_PKG_POWER_LIMIT */ | 117 | /* 0x610 MSR_PKG_POWER_LIMIT */ |
@@ -145,6 +159,7 @@ struct thread_data { | |||
145 | unsigned long long extra_delta64; | 159 | unsigned long long extra_delta64; |
146 | unsigned long long extra_msr32; | 160 | unsigned long long extra_msr32; |
147 | unsigned long long extra_delta32; | 161 | unsigned long long extra_delta32; |
162 | unsigned int irq_count; | ||
148 | unsigned int smi_count; | 163 | unsigned int smi_count; |
149 | unsigned int cpu_id; | 164 | unsigned int cpu_id; |
150 | unsigned int flags; | 165 | unsigned int flags; |
@@ -172,6 +187,8 @@ struct pkg_data { | |||
172 | unsigned long long pkg_any_core_c0; | 187 | unsigned long long pkg_any_core_c0; |
173 | unsigned long long pkg_any_gfxe_c0; | 188 | unsigned long long pkg_any_gfxe_c0; |
174 | unsigned long long pkg_both_core_gfxe_c0; | 189 | unsigned long long pkg_both_core_gfxe_c0; |
190 | unsigned long long gfx_rc6_ms; | ||
191 | unsigned int gfx_mhz; | ||
175 | unsigned int package_id; | 192 | unsigned int package_id; |
176 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ | 193 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ |
177 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ | 194 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ |
@@ -212,6 +229,9 @@ struct topo_params { | |||
212 | 229 | ||
213 | struct timeval tv_even, tv_odd, tv_delta; | 230 | struct timeval tv_even, tv_odd, tv_delta; |
214 | 231 | ||
232 | int *irq_column_2_cpu; /* /proc/interrupts column numbers */ | ||
233 | int *irqs_per_cpu; /* indexed by cpu_num */ | ||
234 | |||
215 | void setup_all_buffers(void); | 235 | void setup_all_buffers(void); |
216 | 236 | ||
217 | int cpu_is_not_present(int cpu) | 237 | int cpu_is_not_present(int cpu) |
@@ -262,23 +282,34 @@ int cpu_migrate(int cpu) | |||
262 | else | 282 | else |
263 | return 0; | 283 | return 0; |
264 | } | 284 | } |
265 | 285 | int get_msr_fd(int cpu) | |
266 | int get_msr(int cpu, off_t offset, unsigned long long *msr) | ||
267 | { | 286 | { |
268 | ssize_t retval; | ||
269 | char pathname[32]; | 287 | char pathname[32]; |
270 | int fd; | 288 | int fd; |
271 | 289 | ||
290 | fd = fd_percpu[cpu]; | ||
291 | |||
292 | if (fd) | ||
293 | return fd; | ||
294 | |||
272 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); | 295 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); |
273 | fd = open(pathname, O_RDONLY); | 296 | fd = open(pathname, O_RDONLY); |
274 | if (fd < 0) | 297 | if (fd < 0) |
275 | err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); | 298 | err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); |
276 | 299 | ||
277 | retval = pread(fd, msr, sizeof *msr, offset); | 300 | fd_percpu[cpu] = fd; |
278 | close(fd); | 301 | |
302 | return fd; | ||
303 | } | ||
304 | |||
305 | int get_msr(int cpu, off_t offset, unsigned long long *msr) | ||
306 | { | ||
307 | ssize_t retval; | ||
308 | |||
309 | retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); | ||
279 | 310 | ||
280 | if (retval != sizeof *msr) | 311 | if (retval != sizeof *msr) |
281 | err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); | 312 | err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); |
282 | 313 | ||
283 | return 0; | 314 | return 0; |
284 | } | 315 | } |
@@ -286,8 +317,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
286 | /* | 317 | /* |
287 | * Example Format w/ field column widths: | 318 | * Example Format w/ field column widths: |
288 | * | 319 | * |
289 | * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt | 320 | * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt |
290 | * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 | 321 | * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 |
291 | */ | 322 | */ |
292 | 323 | ||
293 | void print_header(void) | 324 | void print_header(void) |
@@ -301,7 +332,7 @@ void print_header(void) | |||
301 | if (has_aperf) | 332 | if (has_aperf) |
302 | outp += sprintf(outp, " Avg_MHz"); | 333 | outp += sprintf(outp, " Avg_MHz"); |
303 | if (has_aperf) | 334 | if (has_aperf) |
304 | outp += sprintf(outp, " %%Busy"); | 335 | outp += sprintf(outp, " Busy%%"); |
305 | if (has_aperf) | 336 | if (has_aperf) |
306 | outp += sprintf(outp, " Bzy_MHz"); | 337 | outp += sprintf(outp, " Bzy_MHz"); |
307 | outp += sprintf(outp, " TSC_MHz"); | 338 | outp += sprintf(outp, " TSC_MHz"); |
@@ -318,6 +349,8 @@ void print_header(void) | |||
318 | if (!debug) | 349 | if (!debug) |
319 | goto done; | 350 | goto done; |
320 | 351 | ||
352 | if (do_irq) | ||
353 | outp += sprintf(outp, " IRQ"); | ||
321 | if (do_smi) | 354 | if (do_smi) |
322 | outp += sprintf(outp, " SMI"); | 355 | outp += sprintf(outp, " SMI"); |
323 | 356 | ||
@@ -335,6 +368,12 @@ void print_header(void) | |||
335 | if (do_ptm) | 368 | if (do_ptm) |
336 | outp += sprintf(outp, " PkgTmp"); | 369 | outp += sprintf(outp, " PkgTmp"); |
337 | 370 | ||
371 | if (do_gfx_rc6_ms) | ||
372 | outp += sprintf(outp, " GFX%%rc6"); | ||
373 | |||
374 | if (do_gfx_mhz) | ||
375 | outp += sprintf(outp, " GFXMHz"); | ||
376 | |||
338 | if (do_skl_residency) { | 377 | if (do_skl_residency) { |
339 | outp += sprintf(outp, " Totl%%C0"); | 378 | outp += sprintf(outp, " Totl%%C0"); |
340 | outp += sprintf(outp, " Any%%C0"); | 379 | outp += sprintf(outp, " Any%%C0"); |
@@ -409,6 +448,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
409 | extra_msr_offset32, t->extra_msr32); | 448 | extra_msr_offset32, t->extra_msr32); |
410 | outp += sprintf(outp, "msr0x%x: %016llX\n", | 449 | outp += sprintf(outp, "msr0x%x: %016llX\n", |
411 | extra_msr_offset64, t->extra_msr64); | 450 | extra_msr_offset64, t->extra_msr64); |
451 | if (do_irq) | ||
452 | outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); | ||
412 | if (do_smi) | 453 | if (do_smi) |
413 | outp += sprintf(outp, "SMI: %08X\n", t->smi_count); | 454 | outp += sprintf(outp, "SMI: %08X\n", t->smi_count); |
414 | } | 455 | } |
@@ -504,7 +545,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
504 | outp += sprintf(outp, "%8.0f", | 545 | outp += sprintf(outp, "%8.0f", |
505 | 1.0 / units * t->aperf / interval_float); | 546 | 1.0 / units * t->aperf / interval_float); |
506 | 547 | ||
507 | /* %Busy */ | 548 | /* Busy% */ |
508 | if (has_aperf) { | 549 | if (has_aperf) { |
509 | if (!skip_c0) | 550 | if (!skip_c0) |
510 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); | 551 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); |
@@ -542,6 +583,10 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
542 | if (!debug) | 583 | if (!debug) |
543 | goto done; | 584 | goto done; |
544 | 585 | ||
586 | /* IRQ */ | ||
587 | if (do_irq) | ||
588 | outp += sprintf(outp, "%8d", t->irq_count); | ||
589 | |||
545 | /* SMI */ | 590 | /* SMI */ |
546 | if (do_smi) | 591 | if (do_smi) |
547 | outp += sprintf(outp, "%8d", t->smi_count); | 592 | outp += sprintf(outp, "%8d", t->smi_count); |
@@ -575,6 +620,14 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
575 | if (do_ptm) | 620 | if (do_ptm) |
576 | outp += sprintf(outp, "%8d", p->pkg_temp_c); | 621 | outp += sprintf(outp, "%8d", p->pkg_temp_c); |
577 | 622 | ||
623 | /* GFXrc6 */ | ||
624 | if (do_gfx_rc6_ms) | ||
625 | outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float); | ||
626 | |||
627 | /* GFXMHz */ | ||
628 | if (do_gfx_mhz) | ||
629 | outp += sprintf(outp, "%8d", p->gfx_mhz); | ||
630 | |||
578 | /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ | 631 | /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ |
579 | if (do_skl_residency) { | 632 | if (do_skl_residency) { |
580 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); | 633 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); |
@@ -645,15 +698,24 @@ done: | |||
645 | return 0; | 698 | return 0; |
646 | } | 699 | } |
647 | 700 | ||
648 | void flush_stdout() | 701 | void flush_output_stdout(void) |
649 | { | 702 | { |
650 | fputs(output_buffer, stdout); | 703 | FILE *filep; |
651 | fflush(stdout); | 704 | |
705 | if (outf == stderr) | ||
706 | filep = stdout; | ||
707 | else | ||
708 | filep = outf; | ||
709 | |||
710 | fputs(output_buffer, filep); | ||
711 | fflush(filep); | ||
712 | |||
652 | outp = output_buffer; | 713 | outp = output_buffer; |
653 | } | 714 | } |
654 | void flush_stderr() | 715 | void flush_output_stderr(void) |
655 | { | 716 | { |
656 | fputs(output_buffer, stderr); | 717 | fputs(output_buffer, outf); |
718 | fflush(outf); | ||
657 | outp = output_buffer; | 719 | outp = output_buffer; |
658 | } | 720 | } |
659 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 721 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
@@ -704,6 +766,9 @@ delta_package(struct pkg_data *new, struct pkg_data *old) | |||
704 | old->pc10 = new->pc10 - old->pc10; | 766 | old->pc10 = new->pc10 - old->pc10; |
705 | old->pkg_temp_c = new->pkg_temp_c; | 767 | old->pkg_temp_c = new->pkg_temp_c; |
706 | 768 | ||
769 | old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; | ||
770 | old->gfx_mhz = new->gfx_mhz; | ||
771 | |||
707 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); | 772 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); |
708 | DELTA_WRAP32(new->energy_cores, old->energy_cores); | 773 | DELTA_WRAP32(new->energy_cores, old->energy_cores); |
709 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); | 774 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); |
@@ -745,9 +810,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
745 | } else { | 810 | } else { |
746 | 811 | ||
747 | if (!aperf_mperf_unstable) { | 812 | if (!aperf_mperf_unstable) { |
748 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | 813 | fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); |
749 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | 814 | fprintf(outf, "* Frequency results do not cover entire interval *\n"); |
750 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | 815 | fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); |
751 | 816 | ||
752 | aperf_mperf_unstable = 1; | 817 | aperf_mperf_unstable = 1; |
753 | } | 818 | } |
@@ -782,7 +847,8 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
782 | } | 847 | } |
783 | 848 | ||
784 | if (old->mperf == 0) { | 849 | if (old->mperf == 0) { |
785 | if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); | 850 | if (debug > 1) |
851 | fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); | ||
786 | old->mperf = 1; /* divide by 0 protection */ | 852 | old->mperf = 1; /* divide by 0 protection */ |
787 | } | 853 | } |
788 | 854 | ||
@@ -797,6 +863,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
797 | old->extra_msr32 = new->extra_msr32; | 863 | old->extra_msr32 = new->extra_msr32; |
798 | old->extra_msr64 = new->extra_msr64; | 864 | old->extra_msr64 = new->extra_msr64; |
799 | 865 | ||
866 | if (do_irq) | ||
867 | old->irq_count = new->irq_count - old->irq_count; | ||
868 | |||
800 | if (do_smi) | 869 | if (do_smi) |
801 | old->smi_count = new->smi_count - old->smi_count; | 870 | old->smi_count = new->smi_count - old->smi_count; |
802 | } | 871 | } |
@@ -826,10 +895,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
826 | t->mperf = 0; | 895 | t->mperf = 0; |
827 | t->c1 = 0; | 896 | t->c1 = 0; |
828 | 897 | ||
829 | t->smi_count = 0; | ||
830 | t->extra_delta32 = 0; | 898 | t->extra_delta32 = 0; |
831 | t->extra_delta64 = 0; | 899 | t->extra_delta64 = 0; |
832 | 900 | ||
901 | t->irq_count = 0; | ||
902 | t->smi_count = 0; | ||
903 | |||
833 | /* tells format_counters to dump all fields from this set */ | 904 | /* tells format_counters to dump all fields from this set */ |
834 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; | 905 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; |
835 | 906 | ||
@@ -861,6 +932,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
861 | p->rapl_pkg_perf_status = 0; | 932 | p->rapl_pkg_perf_status = 0; |
862 | p->rapl_dram_perf_status = 0; | 933 | p->rapl_dram_perf_status = 0; |
863 | p->pkg_temp_c = 0; | 934 | p->pkg_temp_c = 0; |
935 | |||
936 | p->gfx_rc6_ms = 0; | ||
937 | p->gfx_mhz = 0; | ||
864 | } | 938 | } |
865 | int sum_counters(struct thread_data *t, struct core_data *c, | 939 | int sum_counters(struct thread_data *t, struct core_data *c, |
866 | struct pkg_data *p) | 940 | struct pkg_data *p) |
@@ -873,6 +947,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
873 | average.threads.extra_delta32 += t->extra_delta32; | 947 | average.threads.extra_delta32 += t->extra_delta32; |
874 | average.threads.extra_delta64 += t->extra_delta64; | 948 | average.threads.extra_delta64 += t->extra_delta64; |
875 | 949 | ||
950 | average.threads.irq_count += t->irq_count; | ||
951 | average.threads.smi_count += t->smi_count; | ||
952 | |||
876 | /* sum per-core values only for 1st thread in core */ | 953 | /* sum per-core values only for 1st thread in core */ |
877 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 954 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
878 | return 0; | 955 | return 0; |
@@ -910,6 +987,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
910 | average.packages.energy_cores += p->energy_cores; | 987 | average.packages.energy_cores += p->energy_cores; |
911 | average.packages.energy_gfx += p->energy_gfx; | 988 | average.packages.energy_gfx += p->energy_gfx; |
912 | 989 | ||
990 | average.packages.gfx_rc6_ms = p->gfx_rc6_ms; | ||
991 | average.packages.gfx_mhz = p->gfx_mhz; | ||
992 | |||
913 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); | 993 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); |
914 | 994 | ||
915 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; | 995 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; |
@@ -970,7 +1050,6 @@ static unsigned long long rdtsc(void) | |||
970 | return low | ((unsigned long long)high) << 32; | 1050 | return low | ((unsigned long long)high) << 32; |
971 | } | 1051 | } |
972 | 1052 | ||
973 | |||
974 | /* | 1053 | /* |
975 | * get_counters(...) | 1054 | * get_counters(...) |
976 | * migrate to cpu | 1055 | * migrate to cpu |
@@ -980,23 +1059,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
980 | { | 1059 | { |
981 | int cpu = t->cpu_id; | 1060 | int cpu = t->cpu_id; |
982 | unsigned long long msr; | 1061 | unsigned long long msr; |
1062 | int aperf_mperf_retry_count = 0; | ||
983 | 1063 | ||
984 | if (cpu_migrate(cpu)) { | 1064 | if (cpu_migrate(cpu)) { |
985 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 1065 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
986 | return -1; | 1066 | return -1; |
987 | } | 1067 | } |
988 | 1068 | ||
1069 | retry: | ||
989 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ | 1070 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
990 | 1071 | ||
991 | if (has_aperf) { | 1072 | if (has_aperf) { |
1073 | unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; | ||
1074 | |||
1075 | /* | ||
1076 | * The TSC, APERF and MPERF must be read together for | ||
1077 | * APERF/MPERF and MPERF/TSC to give accurate results. | ||
1078 | * | ||
1079 | * Unfortunately, APERF and MPERF are read by | ||
1080 | * individual system call, so delays may occur | ||
1081 | * between them. If the time to read them | ||
1082 | * varies by a large amount, we re-read them. | ||
1083 | */ | ||
1084 | |||
1085 | /* | ||
1086 | * This initial dummy APERF read has been seen to | ||
1087 | * reduce jitter in the subsequent reads. | ||
1088 | */ | ||
1089 | |||
1090 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) | ||
1091 | return -3; | ||
1092 | |||
1093 | t->tsc = rdtsc(); /* re-read close to APERF */ | ||
1094 | |||
1095 | tsc_before = t->tsc; | ||
1096 | |||
992 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) | 1097 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) |
993 | return -3; | 1098 | return -3; |
1099 | |||
1100 | tsc_between = rdtsc(); | ||
1101 | |||
994 | if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) | 1102 | if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) |
995 | return -4; | 1103 | return -4; |
1104 | |||
1105 | tsc_after = rdtsc(); | ||
1106 | |||
1107 | aperf_time = tsc_between - tsc_before; | ||
1108 | mperf_time = tsc_after - tsc_between; | ||
1109 | |||
1110 | /* | ||
1111 | * If the system call latency to read APERF and MPERF | ||
1112 | * differ by more than 2x, then try again. | ||
1113 | */ | ||
1114 | if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { | ||
1115 | aperf_mperf_retry_count++; | ||
1116 | if (aperf_mperf_retry_count < 5) | ||
1117 | goto retry; | ||
1118 | else | ||
1119 | warnx("cpu%d jitter %lld %lld", | ||
1120 | cpu, aperf_time, mperf_time); | ||
1121 | } | ||
1122 | aperf_mperf_retry_count = 0; | ||
1123 | |||
996 | t->aperf = t->aperf * aperf_mperf_multiplier; | 1124 | t->aperf = t->aperf * aperf_mperf_multiplier; |
997 | t->mperf = t->mperf * aperf_mperf_multiplier; | 1125 | t->mperf = t->mperf * aperf_mperf_multiplier; |
998 | } | 1126 | } |
999 | 1127 | ||
1128 | if (do_irq) | ||
1129 | t->irq_count = irqs_per_cpu[cpu]; | ||
1000 | if (do_smi) { | 1130 | if (do_smi) { |
1001 | if (get_msr(cpu, MSR_SMI_COUNT, &msr)) | 1131 | if (get_msr(cpu, MSR_SMI_COUNT, &msr)) |
1002 | return -5; | 1132 | return -5; |
@@ -1124,6 +1254,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1124 | return -17; | 1254 | return -17; |
1125 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | 1255 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); |
1126 | } | 1256 | } |
1257 | |||
1258 | if (do_gfx_rc6_ms) | ||
1259 | p->gfx_rc6_ms = gfx_cur_rc6_ms; | ||
1260 | |||
1261 | if (do_gfx_mhz) | ||
1262 | p->gfx_mhz = gfx_cur_mhz; | ||
1263 | |||
1127 | return 0; | 1264 | return 0; |
1128 | } | 1265 | } |
1129 | 1266 | ||
@@ -1175,18 +1312,18 @@ dump_nhm_platform_info(void) | |||
1175 | 1312 | ||
1176 | get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); | 1313 | get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); |
1177 | 1314 | ||
1178 | fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); | 1315 | fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); |
1179 | 1316 | ||
1180 | ratio = (msr >> 40) & 0xFF; | 1317 | ratio = (msr >> 40) & 0xFF; |
1181 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", | 1318 | fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", |
1182 | ratio, bclk, ratio * bclk); | 1319 | ratio, bclk, ratio * bclk); |
1183 | 1320 | ||
1184 | ratio = (msr >> 8) & 0xFF; | 1321 | ratio = (msr >> 8) & 0xFF; |
1185 | fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", | 1322 | fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", |
1186 | ratio, bclk, ratio * bclk); | 1323 | ratio, bclk, ratio * bclk); |
1187 | 1324 | ||
1188 | get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); | 1325 | get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); |
1189 | fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", | 1326 | fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", |
1190 | base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); | 1327 | base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); |
1191 | 1328 | ||
1192 | return; | 1329 | return; |
@@ -1200,16 +1337,16 @@ dump_hsw_turbo_ratio_limits(void) | |||
1200 | 1337 | ||
1201 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); | 1338 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); |
1202 | 1339 | ||
1203 | fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); | 1340 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); |
1204 | 1341 | ||
1205 | ratio = (msr >> 8) & 0xFF; | 1342 | ratio = (msr >> 8) & 0xFF; |
1206 | if (ratio) | 1343 | if (ratio) |
1207 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", | 1344 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", |
1208 | ratio, bclk, ratio * bclk); | 1345 | ratio, bclk, ratio * bclk); |
1209 | 1346 | ||
1210 | ratio = (msr >> 0) & 0xFF; | 1347 | ratio = (msr >> 0) & 0xFF; |
1211 | if (ratio) | 1348 | if (ratio) |
1212 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", | 1349 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", |
1213 | ratio, bclk, ratio * bclk); | 1350 | ratio, bclk, ratio * bclk); |
1214 | return; | 1351 | return; |
1215 | } | 1352 | } |
@@ -1222,46 +1359,46 @@ dump_ivt_turbo_ratio_limits(void) | |||
1222 | 1359 | ||
1223 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); | 1360 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); |
1224 | 1361 | ||
1225 | fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); | 1362 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); |
1226 | 1363 | ||
1227 | ratio = (msr >> 56) & 0xFF; | 1364 | ratio = (msr >> 56) & 0xFF; |
1228 | if (ratio) | 1365 | if (ratio) |
1229 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", | 1366 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", |
1230 | ratio, bclk, ratio * bclk); | 1367 | ratio, bclk, ratio * bclk); |
1231 | 1368 | ||
1232 | ratio = (msr >> 48) & 0xFF; | 1369 | ratio = (msr >> 48) & 0xFF; |
1233 | if (ratio) | 1370 | if (ratio) |
1234 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", | 1371 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", |
1235 | ratio, bclk, ratio * bclk); | 1372 | ratio, bclk, ratio * bclk); |
1236 | 1373 | ||
1237 | ratio = (msr >> 40) & 0xFF; | 1374 | ratio = (msr >> 40) & 0xFF; |
1238 | if (ratio) | 1375 | if (ratio) |
1239 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", | 1376 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", |
1240 | ratio, bclk, ratio * bclk); | 1377 | ratio, bclk, ratio * bclk); |
1241 | 1378 | ||
1242 | ratio = (msr >> 32) & 0xFF; | 1379 | ratio = (msr >> 32) & 0xFF; |
1243 | if (ratio) | 1380 | if (ratio) |
1244 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", | 1381 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", |
1245 | ratio, bclk, ratio * bclk); | 1382 | ratio, bclk, ratio * bclk); |
1246 | 1383 | ||
1247 | ratio = (msr >> 24) & 0xFF; | 1384 | ratio = (msr >> 24) & 0xFF; |
1248 | if (ratio) | 1385 | if (ratio) |
1249 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", | 1386 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", |
1250 | ratio, bclk, ratio * bclk); | 1387 | ratio, bclk, ratio * bclk); |
1251 | 1388 | ||
1252 | ratio = (msr >> 16) & 0xFF; | 1389 | ratio = (msr >> 16) & 0xFF; |
1253 | if (ratio) | 1390 | if (ratio) |
1254 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", | 1391 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", |
1255 | ratio, bclk, ratio * bclk); | 1392 | ratio, bclk, ratio * bclk); |
1256 | 1393 | ||
1257 | ratio = (msr >> 8) & 0xFF; | 1394 | ratio = (msr >> 8) & 0xFF; |
1258 | if (ratio) | 1395 | if (ratio) |
1259 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", | 1396 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", |
1260 | ratio, bclk, ratio * bclk); | 1397 | ratio, bclk, ratio * bclk); |
1261 | 1398 | ||
1262 | ratio = (msr >> 0) & 0xFF; | 1399 | ratio = (msr >> 0) & 0xFF; |
1263 | if (ratio) | 1400 | if (ratio) |
1264 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", | 1401 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", |
1265 | ratio, bclk, ratio * bclk); | 1402 | ratio, bclk, ratio * bclk); |
1266 | return; | 1403 | return; |
1267 | } | 1404 | } |
@@ -1274,46 +1411,46 @@ dump_nhm_turbo_ratio_limits(void) | |||
1274 | 1411 | ||
1275 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); | 1412 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); |
1276 | 1413 | ||
1277 | fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); | 1414 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); |
1278 | 1415 | ||
1279 | ratio = (msr >> 56) & 0xFF; | 1416 | ratio = (msr >> 56) & 0xFF; |
1280 | if (ratio) | 1417 | if (ratio) |
1281 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", | 1418 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", |
1282 | ratio, bclk, ratio * bclk); | 1419 | ratio, bclk, ratio * bclk); |
1283 | 1420 | ||
1284 | ratio = (msr >> 48) & 0xFF; | 1421 | ratio = (msr >> 48) & 0xFF; |
1285 | if (ratio) | 1422 | if (ratio) |
1286 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", | 1423 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", |
1287 | ratio, bclk, ratio * bclk); | 1424 | ratio, bclk, ratio * bclk); |
1288 | 1425 | ||
1289 | ratio = (msr >> 40) & 0xFF; | 1426 | ratio = (msr >> 40) & 0xFF; |
1290 | if (ratio) | 1427 | if (ratio) |
1291 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", | 1428 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", |
1292 | ratio, bclk, ratio * bclk); | 1429 | ratio, bclk, ratio * bclk); |
1293 | 1430 | ||
1294 | ratio = (msr >> 32) & 0xFF; | 1431 | ratio = (msr >> 32) & 0xFF; |
1295 | if (ratio) | 1432 | if (ratio) |
1296 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", | 1433 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", |
1297 | ratio, bclk, ratio * bclk); | 1434 | ratio, bclk, ratio * bclk); |
1298 | 1435 | ||
1299 | ratio = (msr >> 24) & 0xFF; | 1436 | ratio = (msr >> 24) & 0xFF; |
1300 | if (ratio) | 1437 | if (ratio) |
1301 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", | 1438 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", |
1302 | ratio, bclk, ratio * bclk); | 1439 | ratio, bclk, ratio * bclk); |
1303 | 1440 | ||
1304 | ratio = (msr >> 16) & 0xFF; | 1441 | ratio = (msr >> 16) & 0xFF; |
1305 | if (ratio) | 1442 | if (ratio) |
1306 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", | 1443 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", |
1307 | ratio, bclk, ratio * bclk); | 1444 | ratio, bclk, ratio * bclk); |
1308 | 1445 | ||
1309 | ratio = (msr >> 8) & 0xFF; | 1446 | ratio = (msr >> 8) & 0xFF; |
1310 | if (ratio) | 1447 | if (ratio) |
1311 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", | 1448 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", |
1312 | ratio, bclk, ratio * bclk); | 1449 | ratio, bclk, ratio * bclk); |
1313 | 1450 | ||
1314 | ratio = (msr >> 0) & 0xFF; | 1451 | ratio = (msr >> 0) & 0xFF; |
1315 | if (ratio) | 1452 | if (ratio) |
1316 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", | 1453 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", |
1317 | ratio, bclk, ratio * bclk); | 1454 | ratio, bclk, ratio * bclk); |
1318 | return; | 1455 | return; |
1319 | } | 1456 | } |
@@ -1321,21 +1458,23 @@ dump_nhm_turbo_ratio_limits(void) | |||
1321 | static void | 1458 | static void |
1322 | dump_knl_turbo_ratio_limits(void) | 1459 | dump_knl_turbo_ratio_limits(void) |
1323 | { | 1460 | { |
1324 | int cores; | 1461 | const unsigned int buckets_no = 7; |
1325 | unsigned int ratio; | 1462 | |
1326 | unsigned long long msr; | 1463 | unsigned long long msr; |
1327 | int delta_cores; | 1464 | int delta_cores, delta_ratio; |
1328 | int delta_ratio; | 1465 | int i, b_nr; |
1329 | int i; | 1466 | unsigned int cores[buckets_no]; |
1467 | unsigned int ratio[buckets_no]; | ||
1330 | 1468 | ||
1331 | get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | 1469 | get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
1332 | 1470 | ||
1333 | fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", | 1471 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", |
1334 | base_cpu, msr); | 1472 | base_cpu, msr); |
1335 | 1473 | ||
1336 | /** | 1474 | /** |
1337 | * Turbo encoding in KNL is as follows: | 1475 | * Turbo encoding in KNL is as follows: |
1338 | * [7:0] -- Base value of number of active cores of bucket 1. | 1476 | * [0] -- Reserved |
1477 | * [7:1] -- Base value of number of active cores of bucket 1. | ||
1339 | * [15:8] -- Base value of freq ratio of bucket 1. | 1478 | * [15:8] -- Base value of freq ratio of bucket 1. |
1340 | * [20:16] -- +ve delta of number of active cores of bucket 2. | 1479 | * [20:16] -- +ve delta of number of active cores of bucket 2. |
1341 | * i.e. active cores of bucket 2 = | 1480 | * i.e. active cores of bucket 2 = |
@@ -1354,29 +1493,25 @@ dump_knl_turbo_ratio_limits(void) | |||
1354 | * [60:56]-- +ve delta of number of active cores of bucket 7. | 1493 | * [60:56]-- +ve delta of number of active cores of bucket 7. |
1355 | * [63:61]-- -ve delta of freq ratio of bucket 7. | 1494 | * [63:61]-- -ve delta of freq ratio of bucket 7. |
1356 | */ | 1495 | */ |
1357 | cores = msr & 0xFF; | 1496 | |
1358 | ratio = (msr >> 8) && 0xFF; | 1497 | b_nr = 0; |
1359 | if (ratio > 0) | 1498 | cores[b_nr] = (msr & 0xFF) >> 1; |
1360 | fprintf(stderr, | 1499 | ratio[b_nr] = (msr >> 8) & 0xFF; |
1361 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | 1500 | |
1362 | ratio, bclk, ratio * bclk, cores); | 1501 | for (i = 16; i < 64; i += 8) { |
1363 | |||
1364 | for (i = 16; i < 64; i = i + 8) { | ||
1365 | delta_cores = (msr >> i) & 0x1F; | 1502 | delta_cores = (msr >> i) & 0x1F; |
1366 | delta_ratio = (msr >> (i + 5)) && 0x7; | 1503 | delta_ratio = (msr >> (i + 5)) & 0x7; |
1367 | if (!delta_cores || !delta_ratio) | 1504 | |
1368 | return; | 1505 | cores[b_nr + 1] = cores[b_nr] + delta_cores; |
1369 | cores = cores + delta_cores; | 1506 | ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; |
1370 | ratio = ratio - delta_ratio; | 1507 | b_nr++; |
1371 | |||
1372 | /** -ve ratios will make successive ratio calculations | ||
1373 | * negative. Hence return instead of carrying on. | ||
1374 | */ | ||
1375 | if (ratio > 0) | ||
1376 | fprintf(stderr, | ||
1377 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
1378 | ratio, bclk, ratio * bclk, cores); | ||
1379 | } | 1508 | } |
1509 | |||
1510 | for (i = buckets_no - 1; i >= 0; i--) | ||
1511 | if (i > 0 ? ratio[i] != ratio[i - 1] : 1) | ||
1512 | fprintf(outf, | ||
1513 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
1514 | ratio[i], bclk, ratio[i] * bclk, cores[i]); | ||
1380 | } | 1515 | } |
1381 | 1516 | ||
1382 | static void | 1517 | static void |
@@ -1389,15 +1524,15 @@ dump_nhm_cst_cfg(void) | |||
1389 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | 1524 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) |
1390 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | 1525 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
1391 | 1526 | ||
1392 | fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); | 1527 | fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); |
1393 | 1528 | ||
1394 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", | 1529 | fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", |
1395 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | 1530 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", |
1396 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | 1531 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", |
1397 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | 1532 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", |
1398 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | 1533 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", |
1399 | (msr & (1 << 15)) ? "" : "UN", | 1534 | (msr & (1 << 15)) ? "" : "UN", |
1400 | (unsigned int)msr & 7, | 1535 | (unsigned int)msr & 0xF, |
1401 | pkg_cstate_limit_strings[pkg_cstate_limit]); | 1536 | pkg_cstate_limit_strings[pkg_cstate_limit]); |
1402 | return; | 1537 | return; |
1403 | } | 1538 | } |
@@ -1408,48 +1543,59 @@ dump_config_tdp(void) | |||
1408 | unsigned long long msr; | 1543 | unsigned long long msr; |
1409 | 1544 | ||
1410 | get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); | 1545 | get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); |
1411 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); | 1546 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); |
1412 | fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); | 1547 | fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); |
1413 | 1548 | ||
1414 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); | 1549 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); |
1415 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); | 1550 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); |
1416 | if (msr) { | 1551 | if (msr) { |
1417 | fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); | 1552 | fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); |
1418 | fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); | 1553 | fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); |
1419 | fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); | 1554 | fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); |
1420 | fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); | 1555 | fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); |
1421 | } | 1556 | } |
1422 | fprintf(stderr, ")\n"); | 1557 | fprintf(outf, ")\n"); |
1423 | 1558 | ||
1424 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); | 1559 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); |
1425 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); | 1560 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); |
1426 | if (msr) { | 1561 | if (msr) { |
1427 | fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); | 1562 | fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); |
1428 | fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); | 1563 | fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); |
1429 | fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); | 1564 | fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); |
1430 | fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); | 1565 | fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); |
1431 | } | 1566 | } |
1432 | fprintf(stderr, ")\n"); | 1567 | fprintf(outf, ")\n"); |
1433 | 1568 | ||
1434 | get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); | 1569 | get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); |
1435 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); | 1570 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); |
1436 | if ((msr) & 0x3) | 1571 | if ((msr) & 0x3) |
1437 | fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); | 1572 | fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); |
1438 | fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); | 1573 | fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); |
1439 | fprintf(stderr, ")\n"); | 1574 | fprintf(outf, ")\n"); |
1440 | 1575 | ||
1441 | get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); | 1576 | get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); |
1442 | fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); | 1577 | fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); |
1443 | fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); | 1578 | fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); |
1444 | fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); | 1579 | fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); |
1445 | fprintf(stderr, ")\n"); | 1580 | fprintf(outf, ")\n"); |
1581 | } | ||
1582 | void free_fd_percpu(void) | ||
1583 | { | ||
1584 | int i; | ||
1585 | |||
1586 | for (i = 0; i < topo.max_cpu_num; ++i) { | ||
1587 | if (fd_percpu[i] != 0) | ||
1588 | close(fd_percpu[i]); | ||
1589 | } | ||
1590 | |||
1591 | free(fd_percpu); | ||
1446 | } | 1592 | } |
1447 | 1593 | ||
1448 | void free_all_buffers(void) | 1594 | void free_all_buffers(void) |
1449 | { | 1595 | { |
1450 | CPU_FREE(cpu_present_set); | 1596 | CPU_FREE(cpu_present_set); |
1451 | cpu_present_set = NULL; | 1597 | cpu_present_set = NULL; |
1452 | cpu_present_set = 0; | 1598 | cpu_present_setsize = 0; |
1453 | 1599 | ||
1454 | CPU_FREE(cpu_affinity_set); | 1600 | CPU_FREE(cpu_affinity_set); |
1455 | cpu_affinity_set = NULL; | 1601 | cpu_affinity_set = NULL; |
@@ -1474,6 +1620,11 @@ void free_all_buffers(void) | |||
1474 | free(output_buffer); | 1620 | free(output_buffer); |
1475 | output_buffer = NULL; | 1621 | output_buffer = NULL; |
1476 | outp = NULL; | 1622 | outp = NULL; |
1623 | |||
1624 | free_fd_percpu(); | ||
1625 | |||
1626 | free(irq_column_2_cpu); | ||
1627 | free(irqs_per_cpu); | ||
1477 | } | 1628 | } |
1478 | 1629 | ||
1479 | /* | 1630 | /* |
@@ -1481,7 +1632,7 @@ void free_all_buffers(void) | |||
1481 | */ | 1632 | */ |
1482 | FILE *fopen_or_die(const char *path, const char *mode) | 1633 | FILE *fopen_or_die(const char *path, const char *mode) |
1483 | { | 1634 | { |
1484 | FILE *filep = fopen(path, "r"); | 1635 | FILE *filep = fopen(path, mode); |
1485 | if (!filep) | 1636 | if (!filep) |
1486 | err(1, "%s: open failed", path); | 1637 | err(1, "%s: open failed", path); |
1487 | return filep; | 1638 | return filep; |
@@ -1696,6 +1847,136 @@ int mark_cpu_present(int cpu) | |||
1696 | return 0; | 1847 | return 0; |
1697 | } | 1848 | } |
1698 | 1849 | ||
1850 | /* | ||
1851 | * snapshot_proc_interrupts() | ||
1852 | * | ||
1853 | * read and record summary of /proc/interrupts | ||
1854 | * | ||
1855 | * return 1 if config change requires a restart, else return 0 | ||
1856 | */ | ||
1857 | int snapshot_proc_interrupts(void) | ||
1858 | { | ||
1859 | static FILE *fp; | ||
1860 | int column, retval; | ||
1861 | |||
1862 | if (fp == NULL) | ||
1863 | fp = fopen_or_die("/proc/interrupts", "r"); | ||
1864 | else | ||
1865 | rewind(fp); | ||
1866 | |||
1867 | /* read 1st line of /proc/interrupts to get cpu* name for each column */ | ||
1868 | for (column = 0; column < topo.num_cpus; ++column) { | ||
1869 | int cpu_number; | ||
1870 | |||
1871 | retval = fscanf(fp, " CPU%d", &cpu_number); | ||
1872 | if (retval != 1) | ||
1873 | break; | ||
1874 | |||
1875 | if (cpu_number > topo.max_cpu_num) { | ||
1876 | warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); | ||
1877 | return 1; | ||
1878 | } | ||
1879 | |||
1880 | irq_column_2_cpu[column] = cpu_number; | ||
1881 | irqs_per_cpu[cpu_number] = 0; | ||
1882 | } | ||
1883 | |||
1884 | /* read /proc/interrupt count lines and sum up irqs per cpu */ | ||
1885 | while (1) { | ||
1886 | int column; | ||
1887 | char buf[64]; | ||
1888 | |||
1889 | retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ | ||
1890 | if (retval != 1) | ||
1891 | break; | ||
1892 | |||
1893 | /* read the count per cpu */ | ||
1894 | for (column = 0; column < topo.num_cpus; ++column) { | ||
1895 | |||
1896 | int cpu_number, irq_count; | ||
1897 | |||
1898 | retval = fscanf(fp, " %d", &irq_count); | ||
1899 | if (retval != 1) | ||
1900 | break; | ||
1901 | |||
1902 | cpu_number = irq_column_2_cpu[column]; | ||
1903 | irqs_per_cpu[cpu_number] += irq_count; | ||
1904 | |||
1905 | } | ||
1906 | |||
1907 | while (getc(fp) != '\n') | ||
1908 | ; /* flush interrupt description */ | ||
1909 | |||
1910 | } | ||
1911 | return 0; | ||
1912 | } | ||
1913 | /* | ||
1914 | * snapshot_gfx_rc6_ms() | ||
1915 | * | ||
1916 | * record snapshot of | ||
1917 | * /sys/class/drm/card0/power/rc6_residency_ms | ||
1918 | * | ||
1919 | * return 1 if config change requires a restart, else return 0 | ||
1920 | */ | ||
1921 | int snapshot_gfx_rc6_ms(void) | ||
1922 | { | ||
1923 | FILE *fp; | ||
1924 | int retval; | ||
1925 | |||
1926 | fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); | ||
1927 | |||
1928 | retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); | ||
1929 | if (retval != 1) | ||
1930 | err(1, "GFX rc6"); | ||
1931 | |||
1932 | fclose(fp); | ||
1933 | |||
1934 | return 0; | ||
1935 | } | ||
1936 | /* | ||
1937 | * snapshot_gfx_mhz() | ||
1938 | * | ||
1939 | * record snapshot of | ||
1940 | * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz | ||
1941 | * | ||
1942 | * return 1 if config change requires a restart, else return 0 | ||
1943 | */ | ||
1944 | int snapshot_gfx_mhz(void) | ||
1945 | { | ||
1946 | static FILE *fp; | ||
1947 | int retval; | ||
1948 | |||
1949 | if (fp == NULL) | ||
1950 | fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); | ||
1951 | else | ||
1952 | rewind(fp); | ||
1953 | |||
1954 | retval = fscanf(fp, "%d", &gfx_cur_mhz); | ||
1955 | if (retval != 1) | ||
1956 | err(1, "GFX MHz"); | ||
1957 | |||
1958 | return 0; | ||
1959 | } | ||
1960 | |||
1961 | /* | ||
1962 | * snapshot /proc and /sys files | ||
1963 | * | ||
1964 | * return 1 if configuration restart needed, else return 0 | ||
1965 | */ | ||
1966 | int snapshot_proc_sysfs_files(void) | ||
1967 | { | ||
1968 | if (snapshot_proc_interrupts()) | ||
1969 | return 1; | ||
1970 | |||
1971 | if (do_gfx_rc6_ms) | ||
1972 | snapshot_gfx_rc6_ms(); | ||
1973 | |||
1974 | if (do_gfx_mhz) | ||
1975 | snapshot_gfx_mhz(); | ||
1976 | |||
1977 | return 0; | ||
1978 | } | ||
1979 | |||
1699 | void turbostat_loop() | 1980 | void turbostat_loop() |
1700 | { | 1981 | { |
1701 | int retval; | 1982 | int retval; |
@@ -1704,6 +1985,7 @@ void turbostat_loop() | |||
1704 | restart: | 1985 | restart: |
1705 | restarted++; | 1986 | restarted++; |
1706 | 1987 | ||
1988 | snapshot_proc_sysfs_files(); | ||
1707 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 1989 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
1708 | if (retval < -1) { | 1990 | if (retval < -1) { |
1709 | exit(retval); | 1991 | exit(retval); |
@@ -1722,7 +2004,9 @@ restart: | |||
1722 | re_initialize(); | 2004 | re_initialize(); |
1723 | goto restart; | 2005 | goto restart; |
1724 | } | 2006 | } |
1725 | sleep(interval_sec); | 2007 | nanosleep(&interval_ts, NULL); |
2008 | if (snapshot_proc_sysfs_files()) | ||
2009 | goto restart; | ||
1726 | retval = for_all_cpus(get_counters, ODD_COUNTERS); | 2010 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
1727 | if (retval < -1) { | 2011 | if (retval < -1) { |
1728 | exit(retval); | 2012 | exit(retval); |
@@ -1735,8 +2019,10 @@ restart: | |||
1735 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); | 2019 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
1736 | compute_average(EVEN_COUNTERS); | 2020 | compute_average(EVEN_COUNTERS); |
1737 | format_all_counters(EVEN_COUNTERS); | 2021 | format_all_counters(EVEN_COUNTERS); |
1738 | flush_stdout(); | 2022 | flush_output_stdout(); |
1739 | sleep(interval_sec); | 2023 | nanosleep(&interval_ts, NULL); |
2024 | if (snapshot_proc_sysfs_files()) | ||
2025 | goto restart; | ||
1740 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 2026 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
1741 | if (retval < -1) { | 2027 | if (retval < -1) { |
1742 | exit(retval); | 2028 | exit(retval); |
@@ -1749,7 +2035,7 @@ restart: | |||
1749 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); | 2035 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); |
1750 | compute_average(ODD_COUNTERS); | 2036 | compute_average(ODD_COUNTERS); |
1751 | format_all_counters(ODD_COUNTERS); | 2037 | format_all_counters(ODD_COUNTERS); |
1752 | flush_stdout(); | 2038 | flush_output_stdout(); |
1753 | } | 2039 | } |
1754 | } | 2040 | } |
1755 | 2041 | ||
@@ -1889,6 +2175,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1889 | /* Nehalem compatible, but do not include turbo-ratio limit support */ | 2175 | /* Nehalem compatible, but do not include turbo-ratio limit support */ |
1890 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ | 2176 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ |
1891 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ | 2177 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ |
2178 | case 0x57: /* PHI - Knights Landing (different MSR definition) */ | ||
1892 | return 0; | 2179 | return 0; |
1893 | default: | 2180 | default: |
1894 | return 1; | 2181 | return 1; |
@@ -1970,7 +2257,7 @@ int has_config_tdp(unsigned int family, unsigned int model) | |||
1970 | } | 2257 | } |
1971 | 2258 | ||
1972 | static void | 2259 | static void |
1973 | dump_cstate_pstate_config_info(family, model) | 2260 | dump_cstate_pstate_config_info(int family, int model) |
1974 | { | 2261 | { |
1975 | if (!do_nhm_platform_info) | 2262 | if (!do_nhm_platform_info) |
1976 | return; | 2263 | return; |
@@ -2016,7 +2303,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2016 | return 0; | 2303 | return 0; |
2017 | 2304 | ||
2018 | if (cpu_migrate(cpu)) { | 2305 | if (cpu_migrate(cpu)) { |
2019 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2306 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
2020 | return -1; | 2307 | return -1; |
2021 | } | 2308 | } |
2022 | 2309 | ||
@@ -2037,7 +2324,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2037 | epb_string = "custom"; | 2324 | epb_string = "custom"; |
2038 | break; | 2325 | break; |
2039 | } | 2326 | } |
2040 | fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); | 2327 | fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); |
2328 | |||
2329 | return 0; | ||
2330 | } | ||
2331 | /* | ||
2332 | * print_hwp() | ||
2333 | * Decode the MSR_HWP_CAPABILITIES | ||
2334 | */ | ||
2335 | int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
2336 | { | ||
2337 | unsigned long long msr; | ||
2338 | int cpu; | ||
2339 | |||
2340 | if (!has_hwp) | ||
2341 | return 0; | ||
2342 | |||
2343 | cpu = t->cpu_id; | ||
2344 | |||
2345 | /* MSR_HWP_CAPABILITIES is per-package */ | ||
2346 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
2347 | return 0; | ||
2348 | |||
2349 | if (cpu_migrate(cpu)) { | ||
2350 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); | ||
2351 | return -1; | ||
2352 | } | ||
2353 | |||
2354 | if (get_msr(cpu, MSR_PM_ENABLE, &msr)) | ||
2355 | return 0; | ||
2356 | |||
2357 | fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", | ||
2358 | cpu, msr, (msr & (1 << 0)) ? "" : "No-"); | ||
2359 | |||
2360 | /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ | ||
2361 | if ((msr & (1 << 0)) == 0) | ||
2362 | return 0; | ||
2363 | |||
2364 | if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) | ||
2365 | return 0; | ||
2366 | |||
2367 | fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " | ||
2368 | "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n", | ||
2369 | cpu, msr, | ||
2370 | (unsigned int)HWP_HIGHEST_PERF(msr), | ||
2371 | (unsigned int)HWP_GUARANTEED_PERF(msr), | ||
2372 | (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), | ||
2373 | (unsigned int)HWP_LOWEST_PERF(msr)); | ||
2374 | |||
2375 | if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) | ||
2376 | return 0; | ||
2377 | |||
2378 | fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " | ||
2379 | "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n", | ||
2380 | cpu, msr, | ||
2381 | (unsigned int)(((msr) >> 0) & 0xff), | ||
2382 | (unsigned int)(((msr) >> 8) & 0xff), | ||
2383 | (unsigned int)(((msr) >> 16) & 0xff), | ||
2384 | (unsigned int)(((msr) >> 24) & 0xff), | ||
2385 | (unsigned int)(((msr) >> 32) & 0xff3), | ||
2386 | (unsigned int)(((msr) >> 42) & 0x1)); | ||
2387 | |||
2388 | if (has_hwp_pkg) { | ||
2389 | if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) | ||
2390 | return 0; | ||
2391 | |||
2392 | fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " | ||
2393 | "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n", | ||
2394 | cpu, msr, | ||
2395 | (unsigned int)(((msr) >> 0) & 0xff), | ||
2396 | (unsigned int)(((msr) >> 8) & 0xff), | ||
2397 | (unsigned int)(((msr) >> 16) & 0xff), | ||
2398 | (unsigned int)(((msr) >> 24) & 0xff), | ||
2399 | (unsigned int)(((msr) >> 32) & 0xff3)); | ||
2400 | } | ||
2401 | if (has_hwp_notify) { | ||
2402 | if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) | ||
2403 | return 0; | ||
2404 | |||
2405 | fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " | ||
2406 | "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", | ||
2407 | cpu, msr, | ||
2408 | ((msr) & 0x1) ? "EN" : "Dis", | ||
2409 | ((msr) & 0x2) ? "EN" : "Dis"); | ||
2410 | } | ||
2411 | if (get_msr(cpu, MSR_HWP_STATUS, &msr)) | ||
2412 | return 0; | ||
2413 | |||
2414 | fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " | ||
2415 | "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", | ||
2416 | cpu, msr, | ||
2417 | ((msr) & 0x1) ? "" : "No-", | ||
2418 | ((msr) & 0x2) ? "" : "No-"); | ||
2041 | 2419 | ||
2042 | return 0; | 2420 | return 0; |
2043 | } | 2421 | } |
@@ -2057,14 +2435,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
2057 | return 0; | 2435 | return 0; |
2058 | 2436 | ||
2059 | if (cpu_migrate(cpu)) { | 2437 | if (cpu_migrate(cpu)) { |
2060 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2438 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
2061 | return -1; | 2439 | return -1; |
2062 | } | 2440 | } |
2063 | 2441 | ||
2064 | if (do_core_perf_limit_reasons) { | 2442 | if (do_core_perf_limit_reasons) { |
2065 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); | 2443 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); |
2066 | fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 2444 | fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
2067 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", | 2445 | fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", |
2068 | (msr & 1 << 15) ? "bit15, " : "", | 2446 | (msr & 1 << 15) ? "bit15, " : "", |
2069 | (msr & 1 << 14) ? "bit14, " : "", | 2447 | (msr & 1 << 14) ? "bit14, " : "", |
2070 | (msr & 1 << 13) ? "Transitions, " : "", | 2448 | (msr & 1 << 13) ? "Transitions, " : "", |
@@ -2079,7 +2457,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
2079 | (msr & 1 << 2) ? "bit2, " : "", | 2457 | (msr & 1 << 2) ? "bit2, " : "", |
2080 | (msr & 1 << 1) ? "ThermStatus, " : "", | 2458 | (msr & 1 << 1) ? "ThermStatus, " : "", |
2081 | (msr & 1 << 0) ? "PROCHOT, " : ""); | 2459 | (msr & 1 << 0) ? "PROCHOT, " : ""); |
2082 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", | 2460 | fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", |
2083 | (msr & 1 << 31) ? "bit31, " : "", | 2461 | (msr & 1 << 31) ? "bit31, " : "", |
2084 | (msr & 1 << 30) ? "bit30, " : "", | 2462 | (msr & 1 << 30) ? "bit30, " : "", |
2085 | (msr & 1 << 29) ? "Transitions, " : "", | 2463 | (msr & 1 << 29) ? "Transitions, " : "", |
@@ -2098,8 +2476,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
2098 | } | 2476 | } |
2099 | if (do_gfx_perf_limit_reasons) { | 2477 | if (do_gfx_perf_limit_reasons) { |
2100 | get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); | 2478 | get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); |
2101 | fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 2479 | fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
2102 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", | 2480 | fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", |
2103 | (msr & 1 << 0) ? "PROCHOT, " : "", | 2481 | (msr & 1 << 0) ? "PROCHOT, " : "", |
2104 | (msr & 1 << 1) ? "ThermStatus, " : "", | 2482 | (msr & 1 << 1) ? "ThermStatus, " : "", |
2105 | (msr & 1 << 4) ? "Graphics, " : "", | 2483 | (msr & 1 << 4) ? "Graphics, " : "", |
@@ -2108,7 +2486,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
2108 | (msr & 1 << 9) ? "GFXPwr, " : "", | 2486 | (msr & 1 << 9) ? "GFXPwr, " : "", |
2109 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | 2487 | (msr & 1 << 10) ? "PkgPwrL1, " : "", |
2110 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | 2488 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); |
2111 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", | 2489 | fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", |
2112 | (msr & 1 << 16) ? "PROCHOT, " : "", | 2490 | (msr & 1 << 16) ? "PROCHOT, " : "", |
2113 | (msr & 1 << 17) ? "ThermStatus, " : "", | 2491 | (msr & 1 << 17) ? "ThermStatus, " : "", |
2114 | (msr & 1 << 20) ? "Graphics, " : "", | 2492 | (msr & 1 << 20) ? "Graphics, " : "", |
@@ -2120,15 +2498,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
2120 | } | 2498 | } |
2121 | if (do_ring_perf_limit_reasons) { | 2499 | if (do_ring_perf_limit_reasons) { |
2122 | get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); | 2500 | get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); |
2123 | fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 2501 | fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
2124 | fprintf(stderr, " (Active: %s%s%s%s%s%s)", | 2502 | fprintf(outf, " (Active: %s%s%s%s%s%s)", |
2125 | (msr & 1 << 0) ? "PROCHOT, " : "", | 2503 | (msr & 1 << 0) ? "PROCHOT, " : "", |
2126 | (msr & 1 << 1) ? "ThermStatus, " : "", | 2504 | (msr & 1 << 1) ? "ThermStatus, " : "", |
2127 | (msr & 1 << 6) ? "VR-Therm, " : "", | 2505 | (msr & 1 << 6) ? "VR-Therm, " : "", |
2128 | (msr & 1 << 8) ? "Amps, " : "", | 2506 | (msr & 1 << 8) ? "Amps, " : "", |
2129 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | 2507 | (msr & 1 << 10) ? "PkgPwrL1, " : "", |
2130 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | 2508 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); |
2131 | fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", | 2509 | fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", |
2132 | (msr & 1 << 16) ? "PROCHOT, " : "", | 2510 | (msr & 1 << 16) ? "PROCHOT, " : "", |
2133 | (msr & 1 << 17) ? "ThermStatus, " : "", | 2511 | (msr & 1 << 17) ? "ThermStatus, " : "", |
2134 | (msr & 1 << 22) ? "VR-Therm, " : "", | 2512 | (msr & 1 << 22) ? "VR-Therm, " : "", |
@@ -2142,7 +2520,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
2142 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | 2520 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ |
2143 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | 2521 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ |
2144 | 2522 | ||
2145 | double get_tdp(model) | 2523 | double get_tdp(int model) |
2146 | { | 2524 | { |
2147 | unsigned long long msr; | 2525 | unsigned long long msr; |
2148 | 2526 | ||
@@ -2251,12 +2629,12 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
2251 | 2629 | ||
2252 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; | 2630 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; |
2253 | if (debug) | 2631 | if (debug) |
2254 | fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); | 2632 | fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); |
2255 | 2633 | ||
2256 | return; | 2634 | return; |
2257 | } | 2635 | } |
2258 | 2636 | ||
2259 | void perf_limit_reasons_probe(family, model) | 2637 | void perf_limit_reasons_probe(int family, int model) |
2260 | { | 2638 | { |
2261 | if (!genuine_intel) | 2639 | if (!genuine_intel) |
2262 | return; | 2640 | return; |
@@ -2293,7 +2671,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
2293 | return 0; | 2671 | return 0; |
2294 | 2672 | ||
2295 | if (cpu_migrate(cpu)) { | 2673 | if (cpu_migrate(cpu)) { |
2296 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2674 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
2297 | return -1; | 2675 | return -1; |
2298 | } | 2676 | } |
2299 | 2677 | ||
@@ -2302,7 +2680,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
2302 | return 0; | 2680 | return 0; |
2303 | 2681 | ||
2304 | dts = (msr >> 16) & 0x7F; | 2682 | dts = (msr >> 16) & 0x7F; |
2305 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", | 2683 | fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", |
2306 | cpu, msr, tcc_activation_temp - dts); | 2684 | cpu, msr, tcc_activation_temp - dts); |
2307 | 2685 | ||
2308 | #ifdef THERM_DEBUG | 2686 | #ifdef THERM_DEBUG |
@@ -2311,7 +2689,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
2311 | 2689 | ||
2312 | dts = (msr >> 16) & 0x7F; | 2690 | dts = (msr >> 16) & 0x7F; |
2313 | dts2 = (msr >> 8) & 0x7F; | 2691 | dts2 = (msr >> 8) & 0x7F; |
2314 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | 2692 | fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", |
2315 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | 2693 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); |
2316 | #endif | 2694 | #endif |
2317 | } | 2695 | } |
@@ -2325,7 +2703,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
2325 | 2703 | ||
2326 | dts = (msr >> 16) & 0x7F; | 2704 | dts = (msr >> 16) & 0x7F; |
2327 | resolution = (msr >> 27) & 0xF; | 2705 | resolution = (msr >> 27) & 0xF; |
2328 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", | 2706 | fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", |
2329 | cpu, msr, tcc_activation_temp - dts, resolution); | 2707 | cpu, msr, tcc_activation_temp - dts, resolution); |
2330 | 2708 | ||
2331 | #ifdef THERM_DEBUG | 2709 | #ifdef THERM_DEBUG |
@@ -2334,17 +2712,17 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
2334 | 2712 | ||
2335 | dts = (msr >> 16) & 0x7F; | 2713 | dts = (msr >> 16) & 0x7F; |
2336 | dts2 = (msr >> 8) & 0x7F; | 2714 | dts2 = (msr >> 8) & 0x7F; |
2337 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | 2715 | fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", |
2338 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | 2716 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); |
2339 | #endif | 2717 | #endif |
2340 | } | 2718 | } |
2341 | 2719 | ||
2342 | return 0; | 2720 | return 0; |
2343 | } | 2721 | } |
2344 | 2722 | ||
2345 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) | 2723 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) |
2346 | { | 2724 | { |
2347 | fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", | 2725 | fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", |
2348 | cpu, label, | 2726 | cpu, label, |
2349 | ((msr >> 15) & 1) ? "EN" : "DIS", | 2727 | ((msr >> 15) & 1) ? "EN" : "DIS", |
2350 | ((msr >> 0) & 0x7FFF) * rapl_power_units, | 2728 | ((msr >> 0) & 0x7FFF) * rapl_power_units, |
@@ -2368,7 +2746,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2368 | 2746 | ||
2369 | cpu = t->cpu_id; | 2747 | cpu = t->cpu_id; |
2370 | if (cpu_migrate(cpu)) { | 2748 | if (cpu_migrate(cpu)) { |
2371 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2749 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
2372 | return -1; | 2750 | return -1; |
2373 | } | 2751 | } |
2374 | 2752 | ||
@@ -2376,7 +2754,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2376 | return -1; | 2754 | return -1; |
2377 | 2755 | ||
2378 | if (debug) { | 2756 | if (debug) { |
2379 | fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " | 2757 | fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " |
2380 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, | 2758 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, |
2381 | rapl_power_units, rapl_energy_units, rapl_time_units); | 2759 | rapl_power_units, rapl_energy_units, rapl_time_units); |
2382 | } | 2760 | } |
@@ -2386,7 +2764,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2386 | return -5; | 2764 | return -5; |
2387 | 2765 | ||
2388 | 2766 | ||
2389 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | 2767 | fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", |
2390 | cpu, msr, | 2768 | cpu, msr, |
2391 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2769 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
2392 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2770 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
@@ -2399,11 +2777,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2399 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) | 2777 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) |
2400 | return -9; | 2778 | return -9; |
2401 | 2779 | ||
2402 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2780 | fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", |
2403 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); | 2781 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); |
2404 | 2782 | ||
2405 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); | 2783 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); |
2406 | fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", | 2784 | fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", |
2407 | cpu, | 2785 | cpu, |
2408 | ((msr >> 47) & 1) ? "EN" : "DIS", | 2786 | ((msr >> 47) & 1) ? "EN" : "DIS", |
2409 | ((msr >> 32) & 0x7FFF) * rapl_power_units, | 2787 | ((msr >> 32) & 0x7FFF) * rapl_power_units, |
@@ -2415,7 +2793,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2415 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) | 2793 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) |
2416 | return -6; | 2794 | return -6; |
2417 | 2795 | ||
2418 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | 2796 | fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", |
2419 | cpu, msr, | 2797 | cpu, msr, |
2420 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2798 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
2421 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2799 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
@@ -2425,7 +2803,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2425 | if (do_rapl & RAPL_DRAM) { | 2803 | if (do_rapl & RAPL_DRAM) { |
2426 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) | 2804 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) |
2427 | return -9; | 2805 | return -9; |
2428 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2806 | fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", |
2429 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | 2807 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); |
2430 | 2808 | ||
2431 | print_power_limit_msr(cpu, msr, "DRAM Limit"); | 2809 | print_power_limit_msr(cpu, msr, "DRAM Limit"); |
@@ -2435,7 +2813,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2435 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) | 2813 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) |
2436 | return -7; | 2814 | return -7; |
2437 | 2815 | ||
2438 | fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); | 2816 | fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); |
2439 | } | 2817 | } |
2440 | } | 2818 | } |
2441 | if (do_rapl & RAPL_CORES) { | 2819 | if (do_rapl & RAPL_CORES) { |
@@ -2443,7 +2821,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2443 | 2821 | ||
2444 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) | 2822 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) |
2445 | return -9; | 2823 | return -9; |
2446 | fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2824 | fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", |
2447 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | 2825 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); |
2448 | print_power_limit_msr(cpu, msr, "Cores Limit"); | 2826 | print_power_limit_msr(cpu, msr, "Cores Limit"); |
2449 | } | 2827 | } |
@@ -2453,11 +2831,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2453 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) | 2831 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) |
2454 | return -8; | 2832 | return -8; |
2455 | 2833 | ||
2456 | fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); | 2834 | fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); |
2457 | 2835 | ||
2458 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) | 2836 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) |
2459 | return -9; | 2837 | return -9; |
2460 | fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2838 | fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", |
2461 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | 2839 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); |
2462 | print_power_limit_msr(cpu, msr, "GFX Limit"); | 2840 | print_power_limit_msr(cpu, msr, "GFX Limit"); |
2463 | } | 2841 | } |
@@ -2583,23 +2961,23 @@ double slm_bclk(void) | |||
2583 | double freq; | 2961 | double freq; |
2584 | 2962 | ||
2585 | if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) | 2963 | if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) |
2586 | fprintf(stderr, "SLM BCLK: unknown\n"); | 2964 | fprintf(outf, "SLM BCLK: unknown\n"); |
2587 | 2965 | ||
2588 | i = msr & 0xf; | 2966 | i = msr & 0xf; |
2589 | if (i >= SLM_BCLK_FREQS) { | 2967 | if (i >= SLM_BCLK_FREQS) { |
2590 | fprintf(stderr, "SLM BCLK[%d] invalid\n", i); | 2968 | fprintf(outf, "SLM BCLK[%d] invalid\n", i); |
2591 | msr = 3; | 2969 | msr = 3; |
2592 | } | 2970 | } |
2593 | freq = slm_freq_table[i]; | 2971 | freq = slm_freq_table[i]; |
2594 | 2972 | ||
2595 | fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); | 2973 | fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); |
2596 | 2974 | ||
2597 | return freq; | 2975 | return freq; |
2598 | } | 2976 | } |
2599 | 2977 | ||
2600 | double discover_bclk(unsigned int family, unsigned int model) | 2978 | double discover_bclk(unsigned int family, unsigned int model) |
2601 | { | 2979 | { |
2602 | if (has_snb_msrs(family, model)) | 2980 | if (has_snb_msrs(family, model) || is_knl(family, model)) |
2603 | return 100.00; | 2981 | return 100.00; |
2604 | else if (is_slm(family, model)) | 2982 | else if (is_slm(family, model)) |
2605 | return slm_bclk(); | 2983 | return slm_bclk(); |
@@ -2635,13 +3013,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
2635 | 3013 | ||
2636 | cpu = t->cpu_id; | 3014 | cpu = t->cpu_id; |
2637 | if (cpu_migrate(cpu)) { | 3015 | if (cpu_migrate(cpu)) { |
2638 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 3016 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
2639 | return -1; | 3017 | return -1; |
2640 | } | 3018 | } |
2641 | 3019 | ||
2642 | if (tcc_activation_temp_override != 0) { | 3020 | if (tcc_activation_temp_override != 0) { |
2643 | tcc_activation_temp = tcc_activation_temp_override; | 3021 | tcc_activation_temp = tcc_activation_temp_override; |
2644 | fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", | 3022 | fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", |
2645 | cpu, tcc_activation_temp); | 3023 | cpu, tcc_activation_temp); |
2646 | return 0; | 3024 | return 0; |
2647 | } | 3025 | } |
@@ -2656,7 +3034,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
2656 | target_c_local = (msr >> 16) & 0xFF; | 3034 | target_c_local = (msr >> 16) & 0xFF; |
2657 | 3035 | ||
2658 | if (debug) | 3036 | if (debug) |
2659 | fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", | 3037 | fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", |
2660 | cpu, msr, target_c_local); | 3038 | cpu, msr, target_c_local); |
2661 | 3039 | ||
2662 | if (!target_c_local) | 3040 | if (!target_c_local) |
@@ -2668,37 +3046,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
2668 | 3046 | ||
2669 | guess: | 3047 | guess: |
2670 | tcc_activation_temp = TJMAX_DEFAULT; | 3048 | tcc_activation_temp = TJMAX_DEFAULT; |
2671 | fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", | 3049 | fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", |
2672 | cpu, tcc_activation_temp); | 3050 | cpu, tcc_activation_temp); |
2673 | 3051 | ||
2674 | return 0; | 3052 | return 0; |
2675 | } | 3053 | } |
3054 | |||
3055 | void decode_feature_control_msr(void) | ||
3056 | { | ||
3057 | unsigned long long msr; | ||
3058 | |||
3059 | if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr)) | ||
3060 | fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", | ||
3061 | base_cpu, msr, | ||
3062 | msr & FEATURE_CONTROL_LOCKED ? "" : "UN-", | ||
3063 | msr & (1 << 18) ? "SGX" : ""); | ||
3064 | } | ||
3065 | |||
3066 | void decode_misc_enable_msr(void) | ||
3067 | { | ||
3068 | unsigned long long msr; | ||
3069 | |||
3070 | if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) | ||
3071 | fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", | ||
3072 | base_cpu, msr, | ||
3073 | msr & (1 << 3) ? "TCC" : "", | ||
3074 | msr & (1 << 16) ? "EIST" : "", | ||
3075 | msr & (1 << 18) ? "MONITOR" : ""); | ||
3076 | } | ||
3077 | |||
3078 | /* | ||
3079 | * Decode MSR_MISC_PWR_MGMT | ||
3080 | * | ||
3081 | * Decode the bits according to the Nehalem documentation | ||
3082 | * bit[0] seems to continue to have same meaning going forward | ||
3083 | * bit[1] less so... | ||
3084 | */ | ||
3085 | void decode_misc_pwr_mgmt_msr(void) | ||
3086 | { | ||
3087 | unsigned long long msr; | ||
3088 | |||
3089 | if (!do_nhm_platform_info) | ||
3090 | return; | ||
3091 | |||
3092 | if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) | ||
3093 | fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", | ||
3094 | base_cpu, msr, | ||
3095 | msr & (1 << 0) ? "DIS" : "EN", | ||
3096 | msr & (1 << 1) ? "EN" : "DIS"); | ||
3097 | } | ||
3098 | |||
2676 | void process_cpuid() | 3099 | void process_cpuid() |
2677 | { | 3100 | { |
2678 | unsigned int eax, ebx, ecx, edx, max_level; | 3101 | unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; |
2679 | unsigned int fms, family, model, stepping; | 3102 | unsigned int fms, family, model, stepping; |
2680 | 3103 | ||
2681 | eax = ebx = ecx = edx = 0; | 3104 | eax = ebx = ecx = edx = 0; |
2682 | 3105 | ||
2683 | __get_cpuid(0, &max_level, &ebx, &ecx, &edx); | 3106 | __cpuid(0, max_level, ebx, ecx, edx); |
2684 | 3107 | ||
2685 | if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) | 3108 | if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) |
2686 | genuine_intel = 1; | 3109 | genuine_intel = 1; |
2687 | 3110 | ||
2688 | if (debug) | 3111 | if (debug) |
2689 | fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", | 3112 | fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", |
2690 | (char *)&ebx, (char *)&edx, (char *)&ecx); | 3113 | (char *)&ebx, (char *)&edx, (char *)&ecx); |
2691 | 3114 | ||
2692 | __get_cpuid(1, &fms, &ebx, &ecx, &edx); | 3115 | __cpuid(1, fms, ebx, ecx, edx); |
2693 | family = (fms >> 8) & 0xf; | 3116 | family = (fms >> 8) & 0xf; |
2694 | model = (fms >> 4) & 0xf; | 3117 | model = (fms >> 4) & 0xf; |
2695 | stepping = fms & 0xf; | 3118 | stepping = fms & 0xf; |
2696 | if (family == 6 || family == 0xf) | 3119 | if (family == 6 || family == 0xf) |
2697 | model += ((fms >> 16) & 0xf) << 4; | 3120 | model += ((fms >> 16) & 0xf) << 4; |
2698 | 3121 | ||
2699 | if (debug) | 3122 | if (debug) { |
2700 | fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", | 3123 | fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", |
2701 | max_level, family, model, stepping, family, model, stepping); | 3124 | max_level, family, model, stepping, family, model, stepping); |
3125 | fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", | ||
3126 | ecx & (1 << 0) ? "SSE3" : "-", | ||
3127 | ecx & (1 << 3) ? "MONITOR" : "-", | ||
3128 | ecx & (1 << 6) ? "SMX" : "-", | ||
3129 | ecx & (1 << 7) ? "EIST" : "-", | ||
3130 | ecx & (1 << 8) ? "TM2" : "-", | ||
3131 | edx & (1 << 4) ? "TSC" : "-", | ||
3132 | edx & (1 << 5) ? "MSR" : "-", | ||
3133 | edx & (1 << 22) ? "ACPI-TM" : "-", | ||
3134 | edx & (1 << 29) ? "TM" : "-"); | ||
3135 | } | ||
2702 | 3136 | ||
2703 | if (!(edx & (1 << 5))) | 3137 | if (!(edx & (1 << 5))) |
2704 | errx(1, "CPUID: no MSR"); | 3138 | errx(1, "CPUID: no MSR"); |
@@ -2709,15 +3143,15 @@ void process_cpuid() | |||
2709 | * This check is valid for both Intel and AMD. | 3143 | * This check is valid for both Intel and AMD. |
2710 | */ | 3144 | */ |
2711 | ebx = ecx = edx = 0; | 3145 | ebx = ecx = edx = 0; |
2712 | __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); | 3146 | __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); |
2713 | 3147 | ||
2714 | if (max_level >= 0x80000007) { | 3148 | if (max_extended_level >= 0x80000007) { |
2715 | 3149 | ||
2716 | /* | 3150 | /* |
2717 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 | 3151 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 |
2718 | * this check is valid for both Intel and AMD | 3152 | * this check is valid for both Intel and AMD |
2719 | */ | 3153 | */ |
2720 | __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | 3154 | __cpuid(0x80000007, eax, ebx, ecx, edx); |
2721 | has_invariant_tsc = edx & (1 << 8); | 3155 | has_invariant_tsc = edx & (1 << 8); |
2722 | } | 3156 | } |
2723 | 3157 | ||
@@ -2726,20 +3160,48 @@ void process_cpuid() | |||
2726 | * this check is valid for both Intel and AMD | 3160 | * this check is valid for both Intel and AMD |
2727 | */ | 3161 | */ |
2728 | 3162 | ||
2729 | __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); | 3163 | __cpuid(0x6, eax, ebx, ecx, edx); |
2730 | has_aperf = ecx & (1 << 0); | 3164 | has_aperf = ecx & (1 << 0); |
2731 | do_dts = eax & (1 << 0); | 3165 | do_dts = eax & (1 << 0); |
2732 | do_ptm = eax & (1 << 6); | 3166 | do_ptm = eax & (1 << 6); |
3167 | has_hwp = eax & (1 << 7); | ||
3168 | has_hwp_notify = eax & (1 << 8); | ||
3169 | has_hwp_activity_window = eax & (1 << 9); | ||
3170 | has_hwp_epp = eax & (1 << 10); | ||
3171 | has_hwp_pkg = eax & (1 << 11); | ||
2733 | has_epb = ecx & (1 << 3); | 3172 | has_epb = ecx & (1 << 3); |
2734 | 3173 | ||
2735 | if (debug) | 3174 | if (debug) |
2736 | fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", | 3175 | fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " |
2737 | has_aperf ? "" : "No ", | 3176 | "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", |
2738 | do_dts ? "" : "No ", | 3177 | has_aperf ? "" : "No-", |
2739 | do_ptm ? "" : "No ", | 3178 | do_dts ? "" : "No-", |
2740 | has_epb ? "" : "No "); | 3179 | do_ptm ? "" : "No-", |
3180 | has_hwp ? "" : "No-", | ||
3181 | has_hwp_notify ? "" : "No-", | ||
3182 | has_hwp_activity_window ? "" : "No-", | ||
3183 | has_hwp_epp ? "" : "No-", | ||
3184 | has_hwp_pkg ? "" : "No-", | ||
3185 | has_epb ? "" : "No-"); | ||
3186 | |||
3187 | if (debug) | ||
3188 | decode_misc_enable_msr(); | ||
3189 | |||
3190 | if (max_level >= 0x7) { | ||
3191 | int has_sgx; | ||
2741 | 3192 | ||
2742 | if (max_level > 0x15) { | 3193 | ecx = 0; |
3194 | |||
3195 | __cpuid_count(0x7, 0, eax, ebx, ecx, edx); | ||
3196 | |||
3197 | has_sgx = ebx & (1 << 2); | ||
3198 | fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-"); | ||
3199 | |||
3200 | if (has_sgx) | ||
3201 | decode_feature_control_msr(); | ||
3202 | } | ||
3203 | |||
3204 | if (max_level >= 0x15) { | ||
2743 | unsigned int eax_crystal; | 3205 | unsigned int eax_crystal; |
2744 | unsigned int ebx_tsc; | 3206 | unsigned int ebx_tsc; |
2745 | 3207 | ||
@@ -2747,12 +3209,12 @@ void process_cpuid() | |||
2747 | * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz | 3209 | * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz |
2748 | */ | 3210 | */ |
2749 | eax_crystal = ebx_tsc = crystal_hz = edx = 0; | 3211 | eax_crystal = ebx_tsc = crystal_hz = edx = 0; |
2750 | __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); | 3212 | __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); |
2751 | 3213 | ||
2752 | if (ebx_tsc != 0) { | 3214 | if (ebx_tsc != 0) { |
2753 | 3215 | ||
2754 | if (debug && (ebx != 0)) | 3216 | if (debug && (ebx != 0)) |
2755 | fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", | 3217 | fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", |
2756 | eax_crystal, ebx_tsc, crystal_hz); | 3218 | eax_crystal, ebx_tsc, crystal_hz); |
2757 | 3219 | ||
2758 | if (crystal_hz == 0) | 3220 | if (crystal_hz == 0) |
@@ -2768,11 +3230,24 @@ void process_cpuid() | |||
2768 | if (crystal_hz) { | 3230 | if (crystal_hz) { |
2769 | tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; | 3231 | tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; |
2770 | if (debug) | 3232 | if (debug) |
2771 | fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", | 3233 | fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", |
2772 | tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); | 3234 | tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); |
2773 | } | 3235 | } |
2774 | } | 3236 | } |
2775 | } | 3237 | } |
3238 | if (max_level >= 0x16) { | ||
3239 | unsigned int base_mhz, max_mhz, bus_mhz, edx; | ||
3240 | |||
3241 | /* | ||
3242 | * CPUID 16H Base MHz, Max MHz, Bus MHz | ||
3243 | */ | ||
3244 | base_mhz = max_mhz = bus_mhz = edx = 0; | ||
3245 | |||
3246 | __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); | ||
3247 | if (debug) | ||
3248 | fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", | ||
3249 | base_mhz, max_mhz, bus_mhz); | ||
3250 | } | ||
2776 | 3251 | ||
2777 | if (has_aperf) | 3252 | if (has_aperf) |
2778 | aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); | 3253 | aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); |
@@ -2788,21 +3263,28 @@ void process_cpuid() | |||
2788 | do_slm_cstates = is_slm(family, model); | 3263 | do_slm_cstates = is_slm(family, model); |
2789 | do_knl_cstates = is_knl(family, model); | 3264 | do_knl_cstates = is_knl(family, model); |
2790 | 3265 | ||
3266 | if (debug) | ||
3267 | decode_misc_pwr_mgmt_msr(); | ||
3268 | |||
2791 | rapl_probe(family, model); | 3269 | rapl_probe(family, model); |
2792 | perf_limit_reasons_probe(family, model); | 3270 | perf_limit_reasons_probe(family, model); |
2793 | 3271 | ||
2794 | if (debug) | 3272 | if (debug) |
2795 | dump_cstate_pstate_config_info(); | 3273 | dump_cstate_pstate_config_info(family, model); |
2796 | 3274 | ||
2797 | if (has_skl_msrs(family, model)) | 3275 | if (has_skl_msrs(family, model)) |
2798 | calculate_tsc_tweak(); | 3276 | calculate_tsc_tweak(); |
2799 | 3277 | ||
3278 | do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); | ||
3279 | |||
3280 | do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); | ||
3281 | |||
2800 | return; | 3282 | return; |
2801 | } | 3283 | } |
2802 | 3284 | ||
2803 | void help() | 3285 | void help() |
2804 | { | 3286 | { |
2805 | fprintf(stderr, | 3287 | fprintf(outf, |
2806 | "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" | 3288 | "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" |
2807 | "\n" | 3289 | "\n" |
2808 | "Turbostat forks the specified COMMAND and prints statistics\n" | 3290 | "Turbostat forks the specified COMMAND and prints statistics\n" |
@@ -2814,6 +3296,7 @@ void help() | |||
2814 | "--help print this help message\n" | 3296 | "--help print this help message\n" |
2815 | "--counter msr print 32-bit counter at address \"msr\"\n" | 3297 | "--counter msr print 32-bit counter at address \"msr\"\n" |
2816 | "--Counter msr print 64-bit Counter at address \"msr\"\n" | 3298 | "--Counter msr print 64-bit Counter at address \"msr\"\n" |
3299 | "--out file create or truncate \"file\" for all output\n" | ||
2817 | "--msr msr print 32-bit value at address \"msr\"\n" | 3300 | "--msr msr print 32-bit value at address \"msr\"\n" |
2818 | "--MSR msr print 64-bit Value at address \"msr\"\n" | 3301 | "--MSR msr print 64-bit Value at address \"msr\"\n" |
2819 | "--version print version information\n" | 3302 | "--version print version information\n" |
@@ -2858,7 +3341,7 @@ void topology_probe() | |||
2858 | show_cpu = 1; | 3341 | show_cpu = 1; |
2859 | 3342 | ||
2860 | if (debug > 1) | 3343 | if (debug > 1) |
2861 | fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); | 3344 | fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); |
2862 | 3345 | ||
2863 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); | 3346 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); |
2864 | if (cpus == NULL) | 3347 | if (cpus == NULL) |
@@ -2893,7 +3376,7 @@ void topology_probe() | |||
2893 | 3376 | ||
2894 | if (cpu_is_not_present(i)) { | 3377 | if (cpu_is_not_present(i)) { |
2895 | if (debug > 1) | 3378 | if (debug > 1) |
2896 | fprintf(stderr, "cpu%d NOT PRESENT\n", i); | 3379 | fprintf(outf, "cpu%d NOT PRESENT\n", i); |
2897 | continue; | 3380 | continue; |
2898 | } | 3381 | } |
2899 | cpus[i].core_id = get_core_id(i); | 3382 | cpus[i].core_id = get_core_id(i); |
@@ -2908,26 +3391,26 @@ void topology_probe() | |||
2908 | if (siblings > max_siblings) | 3391 | if (siblings > max_siblings) |
2909 | max_siblings = siblings; | 3392 | max_siblings = siblings; |
2910 | if (debug > 1) | 3393 | if (debug > 1) |
2911 | fprintf(stderr, "cpu %d pkg %d core %d\n", | 3394 | fprintf(outf, "cpu %d pkg %d core %d\n", |
2912 | i, cpus[i].physical_package_id, cpus[i].core_id); | 3395 | i, cpus[i].physical_package_id, cpus[i].core_id); |
2913 | } | 3396 | } |
2914 | topo.num_cores_per_pkg = max_core_id + 1; | 3397 | topo.num_cores_per_pkg = max_core_id + 1; |
2915 | if (debug > 1) | 3398 | if (debug > 1) |
2916 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | 3399 | fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", |
2917 | max_core_id, topo.num_cores_per_pkg); | 3400 | max_core_id, topo.num_cores_per_pkg); |
2918 | if (debug && !summary_only && topo.num_cores_per_pkg > 1) | 3401 | if (debug && !summary_only && topo.num_cores_per_pkg > 1) |
2919 | show_core = 1; | 3402 | show_core = 1; |
2920 | 3403 | ||
2921 | topo.num_packages = max_package_id + 1; | 3404 | topo.num_packages = max_package_id + 1; |
2922 | if (debug > 1) | 3405 | if (debug > 1) |
2923 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | 3406 | fprintf(outf, "max_package_id %d, sizing for %d packages\n", |
2924 | max_package_id, topo.num_packages); | 3407 | max_package_id, topo.num_packages); |
2925 | if (debug && !summary_only && topo.num_packages > 1) | 3408 | if (debug && !summary_only && topo.num_packages > 1) |
2926 | show_pkg = 1; | 3409 | show_pkg = 1; |
2927 | 3410 | ||
2928 | topo.num_threads_per_core = max_siblings; | 3411 | topo.num_threads_per_core = max_siblings; |
2929 | if (debug > 1) | 3412 | if (debug > 1) |
2930 | fprintf(stderr, "max_siblings %d\n", max_siblings); | 3413 | fprintf(outf, "max_siblings %d\n", max_siblings); |
2931 | 3414 | ||
2932 | free(cpus); | 3415 | free(cpus); |
2933 | } | 3416 | } |
@@ -3019,10 +3502,27 @@ void allocate_output_buffer() | |||
3019 | if (outp == NULL) | 3502 | if (outp == NULL) |
3020 | err(-1, "calloc output buffer"); | 3503 | err(-1, "calloc output buffer"); |
3021 | } | 3504 | } |
3505 | void allocate_fd_percpu(void) | ||
3506 | { | ||
3507 | fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); | ||
3508 | if (fd_percpu == NULL) | ||
3509 | err(-1, "calloc fd_percpu"); | ||
3510 | } | ||
3511 | void allocate_irq_buffers(void) | ||
3512 | { | ||
3513 | irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); | ||
3514 | if (irq_column_2_cpu == NULL) | ||
3515 | err(-1, "calloc %d", topo.num_cpus); | ||
3022 | 3516 | ||
3517 | irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); | ||
3518 | if (irqs_per_cpu == NULL) | ||
3519 | err(-1, "calloc %d", topo.max_cpu_num); | ||
3520 | } | ||
3023 | void setup_all_buffers(void) | 3521 | void setup_all_buffers(void) |
3024 | { | 3522 | { |
3025 | topology_probe(); | 3523 | topology_probe(); |
3524 | allocate_irq_buffers(); | ||
3525 | allocate_fd_percpu(); | ||
3026 | allocate_counters(&thread_even, &core_even, &package_even); | 3526 | allocate_counters(&thread_even, &core_even, &package_even); |
3027 | allocate_counters(&thread_odd, &core_odd, &package_odd); | 3527 | allocate_counters(&thread_odd, &core_odd, &package_odd); |
3028 | allocate_output_buffer(); | 3528 | allocate_output_buffer(); |
@@ -3036,7 +3536,7 @@ void set_base_cpu(void) | |||
3036 | err(-ENODEV, "No valid cpus found"); | 3536 | err(-ENODEV, "No valid cpus found"); |
3037 | 3537 | ||
3038 | if (debug > 1) | 3538 | if (debug > 1) |
3039 | fprintf(stderr, "base_cpu = %d\n", base_cpu); | 3539 | fprintf(outf, "base_cpu = %d\n", base_cpu); |
3040 | } | 3540 | } |
3041 | 3541 | ||
3042 | void turbostat_init() | 3542 | void turbostat_init() |
@@ -3049,6 +3549,9 @@ void turbostat_init() | |||
3049 | 3549 | ||
3050 | 3550 | ||
3051 | if (debug) | 3551 | if (debug) |
3552 | for_all_cpus(print_hwp, ODD_COUNTERS); | ||
3553 | |||
3554 | if (debug) | ||
3052 | for_all_cpus(print_epb, ODD_COUNTERS); | 3555 | for_all_cpus(print_epb, ODD_COUNTERS); |
3053 | 3556 | ||
3054 | if (debug) | 3557 | if (debug) |
@@ -3100,9 +3603,10 @@ int fork_it(char **argv) | |||
3100 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); | 3603 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
3101 | compute_average(EVEN_COUNTERS); | 3604 | compute_average(EVEN_COUNTERS); |
3102 | format_all_counters(EVEN_COUNTERS); | 3605 | format_all_counters(EVEN_COUNTERS); |
3103 | flush_stderr(); | ||
3104 | 3606 | ||
3105 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); | 3607 | fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); |
3608 | |||
3609 | flush_output_stderr(); | ||
3106 | 3610 | ||
3107 | return status; | 3611 | return status; |
3108 | } | 3612 | } |
@@ -3119,13 +3623,13 @@ int get_and_dump_counters(void) | |||
3119 | if (status) | 3623 | if (status) |
3120 | return status; | 3624 | return status; |
3121 | 3625 | ||
3122 | flush_stdout(); | 3626 | flush_output_stdout(); |
3123 | 3627 | ||
3124 | return status; | 3628 | return status; |
3125 | } | 3629 | } |
3126 | 3630 | ||
3127 | void print_version() { | 3631 | void print_version() { |
3128 | fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" | 3632 | fprintf(outf, "turbostat version 4.11 27 Feb 2016" |
3129 | " - Len Brown <lenb@kernel.org>\n"); | 3633 | " - Len Brown <lenb@kernel.org>\n"); |
3130 | } | 3634 | } |
3131 | 3635 | ||
@@ -3143,6 +3647,7 @@ void cmdline(int argc, char **argv) | |||
3143 | {"Joules", no_argument, 0, 'J'}, | 3647 | {"Joules", no_argument, 0, 'J'}, |
3144 | {"MSR", required_argument, 0, 'M'}, | 3648 | {"MSR", required_argument, 0, 'M'}, |
3145 | {"msr", required_argument, 0, 'm'}, | 3649 | {"msr", required_argument, 0, 'm'}, |
3650 | {"out", required_argument, 0, 'o'}, | ||
3146 | {"Package", no_argument, 0, 'p'}, | 3651 | {"Package", no_argument, 0, 'p'}, |
3147 | {"processor", no_argument, 0, 'p'}, | 3652 | {"processor", no_argument, 0, 'p'}, |
3148 | {"Summary", no_argument, 0, 'S'}, | 3653 | {"Summary", no_argument, 0, 'S'}, |
@@ -3153,7 +3658,7 @@ void cmdline(int argc, char **argv) | |||
3153 | 3658 | ||
3154 | progname = argv[0]; | 3659 | progname = argv[0]; |
3155 | 3660 | ||
3156 | while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", | 3661 | while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", |
3157 | long_options, &option_index)) != -1) { | 3662 | long_options, &option_index)) != -1) { |
3158 | switch (opt) { | 3663 | switch (opt) { |
3159 | case 'C': | 3664 | case 'C': |
@@ -3173,7 +3678,18 @@ void cmdline(int argc, char **argv) | |||
3173 | help(); | 3678 | help(); |
3174 | exit(1); | 3679 | exit(1); |
3175 | case 'i': | 3680 | case 'i': |
3176 | interval_sec = atoi(optarg); | 3681 | { |
3682 | double interval = strtod(optarg, NULL); | ||
3683 | |||
3684 | if (interval < 0.001) { | ||
3685 | fprintf(outf, "interval %f seconds is too small\n", | ||
3686 | interval); | ||
3687 | exit(2); | ||
3688 | } | ||
3689 | |||
3690 | interval_ts.tv_sec = interval; | ||
3691 | interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; | ||
3692 | } | ||
3177 | break; | 3693 | break; |
3178 | case 'J': | 3694 | case 'J': |
3179 | rapl_joules++; | 3695 | rapl_joules++; |
@@ -3184,6 +3700,9 @@ void cmdline(int argc, char **argv) | |||
3184 | case 'm': | 3700 | case 'm': |
3185 | sscanf(optarg, "%x", &extra_msr_offset32); | 3701 | sscanf(optarg, "%x", &extra_msr_offset32); |
3186 | break; | 3702 | break; |
3703 | case 'o': | ||
3704 | outf = fopen_or_die(optarg, "w"); | ||
3705 | break; | ||
3187 | case 'P': | 3706 | case 'P': |
3188 | show_pkg_only++; | 3707 | show_pkg_only++; |
3189 | break; | 3708 | break; |
@@ -3206,6 +3725,8 @@ void cmdline(int argc, char **argv) | |||
3206 | 3725 | ||
3207 | int main(int argc, char **argv) | 3726 | int main(int argc, char **argv) |
3208 | { | 3727 | { |
3728 | outf = stderr; | ||
3729 | |||
3209 | cmdline(argc, argv); | 3730 | cmdline(argc, argv); |
3210 | 3731 | ||
3211 | if (debug) | 3732 | if (debug) |