diff options
| author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-03-13 21:13:05 -0400 |
|---|---|---|
| committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2016-03-13 21:13:05 -0400 |
| commit | 3fdb74649b4f18ccaa88766750b616dec6acb5b0 (patch) | |
| tree | 691e718a361a1db8fae3e87270af8d62adc4946a /tools/power | |
| parent | 5b3e7e0536bd6326798ab57d14a49b15ad7e3e3f (diff) | |
| parent | 685b535b2cdb9cdf354321f8af9ed17dcf19d19f (diff) | |
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux into pm-tools
Pull turbostat updates for 4.6 from Len Brown.
* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
tools/power turbostat: bugfix: TDP MSRs print bits fixing
tools/power turbostat: correct output for MSR_NHM_SNB_PKG_CST_CFG_CTL dump
tools/power turbostat: call __cpuid() instead of __get_cpuid()
tools/power turbostat: indicate SMX and SGX support
tools/power turbostat: detect and work around syscall jitter
tools/power turbostat: show GFX%rc6
tools/power turbostat: show GFXMHz
tools/power turbostat: show IRQs per CPU
tools/power turbostat: make fewer systems calls
tools/power turbostat: fix compiler warnings
tools/power turbostat: add --out option for saving output in a file
tools/power turbostat: re-name "%Busy" field to "Busy%"
tools/power turbostat: Intel Xeon x200: fix turbo-ratio decoding
tools/power turbostat: Intel Xeon x200: fix erroneous bclk value
tools/power turbostat: allow sub-sec intervals
tools/power turbostat: Decode MSR_MISC_PWR_MGMT
tools/power turbostat: decode HWP registers
x86 msr-index: Simplify syntax for HWP fields
tools/power turbostat: CPUID(0x16) leaf shows base, max, and bus frequency
tools/power turbostat: decode more CPUID fields
Diffstat (limited to 'tools/power')
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 32 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 889 |
2 files changed, 724 insertions, 197 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 622db685b4f9..89a55d5e32f3 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
| @@ -34,7 +34,10 @@ name as necessary to disambiguate it from others is necessary. Note that option | |||
| 34 | \fB--debug\fP displays additional system configuration information. Invoking this parameter | 34 | \fB--debug\fP displays additional system configuration information. Invoking this parameter |
| 35 | more than once may also enable internal turbostat debug information. | 35 | more than once may also enable internal turbostat debug information. |
| 36 | .PP | 36 | .PP |
| 37 | \fB--interval seconds\fP overrides the default 5-second measurement interval. | 37 | \fB--interval seconds\fP overrides the default 5.0 second measurement interval. |
| 38 | .PP | ||
| 39 | \fB--out output_file\fP turbostat output is written to the specified output_file. | ||
| 40 | The file is truncated if it already exists, and it is created if it does not exist. | ||
| 38 | .PP | 41 | .PP |
| 39 | \fB--help\fP displays usage for the most common parameters. | 42 | \fB--help\fP displays usage for the most common parameters. |
| 40 | .PP | 43 | .PP |
| @@ -61,7 +64,7 @@ displays the statistics gathered since it was forked. | |||
| 61 | .nf | 64 | .nf |
| 62 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. | 65 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. |
| 63 | \fBAVG_MHz\fP number of cycles executed divided by time elapsed. | 66 | \fBAVG_MHz\fP number of cycles executed divided by time elapsed. |
| 64 | \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. | 67 | \fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. |
| 65 | \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). | 68 | \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). |
| 66 | \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. | 69 | \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. |
| 67 | .fi | 70 | .fi |
| @@ -83,13 +86,14 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T | |||
| 83 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. | 86 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. |
| 84 | .fi | 87 | .fi |
| 85 | .PP | 88 | .PP |
| 86 | .SH EXAMPLE | 89 | .SH PERIODIC EXAMPLE |
| 87 | Without any parameters, turbostat displays statistics every 5 seconds. | 90 | Without any parameters, turbostat displays statistics every 5 seconds. |
| 88 | (override interval with "-i sec" option, or specify a command | 91 | Periodic output goes to stdout, by default, unless --out is used to specify an output file. |
| 89 | for turbostat to fork). | 92 | The 5-second interval can be changed with the "-i sec" option. |
| 93 | Or a command may be specified as in "FORK EXAMPLE" below. | ||
| 90 | .nf | 94 | .nf |
| 91 | [root@hsw]# ./turbostat | 95 | [root@hsw]# ./turbostat |
| 92 | CPU Avg_MHz %Busy Bzy_MHz TSC_MHz | 96 | CPU Avg_MHz Busy% Bzy_MHz TSC_MHz |
| 93 | - 488 12.51 3898 3498 | 97 | - 488 12.51 3898 3498 |
| 94 | 0 0 0.01 3885 3498 | 98 | 0 0 0.01 3885 3498 |
| 95 | 4 3897 99.99 3898 3498 | 99 | 4 3897 99.99 3898 3498 |
| @@ -145,7 +149,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) | |||
| 145 | cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) | 149 | cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) |
| 146 | cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) | 150 | cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) |
| 147 | cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) | 151 | cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) |
| 148 | Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt | 152 | Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt |
| 149 | - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 | 153 | - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 |
| 150 | 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 | 154 | 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 |
| 151 | 0 4 3897 99.98 3898 3498 0 0.02 | 155 | 0 4 3897 99.98 3898 3498 0 0.02 |
| @@ -171,14 +175,16 @@ The --debug option adds additional columns to the measurement output, including C | |||
| 171 | See the field definitions above. | 175 | See the field definitions above. |
| 172 | .SH FORK EXAMPLE | 176 | .SH FORK EXAMPLE |
| 173 | If turbostat is invoked with a command, it will fork that command | 177 | If turbostat is invoked with a command, it will fork that command |
| 174 | and output the statistics gathered when the command exits. | 178 | and output the statistics gathered after the command exits. |
| 179 | In this case, turbostat output goes to stderr, by default. | ||
| 180 | Output can instead be saved to a file using the --out option. | ||
| 175 | eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds | 181 | eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds |
| 176 | until ^C while the other CPUs are mostly idle: | 182 | until ^C while the other CPUs are mostly idle: |
| 177 | 183 | ||
| 178 | .nf | 184 | .nf |
| 179 | root@hsw: turbostat cat /dev/zero > /dev/null | 185 | root@hsw: turbostat cat /dev/zero > /dev/null |
| 180 | ^C | 186 | ^C |
| 181 | CPU Avg_MHz %Busy Bzy_MHz TSC_MHz | 187 | CPU Avg_MHz Busy% Bzy_MHz TSC_MHz |
| 182 | - 482 12.51 3854 3498 | 188 | - 482 12.51 3854 3498 |
| 183 | 0 0 0.01 1960 3498 | 189 | 0 0 0.01 1960 3498 |
| 184 | 4 0 0.00 2128 3498 | 190 | 4 0 0.00 2128 3498 |
| @@ -192,12 +198,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null | |||
| 192 | 198 | ||
| 193 | .fi | 199 | .fi |
| 194 | Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. | 200 | Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. |
| 195 | The first row shows the average MHz and %Busy across all the processors in the system. | 201 | The first row shows the average MHz and Busy% across all the processors in the system. |
| 196 | 202 | ||
| 197 | Note that the Avg_MHz column reflects the total number of cycles executed | 203 | Note that the Avg_MHz column reflects the total number of cycles executed |
| 198 | divided by the measurement interval. If the %Busy column is 100%, | 204 | divided by the measurement interval. If the Busy% column is 100%, |
| 199 | then the processor was running at that speed the entire interval. | 205 | then the processor was running at that speed the entire interval. |
| 200 | The Avg_MHz multiplied by the %Busy results in the Bzy_MHz -- | 206 | The Avg_MHz multiplied by the Busy% results in the Bzy_MHz -- |
| 201 | which is the average frequency while the processor was executing -- | 207 | which is the average frequency while the processor was executing -- |
| 202 | not including any non-busy idle time. | 208 | not including any non-busy idle time. |
| 203 | 209 | ||
| @@ -233,7 +239,7 @@ in the brand string in /proc/cpuinfo. On a system where | |||
| 233 | the TSC stops in idle, TSC_MHz will drop | 239 | the TSC stops in idle, TSC_MHz will drop |
| 234 | below the processor's base frequency. | 240 | below the processor's base frequency. |
| 235 | 241 | ||
| 236 | %Busy = MPERF_delta/TSC_delta | 242 | Busy% = MPERF_delta/TSC_delta |
| 237 | 243 | ||
| 238 | Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval | 244 | Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval |
| 239 | 245 | ||
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0dac7e05a6ac..ee1551b6fa01 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
| @@ -38,12 +38,15 @@ | |||
| 38 | #include <string.h> | 38 | #include <string.h> |
| 39 | #include <ctype.h> | 39 | #include <ctype.h> |
| 40 | #include <sched.h> | 40 | #include <sched.h> |
| 41 | #include <time.h> | ||
| 41 | #include <cpuid.h> | 42 | #include <cpuid.h> |
| 42 | #include <linux/capability.h> | 43 | #include <linux/capability.h> |
| 43 | #include <errno.h> | 44 | #include <errno.h> |
| 44 | 45 | ||
| 45 | char *proc_stat = "/proc/stat"; | 46 | char *proc_stat = "/proc/stat"; |
| 46 | unsigned int interval_sec = 5; | 47 | FILE *outf; |
| 48 | int *fd_percpu; | ||
| 49 | struct timespec interval_ts = {5, 0}; | ||
| 47 | unsigned int debug; | 50 | unsigned int debug; |
| 48 | unsigned int rapl_joules; | 51 | unsigned int rapl_joules; |
| 49 | unsigned int summary_only; | 52 | unsigned int summary_only; |
| @@ -72,6 +75,7 @@ unsigned int extra_msr_offset64; | |||
| 72 | unsigned int extra_delta_offset32; | 75 | unsigned int extra_delta_offset32; |
| 73 | unsigned int extra_delta_offset64; | 76 | unsigned int extra_delta_offset64; |
| 74 | unsigned int aperf_mperf_multiplier = 1; | 77 | unsigned int aperf_mperf_multiplier = 1; |
| 78 | int do_irq = 1; | ||
| 75 | int do_smi; | 79 | int do_smi; |
| 76 | double bclk; | 80 | double bclk; |
| 77 | double base_hz; | 81 | double base_hz; |
| @@ -86,6 +90,10 @@ char *output_buffer, *outp; | |||
| 86 | unsigned int do_rapl; | 90 | unsigned int do_rapl; |
| 87 | unsigned int do_dts; | 91 | unsigned int do_dts; |
| 88 | unsigned int do_ptm; | 92 | unsigned int do_ptm; |
| 93 | unsigned int do_gfx_rc6_ms; | ||
| 94 | unsigned long long gfx_cur_rc6_ms; | ||
| 95 | unsigned int do_gfx_mhz; | ||
| 96 | unsigned int gfx_cur_mhz; | ||
| 89 | unsigned int tcc_activation_temp; | 97 | unsigned int tcc_activation_temp; |
| 90 | unsigned int tcc_activation_temp_override; | 98 | unsigned int tcc_activation_temp_override; |
| 91 | double rapl_power_units, rapl_time_units; | 99 | double rapl_power_units, rapl_time_units; |
| @@ -98,6 +106,12 @@ unsigned int crystal_hz; | |||
| 98 | unsigned long long tsc_hz; | 106 | unsigned long long tsc_hz; |
| 99 | int base_cpu; | 107 | int base_cpu; |
| 100 | double discover_bclk(unsigned int family, unsigned int model); | 108 | double discover_bclk(unsigned int family, unsigned int model); |
| 109 | unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ | ||
| 110 | /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ | ||
| 111 | unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ | ||
| 112 | unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ | ||
| 113 | unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ | ||
| 114 | unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ | ||
| 101 | 115 | ||
| 102 | #define RAPL_PKG (1 << 0) | 116 | #define RAPL_PKG (1 << 0) |
| 103 | /* 0x610 MSR_PKG_POWER_LIMIT */ | 117 | /* 0x610 MSR_PKG_POWER_LIMIT */ |
| @@ -145,6 +159,7 @@ struct thread_data { | |||
| 145 | unsigned long long extra_delta64; | 159 | unsigned long long extra_delta64; |
| 146 | unsigned long long extra_msr32; | 160 | unsigned long long extra_msr32; |
| 147 | unsigned long long extra_delta32; | 161 | unsigned long long extra_delta32; |
| 162 | unsigned int irq_count; | ||
| 148 | unsigned int smi_count; | 163 | unsigned int smi_count; |
| 149 | unsigned int cpu_id; | 164 | unsigned int cpu_id; |
| 150 | unsigned int flags; | 165 | unsigned int flags; |
| @@ -172,6 +187,8 @@ struct pkg_data { | |||
| 172 | unsigned long long pkg_any_core_c0; | 187 | unsigned long long pkg_any_core_c0; |
| 173 | unsigned long long pkg_any_gfxe_c0; | 188 | unsigned long long pkg_any_gfxe_c0; |
| 174 | unsigned long long pkg_both_core_gfxe_c0; | 189 | unsigned long long pkg_both_core_gfxe_c0; |
| 190 | unsigned long long gfx_rc6_ms; | ||
| 191 | unsigned int gfx_mhz; | ||
| 175 | unsigned int package_id; | 192 | unsigned int package_id; |
| 176 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ | 193 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ |
| 177 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ | 194 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ |
| @@ -212,6 +229,9 @@ struct topo_params { | |||
| 212 | 229 | ||
| 213 | struct timeval tv_even, tv_odd, tv_delta; | 230 | struct timeval tv_even, tv_odd, tv_delta; |
| 214 | 231 | ||
| 232 | int *irq_column_2_cpu; /* /proc/interrupts column numbers */ | ||
| 233 | int *irqs_per_cpu; /* indexed by cpu_num */ | ||
| 234 | |||
| 215 | void setup_all_buffers(void); | 235 | void setup_all_buffers(void); |
| 216 | 236 | ||
| 217 | int cpu_is_not_present(int cpu) | 237 | int cpu_is_not_present(int cpu) |
| @@ -262,23 +282,34 @@ int cpu_migrate(int cpu) | |||
| 262 | else | 282 | else |
| 263 | return 0; | 283 | return 0; |
| 264 | } | 284 | } |
| 265 | 285 | int get_msr_fd(int cpu) | |
| 266 | int get_msr(int cpu, off_t offset, unsigned long long *msr) | ||
| 267 | { | 286 | { |
| 268 | ssize_t retval; | ||
| 269 | char pathname[32]; | 287 | char pathname[32]; |
| 270 | int fd; | 288 | int fd; |
| 271 | 289 | ||
| 290 | fd = fd_percpu[cpu]; | ||
| 291 | |||
| 292 | if (fd) | ||
| 293 | return fd; | ||
| 294 | |||
| 272 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); | 295 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); |
| 273 | fd = open(pathname, O_RDONLY); | 296 | fd = open(pathname, O_RDONLY); |
| 274 | if (fd < 0) | 297 | if (fd < 0) |
| 275 | err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); | 298 | err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); |
| 276 | 299 | ||
| 277 | retval = pread(fd, msr, sizeof *msr, offset); | 300 | fd_percpu[cpu] = fd; |
| 278 | close(fd); | 301 | |
| 302 | return fd; | ||
| 303 | } | ||
| 304 | |||
| 305 | int get_msr(int cpu, off_t offset, unsigned long long *msr) | ||
| 306 | { | ||
| 307 | ssize_t retval; | ||
| 308 | |||
| 309 | retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); | ||
| 279 | 310 | ||
| 280 | if (retval != sizeof *msr) | 311 | if (retval != sizeof *msr) |
| 281 | err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); | 312 | err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset); |
| 282 | 313 | ||
| 283 | return 0; | 314 | return 0; |
| 284 | } | 315 | } |
| @@ -286,8 +317,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
| 286 | /* | 317 | /* |
| 287 | * Example Format w/ field column widths: | 318 | * Example Format w/ field column widths: |
| 288 | * | 319 | * |
| 289 | * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt | 320 | * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt |
| 290 | * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 | 321 | * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678 |
| 291 | */ | 322 | */ |
| 292 | 323 | ||
| 293 | void print_header(void) | 324 | void print_header(void) |
| @@ -301,7 +332,7 @@ void print_header(void) | |||
| 301 | if (has_aperf) | 332 | if (has_aperf) |
| 302 | outp += sprintf(outp, " Avg_MHz"); | 333 | outp += sprintf(outp, " Avg_MHz"); |
| 303 | if (has_aperf) | 334 | if (has_aperf) |
| 304 | outp += sprintf(outp, " %%Busy"); | 335 | outp += sprintf(outp, " Busy%%"); |
| 305 | if (has_aperf) | 336 | if (has_aperf) |
| 306 | outp += sprintf(outp, " Bzy_MHz"); | 337 | outp += sprintf(outp, " Bzy_MHz"); |
| 307 | outp += sprintf(outp, " TSC_MHz"); | 338 | outp += sprintf(outp, " TSC_MHz"); |
| @@ -318,6 +349,8 @@ void print_header(void) | |||
| 318 | if (!debug) | 349 | if (!debug) |
| 319 | goto done; | 350 | goto done; |
| 320 | 351 | ||
| 352 | if (do_irq) | ||
| 353 | outp += sprintf(outp, " IRQ"); | ||
| 321 | if (do_smi) | 354 | if (do_smi) |
| 322 | outp += sprintf(outp, " SMI"); | 355 | outp += sprintf(outp, " SMI"); |
| 323 | 356 | ||
| @@ -335,6 +368,12 @@ void print_header(void) | |||
| 335 | if (do_ptm) | 368 | if (do_ptm) |
| 336 | outp += sprintf(outp, " PkgTmp"); | 369 | outp += sprintf(outp, " PkgTmp"); |
| 337 | 370 | ||
| 371 | if (do_gfx_rc6_ms) | ||
| 372 | outp += sprintf(outp, " GFX%%rc6"); | ||
| 373 | |||
| 374 | if (do_gfx_mhz) | ||
| 375 | outp += sprintf(outp, " GFXMHz"); | ||
| 376 | |||
| 338 | if (do_skl_residency) { | 377 | if (do_skl_residency) { |
| 339 | outp += sprintf(outp, " Totl%%C0"); | 378 | outp += sprintf(outp, " Totl%%C0"); |
| 340 | outp += sprintf(outp, " Any%%C0"); | 379 | outp += sprintf(outp, " Any%%C0"); |
| @@ -409,6 +448,8 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
| 409 | extra_msr_offset32, t->extra_msr32); | 448 | extra_msr_offset32, t->extra_msr32); |
| 410 | outp += sprintf(outp, "msr0x%x: %016llX\n", | 449 | outp += sprintf(outp, "msr0x%x: %016llX\n", |
| 411 | extra_msr_offset64, t->extra_msr64); | 450 | extra_msr_offset64, t->extra_msr64); |
| 451 | if (do_irq) | ||
| 452 | outp += sprintf(outp, "IRQ: %08X\n", t->irq_count); | ||
| 412 | if (do_smi) | 453 | if (do_smi) |
| 413 | outp += sprintf(outp, "SMI: %08X\n", t->smi_count); | 454 | outp += sprintf(outp, "SMI: %08X\n", t->smi_count); |
| 414 | } | 455 | } |
| @@ -504,7 +545,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 504 | outp += sprintf(outp, "%8.0f", | 545 | outp += sprintf(outp, "%8.0f", |
| 505 | 1.0 / units * t->aperf / interval_float); | 546 | 1.0 / units * t->aperf / interval_float); |
| 506 | 547 | ||
| 507 | /* %Busy */ | 548 | /* Busy% */ |
| 508 | if (has_aperf) { | 549 | if (has_aperf) { |
| 509 | if (!skip_c0) | 550 | if (!skip_c0) |
| 510 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); | 551 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak); |
| @@ -542,6 +583,10 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 542 | if (!debug) | 583 | if (!debug) |
| 543 | goto done; | 584 | goto done; |
| 544 | 585 | ||
| 586 | /* IRQ */ | ||
| 587 | if (do_irq) | ||
| 588 | outp += sprintf(outp, "%8d", t->irq_count); | ||
| 589 | |||
| 545 | /* SMI */ | 590 | /* SMI */ |
| 546 | if (do_smi) | 591 | if (do_smi) |
| 547 | outp += sprintf(outp, "%8d", t->smi_count); | 592 | outp += sprintf(outp, "%8d", t->smi_count); |
| @@ -575,6 +620,14 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 575 | if (do_ptm) | 620 | if (do_ptm) |
| 576 | outp += sprintf(outp, "%8d", p->pkg_temp_c); | 621 | outp += sprintf(outp, "%8d", p->pkg_temp_c); |
| 577 | 622 | ||
| 623 | /* GFXrc6 */ | ||
| 624 | if (do_gfx_rc6_ms) | ||
| 625 | outp += sprintf(outp, "%8.2f", 100.0 * p->gfx_rc6_ms / 1000.0 / interval_float); | ||
| 626 | |||
| 627 | /* GFXMHz */ | ||
| 628 | if (do_gfx_mhz) | ||
| 629 | outp += sprintf(outp, "%8d", p->gfx_mhz); | ||
| 630 | |||
| 578 | /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ | 631 | /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ |
| 579 | if (do_skl_residency) { | 632 | if (do_skl_residency) { |
| 580 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); | 633 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); |
| @@ -645,15 +698,24 @@ done: | |||
| 645 | return 0; | 698 | return 0; |
| 646 | } | 699 | } |
| 647 | 700 | ||
| 648 | void flush_stdout() | 701 | void flush_output_stdout(void) |
| 649 | { | 702 | { |
| 650 | fputs(output_buffer, stdout); | 703 | FILE *filep; |
| 651 | fflush(stdout); | 704 | |
| 705 | if (outf == stderr) | ||
| 706 | filep = stdout; | ||
| 707 | else | ||
| 708 | filep = outf; | ||
| 709 | |||
| 710 | fputs(output_buffer, filep); | ||
| 711 | fflush(filep); | ||
| 712 | |||
| 652 | outp = output_buffer; | 713 | outp = output_buffer; |
| 653 | } | 714 | } |
| 654 | void flush_stderr() | 715 | void flush_output_stderr(void) |
| 655 | { | 716 | { |
| 656 | fputs(output_buffer, stderr); | 717 | fputs(output_buffer, outf); |
| 718 | fflush(outf); | ||
| 657 | outp = output_buffer; | 719 | outp = output_buffer; |
| 658 | } | 720 | } |
| 659 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 721 | void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
| @@ -704,6 +766,9 @@ delta_package(struct pkg_data *new, struct pkg_data *old) | |||
| 704 | old->pc10 = new->pc10 - old->pc10; | 766 | old->pc10 = new->pc10 - old->pc10; |
| 705 | old->pkg_temp_c = new->pkg_temp_c; | 767 | old->pkg_temp_c = new->pkg_temp_c; |
| 706 | 768 | ||
| 769 | old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; | ||
| 770 | old->gfx_mhz = new->gfx_mhz; | ||
| 771 | |||
| 707 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); | 772 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); |
| 708 | DELTA_WRAP32(new->energy_cores, old->energy_cores); | 773 | DELTA_WRAP32(new->energy_cores, old->energy_cores); |
| 709 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); | 774 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); |
| @@ -745,9 +810,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
| 745 | } else { | 810 | } else { |
| 746 | 811 | ||
| 747 | if (!aperf_mperf_unstable) { | 812 | if (!aperf_mperf_unstable) { |
| 748 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | 813 | fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname); |
| 749 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | 814 | fprintf(outf, "* Frequency results do not cover entire interval *\n"); |
| 750 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | 815 | fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n"); |
| 751 | 816 | ||
| 752 | aperf_mperf_unstable = 1; | 817 | aperf_mperf_unstable = 1; |
| 753 | } | 818 | } |
| @@ -782,7 +847,8 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
| 782 | } | 847 | } |
| 783 | 848 | ||
| 784 | if (old->mperf == 0) { | 849 | if (old->mperf == 0) { |
| 785 | if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); | 850 | if (debug > 1) |
| 851 | fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); | ||
| 786 | old->mperf = 1; /* divide by 0 protection */ | 852 | old->mperf = 1; /* divide by 0 protection */ |
| 787 | } | 853 | } |
| 788 | 854 | ||
| @@ -797,6 +863,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
| 797 | old->extra_msr32 = new->extra_msr32; | 863 | old->extra_msr32 = new->extra_msr32; |
| 798 | old->extra_msr64 = new->extra_msr64; | 864 | old->extra_msr64 = new->extra_msr64; |
| 799 | 865 | ||
| 866 | if (do_irq) | ||
| 867 | old->irq_count = new->irq_count - old->irq_count; | ||
| 868 | |||
| 800 | if (do_smi) | 869 | if (do_smi) |
| 801 | old->smi_count = new->smi_count - old->smi_count; | 870 | old->smi_count = new->smi_count - old->smi_count; |
| 802 | } | 871 | } |
| @@ -826,10 +895,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 826 | t->mperf = 0; | 895 | t->mperf = 0; |
| 827 | t->c1 = 0; | 896 | t->c1 = 0; |
| 828 | 897 | ||
| 829 | t->smi_count = 0; | ||
| 830 | t->extra_delta32 = 0; | 898 | t->extra_delta32 = 0; |
| 831 | t->extra_delta64 = 0; | 899 | t->extra_delta64 = 0; |
| 832 | 900 | ||
| 901 | t->irq_count = 0; | ||
| 902 | t->smi_count = 0; | ||
| 903 | |||
| 833 | /* tells format_counters to dump all fields from this set */ | 904 | /* tells format_counters to dump all fields from this set */ |
| 834 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; | 905 | t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; |
| 835 | 906 | ||
| @@ -861,6 +932,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 861 | p->rapl_pkg_perf_status = 0; | 932 | p->rapl_pkg_perf_status = 0; |
| 862 | p->rapl_dram_perf_status = 0; | 933 | p->rapl_dram_perf_status = 0; |
| 863 | p->pkg_temp_c = 0; | 934 | p->pkg_temp_c = 0; |
| 935 | |||
| 936 | p->gfx_rc6_ms = 0; | ||
| 937 | p->gfx_mhz = 0; | ||
| 864 | } | 938 | } |
| 865 | int sum_counters(struct thread_data *t, struct core_data *c, | 939 | int sum_counters(struct thread_data *t, struct core_data *c, |
| 866 | struct pkg_data *p) | 940 | struct pkg_data *p) |
| @@ -873,6 +947,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
| 873 | average.threads.extra_delta32 += t->extra_delta32; | 947 | average.threads.extra_delta32 += t->extra_delta32; |
| 874 | average.threads.extra_delta64 += t->extra_delta64; | 948 | average.threads.extra_delta64 += t->extra_delta64; |
| 875 | 949 | ||
| 950 | average.threads.irq_count += t->irq_count; | ||
| 951 | average.threads.smi_count += t->smi_count; | ||
| 952 | |||
| 876 | /* sum per-core values only for 1st thread in core */ | 953 | /* sum per-core values only for 1st thread in core */ |
| 877 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 954 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
| 878 | return 0; | 955 | return 0; |
| @@ -910,6 +987,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
| 910 | average.packages.energy_cores += p->energy_cores; | 987 | average.packages.energy_cores += p->energy_cores; |
| 911 | average.packages.energy_gfx += p->energy_gfx; | 988 | average.packages.energy_gfx += p->energy_gfx; |
| 912 | 989 | ||
| 990 | average.packages.gfx_rc6_ms = p->gfx_rc6_ms; | ||
| 991 | average.packages.gfx_mhz = p->gfx_mhz; | ||
| 992 | |||
| 913 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); | 993 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); |
| 914 | 994 | ||
| 915 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; | 995 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; |
| @@ -970,7 +1050,6 @@ static unsigned long long rdtsc(void) | |||
| 970 | return low | ((unsigned long long)high) << 32; | 1050 | return low | ((unsigned long long)high) << 32; |
| 971 | } | 1051 | } |
| 972 | 1052 | ||
| 973 | |||
| 974 | /* | 1053 | /* |
| 975 | * get_counters(...) | 1054 | * get_counters(...) |
| 976 | * migrate to cpu | 1055 | * migrate to cpu |
| @@ -980,23 +1059,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 980 | { | 1059 | { |
| 981 | int cpu = t->cpu_id; | 1060 | int cpu = t->cpu_id; |
| 982 | unsigned long long msr; | 1061 | unsigned long long msr; |
| 1062 | int aperf_mperf_retry_count = 0; | ||
| 983 | 1063 | ||
| 984 | if (cpu_migrate(cpu)) { | 1064 | if (cpu_migrate(cpu)) { |
| 985 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 1065 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
| 986 | return -1; | 1066 | return -1; |
| 987 | } | 1067 | } |
| 988 | 1068 | ||
| 1069 | retry: | ||
| 989 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ | 1070 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
| 990 | 1071 | ||
| 991 | if (has_aperf) { | 1072 | if (has_aperf) { |
| 1073 | unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; | ||
| 1074 | |||
| 1075 | /* | ||
| 1076 | * The TSC, APERF and MPERF must be read together for | ||
| 1077 | * APERF/MPERF and MPERF/TSC to give accurate results. | ||
| 1078 | * | ||
| 1079 | * Unfortunately, APERF and MPERF are read by | ||
| 1080 | * individual system call, so delays may occur | ||
| 1081 | * between them. If the time to read them | ||
| 1082 | * varies by a large amount, we re-read them. | ||
| 1083 | */ | ||
| 1084 | |||
| 1085 | /* | ||
| 1086 | * This initial dummy APERF read has been seen to | ||
| 1087 | * reduce jitter in the subsequent reads. | ||
| 1088 | */ | ||
| 1089 | |||
| 1090 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) | ||
| 1091 | return -3; | ||
| 1092 | |||
| 1093 | t->tsc = rdtsc(); /* re-read close to APERF */ | ||
| 1094 | |||
| 1095 | tsc_before = t->tsc; | ||
| 1096 | |||
| 992 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) | 1097 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) |
| 993 | return -3; | 1098 | return -3; |
| 1099 | |||
| 1100 | tsc_between = rdtsc(); | ||
| 1101 | |||
| 994 | if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) | 1102 | if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) |
| 995 | return -4; | 1103 | return -4; |
| 1104 | |||
| 1105 | tsc_after = rdtsc(); | ||
| 1106 | |||
| 1107 | aperf_time = tsc_between - tsc_before; | ||
| 1108 | mperf_time = tsc_after - tsc_between; | ||
| 1109 | |||
| 1110 | /* | ||
| 1111 | * If the system call latency to read APERF and MPERF | ||
| 1112 | * differ by more than 2x, then try again. | ||
| 1113 | */ | ||
| 1114 | if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { | ||
| 1115 | aperf_mperf_retry_count++; | ||
| 1116 | if (aperf_mperf_retry_count < 5) | ||
| 1117 | goto retry; | ||
| 1118 | else | ||
| 1119 | warnx("cpu%d jitter %lld %lld", | ||
| 1120 | cpu, aperf_time, mperf_time); | ||
| 1121 | } | ||
| 1122 | aperf_mperf_retry_count = 0; | ||
| 1123 | |||
| 996 | t->aperf = t->aperf * aperf_mperf_multiplier; | 1124 | t->aperf = t->aperf * aperf_mperf_multiplier; |
| 997 | t->mperf = t->mperf * aperf_mperf_multiplier; | 1125 | t->mperf = t->mperf * aperf_mperf_multiplier; |
| 998 | } | 1126 | } |
| 999 | 1127 | ||
| 1128 | if (do_irq) | ||
| 1129 | t->irq_count = irqs_per_cpu[cpu]; | ||
| 1000 | if (do_smi) { | 1130 | if (do_smi) { |
| 1001 | if (get_msr(cpu, MSR_SMI_COUNT, &msr)) | 1131 | if (get_msr(cpu, MSR_SMI_COUNT, &msr)) |
| 1002 | return -5; | 1132 | return -5; |
| @@ -1124,6 +1254,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 1124 | return -17; | 1254 | return -17; |
| 1125 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | 1255 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); |
| 1126 | } | 1256 | } |
| 1257 | |||
| 1258 | if (do_gfx_rc6_ms) | ||
| 1259 | p->gfx_rc6_ms = gfx_cur_rc6_ms; | ||
| 1260 | |||
| 1261 | if (do_gfx_mhz) | ||
| 1262 | p->gfx_mhz = gfx_cur_mhz; | ||
| 1263 | |||
| 1127 | return 0; | 1264 | return 0; |
| 1128 | } | 1265 | } |
| 1129 | 1266 | ||
| @@ -1175,18 +1312,18 @@ dump_nhm_platform_info(void) | |||
| 1175 | 1312 | ||
| 1176 | get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); | 1313 | get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); |
| 1177 | 1314 | ||
| 1178 | fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); | 1315 | fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); |
| 1179 | 1316 | ||
| 1180 | ratio = (msr >> 40) & 0xFF; | 1317 | ratio = (msr >> 40) & 0xFF; |
| 1181 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", | 1318 | fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n", |
| 1182 | ratio, bclk, ratio * bclk); | 1319 | ratio, bclk, ratio * bclk); |
| 1183 | 1320 | ||
| 1184 | ratio = (msr >> 8) & 0xFF; | 1321 | ratio = (msr >> 8) & 0xFF; |
| 1185 | fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", | 1322 | fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n", |
| 1186 | ratio, bclk, ratio * bclk); | 1323 | ratio, bclk, ratio * bclk); |
| 1187 | 1324 | ||
| 1188 | get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); | 1325 | get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); |
| 1189 | fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", | 1326 | fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", |
| 1190 | base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); | 1327 | base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); |
| 1191 | 1328 | ||
| 1192 | return; | 1329 | return; |
| @@ -1200,16 +1337,16 @@ dump_hsw_turbo_ratio_limits(void) | |||
| 1200 | 1337 | ||
| 1201 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); | 1338 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); |
| 1202 | 1339 | ||
| 1203 | fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); | 1340 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); |
| 1204 | 1341 | ||
| 1205 | ratio = (msr >> 8) & 0xFF; | 1342 | ratio = (msr >> 8) & 0xFF; |
| 1206 | if (ratio) | 1343 | if (ratio) |
| 1207 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", | 1344 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", |
| 1208 | ratio, bclk, ratio * bclk); | 1345 | ratio, bclk, ratio * bclk); |
| 1209 | 1346 | ||
| 1210 | ratio = (msr >> 0) & 0xFF; | 1347 | ratio = (msr >> 0) & 0xFF; |
| 1211 | if (ratio) | 1348 | if (ratio) |
| 1212 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", | 1349 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", |
| 1213 | ratio, bclk, ratio * bclk); | 1350 | ratio, bclk, ratio * bclk); |
| 1214 | return; | 1351 | return; |
| 1215 | } | 1352 | } |
| @@ -1222,46 +1359,46 @@ dump_ivt_turbo_ratio_limits(void) | |||
| 1222 | 1359 | ||
| 1223 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); | 1360 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); |
| 1224 | 1361 | ||
| 1225 | fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); | 1362 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); |
| 1226 | 1363 | ||
| 1227 | ratio = (msr >> 56) & 0xFF; | 1364 | ratio = (msr >> 56) & 0xFF; |
| 1228 | if (ratio) | 1365 | if (ratio) |
| 1229 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", | 1366 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", |
| 1230 | ratio, bclk, ratio * bclk); | 1367 | ratio, bclk, ratio * bclk); |
| 1231 | 1368 | ||
| 1232 | ratio = (msr >> 48) & 0xFF; | 1369 | ratio = (msr >> 48) & 0xFF; |
| 1233 | if (ratio) | 1370 | if (ratio) |
| 1234 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", | 1371 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", |
| 1235 | ratio, bclk, ratio * bclk); | 1372 | ratio, bclk, ratio * bclk); |
| 1236 | 1373 | ||
| 1237 | ratio = (msr >> 40) & 0xFF; | 1374 | ratio = (msr >> 40) & 0xFF; |
| 1238 | if (ratio) | 1375 | if (ratio) |
| 1239 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", | 1376 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", |
| 1240 | ratio, bclk, ratio * bclk); | 1377 | ratio, bclk, ratio * bclk); |
| 1241 | 1378 | ||
| 1242 | ratio = (msr >> 32) & 0xFF; | 1379 | ratio = (msr >> 32) & 0xFF; |
| 1243 | if (ratio) | 1380 | if (ratio) |
| 1244 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", | 1381 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", |
| 1245 | ratio, bclk, ratio * bclk); | 1382 | ratio, bclk, ratio * bclk); |
| 1246 | 1383 | ||
| 1247 | ratio = (msr >> 24) & 0xFF; | 1384 | ratio = (msr >> 24) & 0xFF; |
| 1248 | if (ratio) | 1385 | if (ratio) |
| 1249 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", | 1386 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", |
| 1250 | ratio, bclk, ratio * bclk); | 1387 | ratio, bclk, ratio * bclk); |
| 1251 | 1388 | ||
| 1252 | ratio = (msr >> 16) & 0xFF; | 1389 | ratio = (msr >> 16) & 0xFF; |
| 1253 | if (ratio) | 1390 | if (ratio) |
| 1254 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", | 1391 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", |
| 1255 | ratio, bclk, ratio * bclk); | 1392 | ratio, bclk, ratio * bclk); |
| 1256 | 1393 | ||
| 1257 | ratio = (msr >> 8) & 0xFF; | 1394 | ratio = (msr >> 8) & 0xFF; |
| 1258 | if (ratio) | 1395 | if (ratio) |
| 1259 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", | 1396 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", |
| 1260 | ratio, bclk, ratio * bclk); | 1397 | ratio, bclk, ratio * bclk); |
| 1261 | 1398 | ||
| 1262 | ratio = (msr >> 0) & 0xFF; | 1399 | ratio = (msr >> 0) & 0xFF; |
| 1263 | if (ratio) | 1400 | if (ratio) |
| 1264 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", | 1401 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", |
| 1265 | ratio, bclk, ratio * bclk); | 1402 | ratio, bclk, ratio * bclk); |
| 1266 | return; | 1403 | return; |
| 1267 | } | 1404 | } |
| @@ -1274,46 +1411,46 @@ dump_nhm_turbo_ratio_limits(void) | |||
| 1274 | 1411 | ||
| 1275 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); | 1412 | get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); |
| 1276 | 1413 | ||
| 1277 | fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); | 1414 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); |
| 1278 | 1415 | ||
| 1279 | ratio = (msr >> 56) & 0xFF; | 1416 | ratio = (msr >> 56) & 0xFF; |
| 1280 | if (ratio) | 1417 | if (ratio) |
| 1281 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", | 1418 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", |
| 1282 | ratio, bclk, ratio * bclk); | 1419 | ratio, bclk, ratio * bclk); |
| 1283 | 1420 | ||
| 1284 | ratio = (msr >> 48) & 0xFF; | 1421 | ratio = (msr >> 48) & 0xFF; |
| 1285 | if (ratio) | 1422 | if (ratio) |
| 1286 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", | 1423 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", |
| 1287 | ratio, bclk, ratio * bclk); | 1424 | ratio, bclk, ratio * bclk); |
| 1288 | 1425 | ||
| 1289 | ratio = (msr >> 40) & 0xFF; | 1426 | ratio = (msr >> 40) & 0xFF; |
| 1290 | if (ratio) | 1427 | if (ratio) |
| 1291 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", | 1428 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", |
| 1292 | ratio, bclk, ratio * bclk); | 1429 | ratio, bclk, ratio * bclk); |
| 1293 | 1430 | ||
| 1294 | ratio = (msr >> 32) & 0xFF; | 1431 | ratio = (msr >> 32) & 0xFF; |
| 1295 | if (ratio) | 1432 | if (ratio) |
| 1296 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", | 1433 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", |
| 1297 | ratio, bclk, ratio * bclk); | 1434 | ratio, bclk, ratio * bclk); |
| 1298 | 1435 | ||
| 1299 | ratio = (msr >> 24) & 0xFF; | 1436 | ratio = (msr >> 24) & 0xFF; |
| 1300 | if (ratio) | 1437 | if (ratio) |
| 1301 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", | 1438 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", |
| 1302 | ratio, bclk, ratio * bclk); | 1439 | ratio, bclk, ratio * bclk); |
| 1303 | 1440 | ||
| 1304 | ratio = (msr >> 16) & 0xFF; | 1441 | ratio = (msr >> 16) & 0xFF; |
| 1305 | if (ratio) | 1442 | if (ratio) |
| 1306 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", | 1443 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", |
| 1307 | ratio, bclk, ratio * bclk); | 1444 | ratio, bclk, ratio * bclk); |
| 1308 | 1445 | ||
| 1309 | ratio = (msr >> 8) & 0xFF; | 1446 | ratio = (msr >> 8) & 0xFF; |
| 1310 | if (ratio) | 1447 | if (ratio) |
| 1311 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", | 1448 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", |
| 1312 | ratio, bclk, ratio * bclk); | 1449 | ratio, bclk, ratio * bclk); |
| 1313 | 1450 | ||
| 1314 | ratio = (msr >> 0) & 0xFF; | 1451 | ratio = (msr >> 0) & 0xFF; |
| 1315 | if (ratio) | 1452 | if (ratio) |
| 1316 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", | 1453 | fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", |
| 1317 | ratio, bclk, ratio * bclk); | 1454 | ratio, bclk, ratio * bclk); |
| 1318 | return; | 1455 | return; |
| 1319 | } | 1456 | } |
| @@ -1321,21 +1458,23 @@ dump_nhm_turbo_ratio_limits(void) | |||
| 1321 | static void | 1458 | static void |
| 1322 | dump_knl_turbo_ratio_limits(void) | 1459 | dump_knl_turbo_ratio_limits(void) |
| 1323 | { | 1460 | { |
| 1324 | int cores; | 1461 | const unsigned int buckets_no = 7; |
| 1325 | unsigned int ratio; | 1462 | |
| 1326 | unsigned long long msr; | 1463 | unsigned long long msr; |
| 1327 | int delta_cores; | 1464 | int delta_cores, delta_ratio; |
| 1328 | int delta_ratio; | 1465 | int i, b_nr; |
| 1329 | int i; | 1466 | unsigned int cores[buckets_no]; |
| 1467 | unsigned int ratio[buckets_no]; | ||
| 1330 | 1468 | ||
| 1331 | get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | 1469 | get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
| 1332 | 1470 | ||
| 1333 | fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", | 1471 | fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", |
| 1334 | base_cpu, msr); | 1472 | base_cpu, msr); |
| 1335 | 1473 | ||
| 1336 | /** | 1474 | /** |
| 1337 | * Turbo encoding in KNL is as follows: | 1475 | * Turbo encoding in KNL is as follows: |
| 1338 | * [7:0] -- Base value of number of active cores of bucket 1. | 1476 | * [0] -- Reserved |
| 1477 | * [7:1] -- Base value of number of active cores of bucket 1. | ||
| 1339 | * [15:8] -- Base value of freq ratio of bucket 1. | 1478 | * [15:8] -- Base value of freq ratio of bucket 1. |
| 1340 | * [20:16] -- +ve delta of number of active cores of bucket 2. | 1479 | * [20:16] -- +ve delta of number of active cores of bucket 2. |
| 1341 | * i.e. active cores of bucket 2 = | 1480 | * i.e. active cores of bucket 2 = |
| @@ -1354,29 +1493,25 @@ dump_knl_turbo_ratio_limits(void) | |||
| 1354 | * [60:56]-- +ve delta of number of active cores of bucket 7. | 1493 | * [60:56]-- +ve delta of number of active cores of bucket 7. |
| 1355 | * [63:61]-- -ve delta of freq ratio of bucket 7. | 1494 | * [63:61]-- -ve delta of freq ratio of bucket 7. |
| 1356 | */ | 1495 | */ |
| 1357 | cores = msr & 0xFF; | 1496 | |
| 1358 | ratio = (msr >> 8) && 0xFF; | 1497 | b_nr = 0; |
| 1359 | if (ratio > 0) | 1498 | cores[b_nr] = (msr & 0xFF) >> 1; |
| 1360 | fprintf(stderr, | 1499 | ratio[b_nr] = (msr >> 8) & 0xFF; |
| 1361 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | 1500 | |
| 1362 | ratio, bclk, ratio * bclk, cores); | 1501 | for (i = 16; i < 64; i += 8) { |
| 1363 | |||
| 1364 | for (i = 16; i < 64; i = i + 8) { | ||
| 1365 | delta_cores = (msr >> i) & 0x1F; | 1502 | delta_cores = (msr >> i) & 0x1F; |
| 1366 | delta_ratio = (msr >> (i + 5)) && 0x7; | 1503 | delta_ratio = (msr >> (i + 5)) & 0x7; |
| 1367 | if (!delta_cores || !delta_ratio) | 1504 | |
| 1368 | return; | 1505 | cores[b_nr + 1] = cores[b_nr] + delta_cores; |
| 1369 | cores = cores + delta_cores; | 1506 | ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; |
| 1370 | ratio = ratio - delta_ratio; | 1507 | b_nr++; |
| 1371 | |||
| 1372 | /** -ve ratios will make successive ratio calculations | ||
| 1373 | * negative. Hence return instead of carrying on. | ||
| 1374 | */ | ||
| 1375 | if (ratio > 0) | ||
| 1376 | fprintf(stderr, | ||
| 1377 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
| 1378 | ratio, bclk, ratio * bclk, cores); | ||
| 1379 | } | 1508 | } |
| 1509 | |||
| 1510 | for (i = buckets_no - 1; i >= 0; i--) | ||
| 1511 | if (i > 0 ? ratio[i] != ratio[i - 1] : 1) | ||
| 1512 | fprintf(outf, | ||
| 1513 | "%d * %.0f = %.0f MHz max turbo %d active cores\n", | ||
| 1514 | ratio[i], bclk, ratio[i] * bclk, cores[i]); | ||
| 1380 | } | 1515 | } |
| 1381 | 1516 | ||
| 1382 | static void | 1517 | static void |
| @@ -1389,15 +1524,15 @@ dump_nhm_cst_cfg(void) | |||
| 1389 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | 1524 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) |
| 1390 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | 1525 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) |
| 1391 | 1526 | ||
| 1392 | fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); | 1527 | fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr); |
| 1393 | 1528 | ||
| 1394 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", | 1529 | fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", |
| 1395 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | 1530 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", |
| 1396 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | 1531 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", |
| 1397 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | 1532 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", |
| 1398 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | 1533 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", |
| 1399 | (msr & (1 << 15)) ? "" : "UN", | 1534 | (msr & (1 << 15)) ? "" : "UN", |
| 1400 | (unsigned int)msr & 7, | 1535 | (unsigned int)msr & 0xF, |
| 1401 | pkg_cstate_limit_strings[pkg_cstate_limit]); | 1536 | pkg_cstate_limit_strings[pkg_cstate_limit]); |
| 1402 | return; | 1537 | return; |
| 1403 | } | 1538 | } |
| @@ -1408,48 +1543,59 @@ dump_config_tdp(void) | |||
| 1408 | unsigned long long msr; | 1543 | unsigned long long msr; |
| 1409 | 1544 | ||
| 1410 | get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); | 1545 | get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); |
| 1411 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); | 1546 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); |
| 1412 | fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF); | 1547 | fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); |
| 1413 | 1548 | ||
| 1414 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); | 1549 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); |
| 1415 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); | 1550 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); |
| 1416 | if (msr) { | 1551 | if (msr) { |
| 1417 | fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF); | 1552 | fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); |
| 1418 | fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF); | 1553 | fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); |
| 1419 | fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); | 1554 | fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); |
| 1420 | fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF); | 1555 | fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); |
| 1421 | } | 1556 | } |
| 1422 | fprintf(stderr, ")\n"); | 1557 | fprintf(outf, ")\n"); |
| 1423 | 1558 | ||
| 1424 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); | 1559 | get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); |
| 1425 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); | 1560 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); |
| 1426 | if (msr) { | 1561 | if (msr) { |
| 1427 | fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF); | 1562 | fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); |
| 1428 | fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF); | 1563 | fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); |
| 1429 | fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF); | 1564 | fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); |
| 1430 | fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF); | 1565 | fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); |
| 1431 | } | 1566 | } |
| 1432 | fprintf(stderr, ")\n"); | 1567 | fprintf(outf, ")\n"); |
| 1433 | 1568 | ||
| 1434 | get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); | 1569 | get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); |
| 1435 | fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); | 1570 | fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); |
| 1436 | if ((msr) & 0x3) | 1571 | if ((msr) & 0x3) |
| 1437 | fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); | 1572 | fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); |
| 1438 | fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); | 1573 | fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); |
| 1439 | fprintf(stderr, ")\n"); | 1574 | fprintf(outf, ")\n"); |
| 1440 | 1575 | ||
| 1441 | get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); | 1576 | get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); |
| 1442 | fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); | 1577 | fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); |
| 1443 | fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F); | 1578 | fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); |
| 1444 | fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1); | 1579 | fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); |
| 1445 | fprintf(stderr, ")\n"); | 1580 | fprintf(outf, ")\n"); |
| 1581 | } | ||
| 1582 | void free_fd_percpu(void) | ||
| 1583 | { | ||
| 1584 | int i; | ||
| 1585 | |||
| 1586 | for (i = 0; i < topo.max_cpu_num; ++i) { | ||
| 1587 | if (fd_percpu[i] != 0) | ||
| 1588 | close(fd_percpu[i]); | ||
| 1589 | } | ||
| 1590 | |||
| 1591 | free(fd_percpu); | ||
| 1446 | } | 1592 | } |
| 1447 | 1593 | ||
| 1448 | void free_all_buffers(void) | 1594 | void free_all_buffers(void) |
| 1449 | { | 1595 | { |
| 1450 | CPU_FREE(cpu_present_set); | 1596 | CPU_FREE(cpu_present_set); |
| 1451 | cpu_present_set = NULL; | 1597 | cpu_present_set = NULL; |
| 1452 | cpu_present_set = 0; | 1598 | cpu_present_setsize = 0; |
| 1453 | 1599 | ||
| 1454 | CPU_FREE(cpu_affinity_set); | 1600 | CPU_FREE(cpu_affinity_set); |
| 1455 | cpu_affinity_set = NULL; | 1601 | cpu_affinity_set = NULL; |
| @@ -1474,6 +1620,11 @@ void free_all_buffers(void) | |||
| 1474 | free(output_buffer); | 1620 | free(output_buffer); |
| 1475 | output_buffer = NULL; | 1621 | output_buffer = NULL; |
| 1476 | outp = NULL; | 1622 | outp = NULL; |
| 1623 | |||
| 1624 | free_fd_percpu(); | ||
| 1625 | |||
| 1626 | free(irq_column_2_cpu); | ||
| 1627 | free(irqs_per_cpu); | ||
| 1477 | } | 1628 | } |
| 1478 | 1629 | ||
| 1479 | /* | 1630 | /* |
| @@ -1481,7 +1632,7 @@ void free_all_buffers(void) | |||
| 1481 | */ | 1632 | */ |
| 1482 | FILE *fopen_or_die(const char *path, const char *mode) | 1633 | FILE *fopen_or_die(const char *path, const char *mode) |
| 1483 | { | 1634 | { |
| 1484 | FILE *filep = fopen(path, "r"); | 1635 | FILE *filep = fopen(path, mode); |
| 1485 | if (!filep) | 1636 | if (!filep) |
| 1486 | err(1, "%s: open failed", path); | 1637 | err(1, "%s: open failed", path); |
| 1487 | return filep; | 1638 | return filep; |
| @@ -1696,6 +1847,136 @@ int mark_cpu_present(int cpu) | |||
| 1696 | return 0; | 1847 | return 0; |
| 1697 | } | 1848 | } |
| 1698 | 1849 | ||
| 1850 | /* | ||
| 1851 | * snapshot_proc_interrupts() | ||
| 1852 | * | ||
| 1853 | * read and record summary of /proc/interrupts | ||
| 1854 | * | ||
| 1855 | * return 1 if config change requires a restart, else return 0 | ||
| 1856 | */ | ||
| 1857 | int snapshot_proc_interrupts(void) | ||
| 1858 | { | ||
| 1859 | static FILE *fp; | ||
| 1860 | int column, retval; | ||
| 1861 | |||
| 1862 | if (fp == NULL) | ||
| 1863 | fp = fopen_or_die("/proc/interrupts", "r"); | ||
| 1864 | else | ||
| 1865 | rewind(fp); | ||
| 1866 | |||
| 1867 | /* read 1st line of /proc/interrupts to get cpu* name for each column */ | ||
| 1868 | for (column = 0; column < topo.num_cpus; ++column) { | ||
| 1869 | int cpu_number; | ||
| 1870 | |||
| 1871 | retval = fscanf(fp, " CPU%d", &cpu_number); | ||
| 1872 | if (retval != 1) | ||
| 1873 | break; | ||
| 1874 | |||
| 1875 | if (cpu_number > topo.max_cpu_num) { | ||
| 1876 | warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); | ||
| 1877 | return 1; | ||
| 1878 | } | ||
| 1879 | |||
| 1880 | irq_column_2_cpu[column] = cpu_number; | ||
| 1881 | irqs_per_cpu[cpu_number] = 0; | ||
| 1882 | } | ||
| 1883 | |||
| 1884 | /* read /proc/interrupts count lines and sum up irqs per cpu */ | ||
| 1885 | while (1) { | ||
| 1886 | int column; | ||
| 1887 | char buf[64]; | ||
| 1888 | |||
| 1889 | retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ | ||
| 1890 | if (retval != 1) | ||
| 1891 | break; | ||
| 1892 | |||
| 1893 | /* read the count per cpu */ | ||
| 1894 | for (column = 0; column < topo.num_cpus; ++column) { | ||
| 1895 | |||
| 1896 | int cpu_number, irq_count; | ||
| 1897 | |||
| 1898 | retval = fscanf(fp, " %d", &irq_count); | ||
| 1899 | if (retval != 1) | ||
| 1900 | break; | ||
| 1901 | |||
| 1902 | cpu_number = irq_column_2_cpu[column]; | ||
| 1903 | irqs_per_cpu[cpu_number] += irq_count; | ||
| 1904 | |||
| 1905 | } | ||
| 1906 | |||
| 1907 | while (getc(fp) != '\n') | ||
| 1908 | ; /* flush interrupt description */ | ||
| 1909 | |||
| 1910 | } | ||
| 1911 | return 0; | ||
| 1912 | } | ||
| 1913 | /* | ||
| 1914 | * snapshot_gfx_rc6_ms() | ||
| 1915 | * | ||
| 1916 | * record snapshot of | ||
| 1917 | * /sys/class/drm/card0/power/rc6_residency_ms | ||
| 1918 | * | ||
| 1919 | * return 1 if config change requires a restart, else return 0 | ||
| 1920 | */ | ||
| 1921 | int snapshot_gfx_rc6_ms(void) | ||
| 1922 | { | ||
| 1923 | FILE *fp; | ||
| 1924 | int retval; | ||
| 1925 | |||
| 1926 | fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); | ||
| 1927 | |||
| 1928 | retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); | ||
| 1929 | if (retval != 1) | ||
| 1930 | err(1, "GFX rc6"); | ||
| 1931 | |||
| 1932 | fclose(fp); | ||
| 1933 | |||
| 1934 | return 0; | ||
| 1935 | } | ||
| 1936 | /* | ||
| 1937 | * snapshot_gfx_mhz() | ||
| 1938 | * | ||
| 1939 | * record snapshot of | ||
| 1940 | * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz | ||
| 1941 | * | ||
| 1942 | * return 1 if config change requires a restart, else return 0 | ||
| 1943 | */ | ||
| 1944 | int snapshot_gfx_mhz(void) | ||
| 1945 | { | ||
| 1946 | static FILE *fp; | ||
| 1947 | int retval; | ||
| 1948 | |||
| 1949 | if (fp == NULL) | ||
| 1950 | fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); | ||
| 1951 | else | ||
| 1952 | rewind(fp); | ||
| 1953 | |||
| 1954 | retval = fscanf(fp, "%d", &gfx_cur_mhz); | ||
| 1955 | if (retval != 1) | ||
| 1956 | err(1, "GFX MHz"); | ||
| 1957 | |||
| 1958 | return 0; | ||
| 1959 | } | ||
| 1960 | |||
| 1961 | /* | ||
| 1962 | * snapshot /proc and /sys files | ||
| 1963 | * | ||
| 1964 | * return 1 if configuration restart needed, else return 0 | ||
| 1965 | */ | ||
| 1966 | int snapshot_proc_sysfs_files(void) | ||
| 1967 | { | ||
| 1968 | if (snapshot_proc_interrupts()) | ||
| 1969 | return 1; | ||
| 1970 | |||
| 1971 | if (do_gfx_rc6_ms) | ||
| 1972 | snapshot_gfx_rc6_ms(); | ||
| 1973 | |||
| 1974 | if (do_gfx_mhz) | ||
| 1975 | snapshot_gfx_mhz(); | ||
| 1976 | |||
| 1977 | return 0; | ||
| 1978 | } | ||
| 1979 | |||
| 1699 | void turbostat_loop() | 1980 | void turbostat_loop() |
| 1700 | { | 1981 | { |
| 1701 | int retval; | 1982 | int retval; |
| @@ -1704,6 +1985,7 @@ void turbostat_loop() | |||
| 1704 | restart: | 1985 | restart: |
| 1705 | restarted++; | 1986 | restarted++; |
| 1706 | 1987 | ||
| 1988 | snapshot_proc_sysfs_files(); | ||
| 1707 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 1989 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
| 1708 | if (retval < -1) { | 1990 | if (retval < -1) { |
| 1709 | exit(retval); | 1991 | exit(retval); |
| @@ -1722,7 +2004,9 @@ restart: | |||
| 1722 | re_initialize(); | 2004 | re_initialize(); |
| 1723 | goto restart; | 2005 | goto restart; |
| 1724 | } | 2006 | } |
| 1725 | sleep(interval_sec); | 2007 | nanosleep(&interval_ts, NULL); |
| 2008 | if (snapshot_proc_sysfs_files()) | ||
| 2009 | goto restart; | ||
| 1726 | retval = for_all_cpus(get_counters, ODD_COUNTERS); | 2010 | retval = for_all_cpus(get_counters, ODD_COUNTERS); |
| 1727 | if (retval < -1) { | 2011 | if (retval < -1) { |
| 1728 | exit(retval); | 2012 | exit(retval); |
| @@ -1735,8 +2019,10 @@ restart: | |||
| 1735 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); | 2019 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
| 1736 | compute_average(EVEN_COUNTERS); | 2020 | compute_average(EVEN_COUNTERS); |
| 1737 | format_all_counters(EVEN_COUNTERS); | 2021 | format_all_counters(EVEN_COUNTERS); |
| 1738 | flush_stdout(); | 2022 | flush_output_stdout(); |
| 1739 | sleep(interval_sec); | 2023 | nanosleep(&interval_ts, NULL); |
| 2024 | if (snapshot_proc_sysfs_files()) | ||
| 2025 | goto restart; | ||
| 1740 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 2026 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
| 1741 | if (retval < -1) { | 2027 | if (retval < -1) { |
| 1742 | exit(retval); | 2028 | exit(retval); |
| @@ -1749,7 +2035,7 @@ restart: | |||
| 1749 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); | 2035 | for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); |
| 1750 | compute_average(ODD_COUNTERS); | 2036 | compute_average(ODD_COUNTERS); |
| 1751 | format_all_counters(ODD_COUNTERS); | 2037 | format_all_counters(ODD_COUNTERS); |
| 1752 | flush_stdout(); | 2038 | flush_output_stdout(); |
| 1753 | } | 2039 | } |
| 1754 | } | 2040 | } |
| 1755 | 2041 | ||
| @@ -1889,6 +2175,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
| 1889 | /* Nehalem compatible, but do not include turbo-ratio limit support */ | 2175 | /* Nehalem compatible, but do not include turbo-ratio limit support */ |
| 1890 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ | 2176 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ |
| 1891 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ | 2177 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ |
| 2178 | case 0x57: /* PHI - Knights Landing (different MSR definition) */ | ||
| 1892 | return 0; | 2179 | return 0; |
| 1893 | default: | 2180 | default: |
| 1894 | return 1; | 2181 | return 1; |
| @@ -1970,7 +2257,7 @@ int has_config_tdp(unsigned int family, unsigned int model) | |||
| 1970 | } | 2257 | } |
| 1971 | 2258 | ||
| 1972 | static void | 2259 | static void |
| 1973 | dump_cstate_pstate_config_info(family, model) | 2260 | dump_cstate_pstate_config_info(int family, int model) |
| 1974 | { | 2261 | { |
| 1975 | if (!do_nhm_platform_info) | 2262 | if (!do_nhm_platform_info) |
| 1976 | return; | 2263 | return; |
| @@ -2016,7 +2303,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2016 | return 0; | 2303 | return 0; |
| 2017 | 2304 | ||
| 2018 | if (cpu_migrate(cpu)) { | 2305 | if (cpu_migrate(cpu)) { |
| 2019 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2306 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
| 2020 | return -1; | 2307 | return -1; |
| 2021 | } | 2308 | } |
| 2022 | 2309 | ||
| @@ -2037,7 +2324,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2037 | epb_string = "custom"; | 2324 | epb_string = "custom"; |
| 2038 | break; | 2325 | break; |
| 2039 | } | 2326 | } |
| 2040 | fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); | 2327 | fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); |
| 2328 | |||
| 2329 | return 0; | ||
| 2330 | } | ||
| 2331 | /* | ||
| 2332 | * print_hwp() | ||
| 2333 | * Decode MSR_PM_ENABLE and the HWP MSRs (capabilities, request, interrupt, status) | ||
| 2334 | */ | ||
| 2335 | int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 2336 | { | ||
| 2337 | unsigned long long msr; | ||
| 2338 | int cpu; | ||
| 2339 | |||
| 2340 | if (!has_hwp) | ||
| 2341 | return 0; | ||
| 2342 | |||
| 2343 | cpu = t->cpu_id; | ||
| 2344 | |||
| 2345 | /* MSR_HWP_CAPABILITIES is per-package */ | ||
| 2346 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 2347 | return 0; | ||
| 2348 | |||
| 2349 | if (cpu_migrate(cpu)) { | ||
| 2350 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); | ||
| 2351 | return -1; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | if (get_msr(cpu, MSR_PM_ENABLE, &msr)) | ||
| 2355 | return 0; | ||
| 2356 | |||
| 2357 | fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", | ||
| 2358 | cpu, msr, (msr & (1 << 0)) ? "" : "No-"); | ||
| 2359 | |||
| 2360 | /* MSR_PM_ENABLE[0] == 1 if HWP is enabled and MSRs visible */ | ||
| 2361 | if ((msr & (1 << 0)) == 0) | ||
| 2362 | return 0; | ||
| 2363 | |||
| 2364 | if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) | ||
| 2365 | return 0; | ||
| 2366 | |||
| 2367 | fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " | ||
| 2368 | "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n", | ||
| 2369 | cpu, msr, | ||
| 2370 | (unsigned int)HWP_HIGHEST_PERF(msr), | ||
| 2371 | (unsigned int)HWP_GUARANTEED_PERF(msr), | ||
| 2372 | (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), | ||
| 2373 | (unsigned int)HWP_LOWEST_PERF(msr)); | ||
| 2374 | |||
| 2375 | if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) | ||
| 2376 | return 0; | ||
| 2377 | |||
| 2378 | fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " | ||
| 2379 | "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n", | ||
| 2380 | cpu, msr, | ||
| 2381 | (unsigned int)(((msr) >> 0) & 0xff), | ||
| 2382 | (unsigned int)(((msr) >> 8) & 0xff), | ||
| 2383 | (unsigned int)(((msr) >> 16) & 0xff), | ||
| 2384 | (unsigned int)(((msr) >> 24) & 0xff), | ||
| 2385 | (unsigned int)(((msr) >> 32) & 0xff3), | ||
| 2386 | (unsigned int)(((msr) >> 42) & 0x1)); | ||
| 2387 | |||
| 2388 | if (has_hwp_pkg) { | ||
| 2389 | if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) | ||
| 2390 | return 0; | ||
| 2391 | |||
| 2392 | fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " | ||
| 2393 | "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n", | ||
| 2394 | cpu, msr, | ||
| 2395 | (unsigned int)(((msr) >> 0) & 0xff), | ||
| 2396 | (unsigned int)(((msr) >> 8) & 0xff), | ||
| 2397 | (unsigned int)(((msr) >> 16) & 0xff), | ||
| 2398 | (unsigned int)(((msr) >> 24) & 0xff), | ||
| 2399 | (unsigned int)(((msr) >> 32) & 0xff3)); | ||
| 2400 | } | ||
| 2401 | if (has_hwp_notify) { | ||
| 2402 | if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) | ||
| 2403 | return 0; | ||
| 2404 | |||
| 2405 | fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " | ||
| 2406 | "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", | ||
| 2407 | cpu, msr, | ||
| 2408 | ((msr) & 0x1) ? "EN" : "Dis", | ||
| 2409 | ((msr) & 0x2) ? "EN" : "Dis"); | ||
| 2410 | } | ||
| 2411 | if (get_msr(cpu, MSR_HWP_STATUS, &msr)) | ||
| 2412 | return 0; | ||
| 2413 | |||
| 2414 | fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " | ||
| 2415 | "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", | ||
| 2416 | cpu, msr, | ||
| 2417 | ((msr) & 0x1) ? "" : "No-", | ||
| 2418 | ((msr) & 0x2) ? "" : "No-"); | ||
| 2041 | 2419 | ||
| 2042 | return 0; | 2420 | return 0; |
| 2043 | } | 2421 | } |
| @@ -2057,14 +2435,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 2057 | return 0; | 2435 | return 0; |
| 2058 | 2436 | ||
| 2059 | if (cpu_migrate(cpu)) { | 2437 | if (cpu_migrate(cpu)) { |
| 2060 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2438 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
| 2061 | return -1; | 2439 | return -1; |
| 2062 | } | 2440 | } |
| 2063 | 2441 | ||
| 2064 | if (do_core_perf_limit_reasons) { | 2442 | if (do_core_perf_limit_reasons) { |
| 2065 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); | 2443 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); |
| 2066 | fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 2444 | fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
| 2067 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", | 2445 | fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", |
| 2068 | (msr & 1 << 15) ? "bit15, " : "", | 2446 | (msr & 1 << 15) ? "bit15, " : "", |
| 2069 | (msr & 1 << 14) ? "bit14, " : "", | 2447 | (msr & 1 << 14) ? "bit14, " : "", |
| 2070 | (msr & 1 << 13) ? "Transitions, " : "", | 2448 | (msr & 1 << 13) ? "Transitions, " : "", |
| @@ -2079,7 +2457,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 2079 | (msr & 1 << 2) ? "bit2, " : "", | 2457 | (msr & 1 << 2) ? "bit2, " : "", |
| 2080 | (msr & 1 << 1) ? "ThermStatus, " : "", | 2458 | (msr & 1 << 1) ? "ThermStatus, " : "", |
| 2081 | (msr & 1 << 0) ? "PROCHOT, " : ""); | 2459 | (msr & 1 << 0) ? "PROCHOT, " : ""); |
| 2082 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", | 2460 | fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", |
| 2083 | (msr & 1 << 31) ? "bit31, " : "", | 2461 | (msr & 1 << 31) ? "bit31, " : "", |
| 2084 | (msr & 1 << 30) ? "bit30, " : "", | 2462 | (msr & 1 << 30) ? "bit30, " : "", |
| 2085 | (msr & 1 << 29) ? "Transitions, " : "", | 2463 | (msr & 1 << 29) ? "Transitions, " : "", |
| @@ -2098,8 +2476,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 2098 | } | 2476 | } |
| 2099 | if (do_gfx_perf_limit_reasons) { | 2477 | if (do_gfx_perf_limit_reasons) { |
| 2100 | get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); | 2478 | get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); |
| 2101 | fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 2479 | fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
| 2102 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", | 2480 | fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", |
| 2103 | (msr & 1 << 0) ? "PROCHOT, " : "", | 2481 | (msr & 1 << 0) ? "PROCHOT, " : "", |
| 2104 | (msr & 1 << 1) ? "ThermStatus, " : "", | 2482 | (msr & 1 << 1) ? "ThermStatus, " : "", |
| 2105 | (msr & 1 << 4) ? "Graphics, " : "", | 2483 | (msr & 1 << 4) ? "Graphics, " : "", |
| @@ -2108,7 +2486,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 2108 | (msr & 1 << 9) ? "GFXPwr, " : "", | 2486 | (msr & 1 << 9) ? "GFXPwr, " : "", |
| 2109 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | 2487 | (msr & 1 << 10) ? "PkgPwrL1, " : "", |
| 2110 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | 2488 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); |
| 2111 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", | 2489 | fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", |
| 2112 | (msr & 1 << 16) ? "PROCHOT, " : "", | 2490 | (msr & 1 << 16) ? "PROCHOT, " : "", |
| 2113 | (msr & 1 << 17) ? "ThermStatus, " : "", | 2491 | (msr & 1 << 17) ? "ThermStatus, " : "", |
| 2114 | (msr & 1 << 20) ? "Graphics, " : "", | 2492 | (msr & 1 << 20) ? "Graphics, " : "", |
| @@ -2120,15 +2498,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 2120 | } | 2498 | } |
| 2121 | if (do_ring_perf_limit_reasons) { | 2499 | if (do_ring_perf_limit_reasons) { |
| 2122 | get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); | 2500 | get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); |
| 2123 | fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 2501 | fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
| 2124 | fprintf(stderr, " (Active: %s%s%s%s%s%s)", | 2502 | fprintf(outf, " (Active: %s%s%s%s%s%s)", |
| 2125 | (msr & 1 << 0) ? "PROCHOT, " : "", | 2503 | (msr & 1 << 0) ? "PROCHOT, " : "", |
| 2126 | (msr & 1 << 1) ? "ThermStatus, " : "", | 2504 | (msr & 1 << 1) ? "ThermStatus, " : "", |
| 2127 | (msr & 1 << 6) ? "VR-Therm, " : "", | 2505 | (msr & 1 << 6) ? "VR-Therm, " : "", |
| 2128 | (msr & 1 << 8) ? "Amps, " : "", | 2506 | (msr & 1 << 8) ? "Amps, " : "", |
| 2129 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | 2507 | (msr & 1 << 10) ? "PkgPwrL1, " : "", |
| 2130 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | 2508 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); |
| 2131 | fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", | 2509 | fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", |
| 2132 | (msr & 1 << 16) ? "PROCHOT, " : "", | 2510 | (msr & 1 << 16) ? "PROCHOT, " : "", |
| 2133 | (msr & 1 << 17) ? "ThermStatus, " : "", | 2511 | (msr & 1 << 17) ? "ThermStatus, " : "", |
| 2134 | (msr & 1 << 22) ? "VR-Therm, " : "", | 2512 | (msr & 1 << 22) ? "VR-Therm, " : "", |
| @@ -2142,7 +2520,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 2142 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | 2520 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ |
| 2143 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | 2521 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ |
| 2144 | 2522 | ||
| 2145 | double get_tdp(model) | 2523 | double get_tdp(int model) |
| 2146 | { | 2524 | { |
| 2147 | unsigned long long msr; | 2525 | unsigned long long msr; |
| 2148 | 2526 | ||
| @@ -2251,12 +2629,12 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
| 2251 | 2629 | ||
| 2252 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; | 2630 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; |
| 2253 | if (debug) | 2631 | if (debug) |
| 2254 | fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); | 2632 | fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); |
| 2255 | 2633 | ||
| 2256 | return; | 2634 | return; |
| 2257 | } | 2635 | } |
| 2258 | 2636 | ||
| 2259 | void perf_limit_reasons_probe(family, model) | 2637 | void perf_limit_reasons_probe(int family, int model) |
| 2260 | { | 2638 | { |
| 2261 | if (!genuine_intel) | 2639 | if (!genuine_intel) |
| 2262 | return; | 2640 | return; |
| @@ -2293,7 +2671,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
| 2293 | return 0; | 2671 | return 0; |
| 2294 | 2672 | ||
| 2295 | if (cpu_migrate(cpu)) { | 2673 | if (cpu_migrate(cpu)) { |
| 2296 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2674 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
| 2297 | return -1; | 2675 | return -1; |
| 2298 | } | 2676 | } |
| 2299 | 2677 | ||
| @@ -2302,7 +2680,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
| 2302 | return 0; | 2680 | return 0; |
| 2303 | 2681 | ||
| 2304 | dts = (msr >> 16) & 0x7F; | 2682 | dts = (msr >> 16) & 0x7F; |
| 2305 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", | 2683 | fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", |
| 2306 | cpu, msr, tcc_activation_temp - dts); | 2684 | cpu, msr, tcc_activation_temp - dts); |
| 2307 | 2685 | ||
| 2308 | #ifdef THERM_DEBUG | 2686 | #ifdef THERM_DEBUG |
| @@ -2311,7 +2689,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
| 2311 | 2689 | ||
| 2312 | dts = (msr >> 16) & 0x7F; | 2690 | dts = (msr >> 16) & 0x7F; |
| 2313 | dts2 = (msr >> 8) & 0x7F; | 2691 | dts2 = (msr >> 8) & 0x7F; |
| 2314 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | 2692 | fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", |
| 2315 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | 2693 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); |
| 2316 | #endif | 2694 | #endif |
| 2317 | } | 2695 | } |
| @@ -2325,7 +2703,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
| 2325 | 2703 | ||
| 2326 | dts = (msr >> 16) & 0x7F; | 2704 | dts = (msr >> 16) & 0x7F; |
| 2327 | resolution = (msr >> 27) & 0xF; | 2705 | resolution = (msr >> 27) & 0xF; |
| 2328 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", | 2706 | fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", |
| 2329 | cpu, msr, tcc_activation_temp - dts, resolution); | 2707 | cpu, msr, tcc_activation_temp - dts, resolution); |
| 2330 | 2708 | ||
| 2331 | #ifdef THERM_DEBUG | 2709 | #ifdef THERM_DEBUG |
| @@ -2334,17 +2712,17 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p | |||
| 2334 | 2712 | ||
| 2335 | dts = (msr >> 16) & 0x7F; | 2713 | dts = (msr >> 16) & 0x7F; |
| 2336 | dts2 = (msr >> 8) & 0x7F; | 2714 | dts2 = (msr >> 8) & 0x7F; |
| 2337 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | 2715 | fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", |
| 2338 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | 2716 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); |
| 2339 | #endif | 2717 | #endif |
| 2340 | } | 2718 | } |
| 2341 | 2719 | ||
| 2342 | return 0; | 2720 | return 0; |
| 2343 | } | 2721 | } |
| 2344 | 2722 | ||
| 2345 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) | 2723 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) |
| 2346 | { | 2724 | { |
| 2347 | fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", | 2725 | fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", |
| 2348 | cpu, label, | 2726 | cpu, label, |
| 2349 | ((msr >> 15) & 1) ? "EN" : "DIS", | 2727 | ((msr >> 15) & 1) ? "EN" : "DIS", |
| 2350 | ((msr >> 0) & 0x7FFF) * rapl_power_units, | 2728 | ((msr >> 0) & 0x7FFF) * rapl_power_units, |
| @@ -2368,7 +2746,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2368 | 2746 | ||
| 2369 | cpu = t->cpu_id; | 2747 | cpu = t->cpu_id; |
| 2370 | if (cpu_migrate(cpu)) { | 2748 | if (cpu_migrate(cpu)) { |
| 2371 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 2749 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
| 2372 | return -1; | 2750 | return -1; |
| 2373 | } | 2751 | } |
| 2374 | 2752 | ||
| @@ -2376,7 +2754,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2376 | return -1; | 2754 | return -1; |
| 2377 | 2755 | ||
| 2378 | if (debug) { | 2756 | if (debug) { |
| 2379 | fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " | 2757 | fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " |
| 2380 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, | 2758 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, |
| 2381 | rapl_power_units, rapl_energy_units, rapl_time_units); | 2759 | rapl_power_units, rapl_energy_units, rapl_time_units); |
| 2382 | } | 2760 | } |
| @@ -2386,7 +2764,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2386 | return -5; | 2764 | return -5; |
| 2387 | 2765 | ||
| 2388 | 2766 | ||
| 2389 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | 2767 | fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", |
| 2390 | cpu, msr, | 2768 | cpu, msr, |
| 2391 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2769 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
| 2392 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2770 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
| @@ -2399,11 +2777,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2399 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) | 2777 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) |
| 2400 | return -9; | 2778 | return -9; |
| 2401 | 2779 | ||
| 2402 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2780 | fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", |
| 2403 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); | 2781 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); |
| 2404 | 2782 | ||
| 2405 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); | 2783 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); |
| 2406 | fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", | 2784 | fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", |
| 2407 | cpu, | 2785 | cpu, |
| 2408 | ((msr >> 47) & 1) ? "EN" : "DIS", | 2786 | ((msr >> 47) & 1) ? "EN" : "DIS", |
| 2409 | ((msr >> 32) & 0x7FFF) * rapl_power_units, | 2787 | ((msr >> 32) & 0x7FFF) * rapl_power_units, |
| @@ -2415,7 +2793,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2415 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) | 2793 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) |
| 2416 | return -6; | 2794 | return -6; |
| 2417 | 2795 | ||
| 2418 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | 2796 | fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", |
| 2419 | cpu, msr, | 2797 | cpu, msr, |
| 2420 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2798 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
| 2421 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2799 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
| @@ -2425,7 +2803,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2425 | if (do_rapl & RAPL_DRAM) { | 2803 | if (do_rapl & RAPL_DRAM) { |
| 2426 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) | 2804 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) |
| 2427 | return -9; | 2805 | return -9; |
| 2428 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2806 | fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", |
| 2429 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | 2807 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); |
| 2430 | 2808 | ||
| 2431 | print_power_limit_msr(cpu, msr, "DRAM Limit"); | 2809 | print_power_limit_msr(cpu, msr, "DRAM Limit"); |
| @@ -2435,7 +2813,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2435 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) | 2813 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) |
| 2436 | return -7; | 2814 | return -7; |
| 2437 | 2815 | ||
| 2438 | fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); | 2816 | fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); |
| 2439 | } | 2817 | } |
| 2440 | } | 2818 | } |
| 2441 | if (do_rapl & RAPL_CORES) { | 2819 | if (do_rapl & RAPL_CORES) { |
| @@ -2443,7 +2821,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2443 | 2821 | ||
| 2444 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) | 2822 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) |
| 2445 | return -9; | 2823 | return -9; |
| 2446 | fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2824 | fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", |
| 2447 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | 2825 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); |
| 2448 | print_power_limit_msr(cpu, msr, "Cores Limit"); | 2826 | print_power_limit_msr(cpu, msr, "Cores Limit"); |
| 2449 | } | 2827 | } |
| @@ -2453,11 +2831,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 2453 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) | 2831 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) |
| 2454 | return -8; | 2832 | return -8; |
| 2455 | 2833 | ||
| 2456 | fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); | 2834 | fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); |
| 2457 | 2835 | ||
| 2458 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) | 2836 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) |
| 2459 | return -9; | 2837 | return -9; |
| 2460 | fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2838 | fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", |
| 2461 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | 2839 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); |
| 2462 | print_power_limit_msr(cpu, msr, "GFX Limit"); | 2840 | print_power_limit_msr(cpu, msr, "GFX Limit"); |
| 2463 | } | 2841 | } |
| @@ -2583,23 +2961,23 @@ double slm_bclk(void) | |||
| 2583 | double freq; | 2961 | double freq; |
| 2584 | 2962 | ||
| 2585 | if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) | 2963 | if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) |
| 2586 | fprintf(stderr, "SLM BCLK: unknown\n"); | 2964 | fprintf(outf, "SLM BCLK: unknown\n"); |
| 2587 | 2965 | ||
| 2588 | i = msr & 0xf; | 2966 | i = msr & 0xf; |
| 2589 | if (i >= SLM_BCLK_FREQS) { | 2967 | if (i >= SLM_BCLK_FREQS) { |
| 2590 | fprintf(stderr, "SLM BCLK[%d] invalid\n", i); | 2968 | fprintf(outf, "SLM BCLK[%d] invalid\n", i); |
| 2591 | msr = 3; | 2969 | msr = 3; |
| 2592 | } | 2970 | } |
| 2593 | freq = slm_freq_table[i]; | 2971 | freq = slm_freq_table[i]; |
| 2594 | 2972 | ||
| 2595 | fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); | 2973 | fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); |
| 2596 | 2974 | ||
| 2597 | return freq; | 2975 | return freq; |
| 2598 | } | 2976 | } |
| 2599 | 2977 | ||
| 2600 | double discover_bclk(unsigned int family, unsigned int model) | 2978 | double discover_bclk(unsigned int family, unsigned int model) |
| 2601 | { | 2979 | { |
| 2602 | if (has_snb_msrs(family, model)) | 2980 | if (has_snb_msrs(family, model) || is_knl(family, model)) |
| 2603 | return 100.00; | 2981 | return 100.00; |
| 2604 | else if (is_slm(family, model)) | 2982 | else if (is_slm(family, model)) |
| 2605 | return slm_bclk(); | 2983 | return slm_bclk(); |
| @@ -2635,13 +3013,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
| 2635 | 3013 | ||
| 2636 | cpu = t->cpu_id; | 3014 | cpu = t->cpu_id; |
| 2637 | if (cpu_migrate(cpu)) { | 3015 | if (cpu_migrate(cpu)) { |
| 2638 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 3016 | fprintf(outf, "Could not migrate to CPU %d\n", cpu); |
| 2639 | return -1; | 3017 | return -1; |
| 2640 | } | 3018 | } |
| 2641 | 3019 | ||
| 2642 | if (tcc_activation_temp_override != 0) { | 3020 | if (tcc_activation_temp_override != 0) { |
| 2643 | tcc_activation_temp = tcc_activation_temp_override; | 3021 | tcc_activation_temp = tcc_activation_temp_override; |
| 2644 | fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", | 3022 | fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", |
| 2645 | cpu, tcc_activation_temp); | 3023 | cpu, tcc_activation_temp); |
| 2646 | return 0; | 3024 | return 0; |
| 2647 | } | 3025 | } |
| @@ -2656,7 +3034,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
| 2656 | target_c_local = (msr >> 16) & 0xFF; | 3034 | target_c_local = (msr >> 16) & 0xFF; |
| 2657 | 3035 | ||
| 2658 | if (debug) | 3036 | if (debug) |
| 2659 | fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", | 3037 | fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", |
| 2660 | cpu, msr, target_c_local); | 3038 | cpu, msr, target_c_local); |
| 2661 | 3039 | ||
| 2662 | if (!target_c_local) | 3040 | if (!target_c_local) |
| @@ -2668,37 +3046,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
| 2668 | 3046 | ||
| 2669 | guess: | 3047 | guess: |
| 2670 | tcc_activation_temp = TJMAX_DEFAULT; | 3048 | tcc_activation_temp = TJMAX_DEFAULT; |
| 2671 | fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", | 3049 | fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", |
| 2672 | cpu, tcc_activation_temp); | 3050 | cpu, tcc_activation_temp); |
| 2673 | 3051 | ||
| 2674 | return 0; | 3052 | return 0; |
| 2675 | } | 3053 | } |
| 3054 | |||
| 3055 | void decode_feature_control_msr(void) | ||
| 3056 | { | ||
| 3057 | unsigned long long msr; | ||
| 3058 | |||
| 3059 | if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr)) | ||
| 3060 | fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", | ||
| 3061 | base_cpu, msr, | ||
| 3062 | msr & FEATURE_CONTROL_LOCKED ? "" : "UN-", | ||
| 3063 | msr & (1 << 18) ? "SGX" : ""); | ||
| 3064 | } | ||
| 3065 | |||
| 3066 | void decode_misc_enable_msr(void) | ||
| 3067 | { | ||
| 3068 | unsigned long long msr; | ||
| 3069 | |||
| 3070 | if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) | ||
| 3071 | fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n", | ||
| 3072 | base_cpu, msr, | ||
| 3073 | msr & (1 << 3) ? "TCC" : "", | ||
| 3074 | msr & (1 << 16) ? "EIST" : "", | ||
| 3075 | msr & (1 << 18) ? "MONITOR" : ""); | ||
| 3076 | } | ||
| 3077 | |||
| 3078 | /* | ||
| 3079 | * Decode MSR_MISC_PWR_MGMT | ||
| 3080 | * | ||
| 3081 | * Decode the bits according to the Nehalem documentation | ||
| 3082 | * bit[0] seems to continue to have same meaning going forward | ||
| 3083 | * bit[1] less so... | ||
| 3084 | */ | ||
| 3085 | void decode_misc_pwr_mgmt_msr(void) | ||
| 3086 | { | ||
| 3087 | unsigned long long msr; | ||
| 3088 | |||
| 3089 | if (!do_nhm_platform_info) | ||
| 3090 | return; | ||
| 3091 | |||
| 3092 | if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) | ||
| 3093 | fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n", | ||
| 3094 | base_cpu, msr, | ||
| 3095 | msr & (1 << 0) ? "DIS" : "EN", | ||
| 3096 | msr & (1 << 1) ? "EN" : "DIS"); | ||
| 3097 | } | ||
| 3098 | |||
| 2676 | void process_cpuid() | 3099 | void process_cpuid() |
| 2677 | { | 3100 | { |
| 2678 | unsigned int eax, ebx, ecx, edx, max_level; | 3101 | unsigned int eax, ebx, ecx, edx, max_level, max_extended_level; |
| 2679 | unsigned int fms, family, model, stepping; | 3102 | unsigned int fms, family, model, stepping; |
| 2680 | 3103 | ||
| 2681 | eax = ebx = ecx = edx = 0; | 3104 | eax = ebx = ecx = edx = 0; |
| 2682 | 3105 | ||
| 2683 | __get_cpuid(0, &max_level, &ebx, &ecx, &edx); | 3106 | __cpuid(0, max_level, ebx, ecx, edx); |
| 2684 | 3107 | ||
| 2685 | if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) | 3108 | if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) |
| 2686 | genuine_intel = 1; | 3109 | genuine_intel = 1; |
| 2687 | 3110 | ||
| 2688 | if (debug) | 3111 | if (debug) |
| 2689 | fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", | 3112 | fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", |
| 2690 | (char *)&ebx, (char *)&edx, (char *)&ecx); | 3113 | (char *)&ebx, (char *)&edx, (char *)&ecx); |
| 2691 | 3114 | ||
| 2692 | __get_cpuid(1, &fms, &ebx, &ecx, &edx); | 3115 | __cpuid(1, fms, ebx, ecx, edx); |
| 2693 | family = (fms >> 8) & 0xf; | 3116 | family = (fms >> 8) & 0xf; |
| 2694 | model = (fms >> 4) & 0xf; | 3117 | model = (fms >> 4) & 0xf; |
| 2695 | stepping = fms & 0xf; | 3118 | stepping = fms & 0xf; |
| 2696 | if (family == 6 || family == 0xf) | 3119 | if (family == 6 || family == 0xf) |
| 2697 | model += ((fms >> 16) & 0xf) << 4; | 3120 | model += ((fms >> 16) & 0xf) << 4; |
| 2698 | 3121 | ||
| 2699 | if (debug) | 3122 | if (debug) { |
| 2700 | fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", | 3123 | fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", |
| 2701 | max_level, family, model, stepping, family, model, stepping); | 3124 | max_level, family, model, stepping, family, model, stepping); |
| 3125 | fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n", | ||
| 3126 | ecx & (1 << 0) ? "SSE3" : "-", | ||
| 3127 | ecx & (1 << 3) ? "MONITOR" : "-", | ||
| 3128 | ecx & (1 << 6) ? "SMX" : "-", | ||
| 3129 | ecx & (1 << 7) ? "EIST" : "-", | ||
| 3130 | ecx & (1 << 8) ? "TM2" : "-", | ||
| 3131 | edx & (1 << 4) ? "TSC" : "-", | ||
| 3132 | edx & (1 << 5) ? "MSR" : "-", | ||
| 3133 | edx & (1 << 22) ? "ACPI-TM" : "-", | ||
| 3134 | edx & (1 << 29) ? "TM" : "-"); | ||
| 3135 | } | ||
| 2702 | 3136 | ||
| 2703 | if (!(edx & (1 << 5))) | 3137 | if (!(edx & (1 << 5))) |
| 2704 | errx(1, "CPUID: no MSR"); | 3138 | errx(1, "CPUID: no MSR"); |
| @@ -2709,15 +3143,15 @@ void process_cpuid() | |||
| 2709 | * This check is valid for both Intel and AMD. | 3143 | * This check is valid for both Intel and AMD. |
| 2710 | */ | 3144 | */ |
| 2711 | ebx = ecx = edx = 0; | 3145 | ebx = ecx = edx = 0; |
| 2712 | __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); | 3146 | __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); |
| 2713 | 3147 | ||
| 2714 | if (max_level >= 0x80000007) { | 3148 | if (max_extended_level >= 0x80000007) { |
| 2715 | 3149 | ||
| 2716 | /* | 3150 | /* |
| 2717 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 | 3151 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 |
| 2718 | * this check is valid for both Intel and AMD | 3152 | * this check is valid for both Intel and AMD |
| 2719 | */ | 3153 | */ |
| 2720 | __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | 3154 | __cpuid(0x80000007, eax, ebx, ecx, edx); |
| 2721 | has_invariant_tsc = edx & (1 << 8); | 3155 | has_invariant_tsc = edx & (1 << 8); |
| 2722 | } | 3156 | } |
| 2723 | 3157 | ||
| @@ -2726,20 +3160,48 @@ void process_cpuid() | |||
| 2726 | * this check is valid for both Intel and AMD | 3160 | * this check is valid for both Intel and AMD |
| 2727 | */ | 3161 | */ |
| 2728 | 3162 | ||
| 2729 | __get_cpuid(0x6, &eax, &ebx, &ecx, &edx); | 3163 | __cpuid(0x6, eax, ebx, ecx, edx); |
| 2730 | has_aperf = ecx & (1 << 0); | 3164 | has_aperf = ecx & (1 << 0); |
| 2731 | do_dts = eax & (1 << 0); | 3165 | do_dts = eax & (1 << 0); |
| 2732 | do_ptm = eax & (1 << 6); | 3166 | do_ptm = eax & (1 << 6); |
| 3167 | has_hwp = eax & (1 << 7); | ||
| 3168 | has_hwp_notify = eax & (1 << 8); | ||
| 3169 | has_hwp_activity_window = eax & (1 << 9); | ||
| 3170 | has_hwp_epp = eax & (1 << 10); | ||
| 3171 | has_hwp_pkg = eax & (1 << 11); | ||
| 2733 | has_epb = ecx & (1 << 3); | 3172 | has_epb = ecx & (1 << 3); |
| 2734 | 3173 | ||
| 2735 | if (debug) | 3174 | if (debug) |
| 2736 | fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", | 3175 | fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, " |
| 2737 | has_aperf ? "" : "No ", | 3176 | "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", |
| 2738 | do_dts ? "" : "No ", | 3177 | has_aperf ? "" : "No-", |
| 2739 | do_ptm ? "" : "No ", | 3178 | do_dts ? "" : "No-", |
| 2740 | has_epb ? "" : "No "); | 3179 | do_ptm ? "" : "No-", |
| 3180 | has_hwp ? "" : "No-", | ||
| 3181 | has_hwp_notify ? "" : "No-", | ||
| 3182 | has_hwp_activity_window ? "" : "No-", | ||
| 3183 | has_hwp_epp ? "" : "No-", | ||
| 3184 | has_hwp_pkg ? "" : "No-", | ||
| 3185 | has_epb ? "" : "No-"); | ||
| 3186 | |||
| 3187 | if (debug) | ||
| 3188 | decode_misc_enable_msr(); | ||
| 3189 | |||
| 3190 | if (max_level >= 0x7) { | ||
| 3191 | int has_sgx; | ||
| 2741 | 3192 | ||
| 2742 | if (max_level > 0x15) { | 3193 | ecx = 0; |
| 3194 | |||
| 3195 | __cpuid_count(0x7, 0, eax, ebx, ecx, edx); | ||
| 3196 | |||
| 3197 | has_sgx = ebx & (1 << 2); | ||
| 3198 | fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-"); | ||
| 3199 | |||
| 3200 | if (has_sgx) | ||
| 3201 | decode_feature_control_msr(); | ||
| 3202 | } | ||
| 3203 | |||
| 3204 | if (max_level >= 0x15) { | ||
| 2743 | unsigned int eax_crystal; | 3205 | unsigned int eax_crystal; |
| 2744 | unsigned int ebx_tsc; | 3206 | unsigned int ebx_tsc; |
| 2745 | 3207 | ||
| @@ -2747,12 +3209,12 @@ void process_cpuid() | |||
| 2747 | * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz | 3209 | * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz |
| 2748 | */ | 3210 | */ |
| 2749 | eax_crystal = ebx_tsc = crystal_hz = edx = 0; | 3211 | eax_crystal = ebx_tsc = crystal_hz = edx = 0; |
| 2750 | __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); | 3212 | __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); |
| 2751 | 3213 | ||
| 2752 | if (ebx_tsc != 0) { | 3214 | if (ebx_tsc != 0) { |
| 2753 | 3215 | ||
| 2754 | if (debug && (ebx != 0)) | 3216 | if (debug && (ebx != 0)) |
| 2755 | fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", | 3217 | fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", |
| 2756 | eax_crystal, ebx_tsc, crystal_hz); | 3218 | eax_crystal, ebx_tsc, crystal_hz); |
| 2757 | 3219 | ||
| 2758 | if (crystal_hz == 0) | 3220 | if (crystal_hz == 0) |
| @@ -2768,11 +3230,24 @@ void process_cpuid() | |||
| 2768 | if (crystal_hz) { | 3230 | if (crystal_hz) { |
| 2769 | tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; | 3231 | tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; |
| 2770 | if (debug) | 3232 | if (debug) |
| 2771 | fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", | 3233 | fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", |
| 2772 | tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); | 3234 | tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); |
| 2773 | } | 3235 | } |
| 2774 | } | 3236 | } |
| 2775 | } | 3237 | } |
| 3238 | if (max_level >= 0x16) { | ||
| 3239 | unsigned int base_mhz, max_mhz, bus_mhz, edx; | ||
| 3240 | |||
| 3241 | /* | ||
| 3242 | * CPUID 16H Base MHz, Max MHz, Bus MHz | ||
| 3243 | */ | ||
| 3244 | base_mhz = max_mhz = bus_mhz = edx = 0; | ||
| 3245 | |||
| 3246 | __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); | ||
| 3247 | if (debug) | ||
| 3248 | fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", | ||
| 3249 | base_mhz, max_mhz, bus_mhz); | ||
| 3250 | } | ||
| 2776 | 3251 | ||
| 2777 | if (has_aperf) | 3252 | if (has_aperf) |
| 2778 | aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); | 3253 | aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); |
| @@ -2788,21 +3263,28 @@ void process_cpuid() | |||
| 2788 | do_slm_cstates = is_slm(family, model); | 3263 | do_slm_cstates = is_slm(family, model); |
| 2789 | do_knl_cstates = is_knl(family, model); | 3264 | do_knl_cstates = is_knl(family, model); |
| 2790 | 3265 | ||
| 3266 | if (debug) | ||
| 3267 | decode_misc_pwr_mgmt_msr(); | ||
| 3268 | |||
| 2791 | rapl_probe(family, model); | 3269 | rapl_probe(family, model); |
| 2792 | perf_limit_reasons_probe(family, model); | 3270 | perf_limit_reasons_probe(family, model); |
| 2793 | 3271 | ||
| 2794 | if (debug) | 3272 | if (debug) |
| 2795 | dump_cstate_pstate_config_info(); | 3273 | dump_cstate_pstate_config_info(family, model); |
| 2796 | 3274 | ||
| 2797 | if (has_skl_msrs(family, model)) | 3275 | if (has_skl_msrs(family, model)) |
| 2798 | calculate_tsc_tweak(); | 3276 | calculate_tsc_tweak(); |
| 2799 | 3277 | ||
| 3278 | do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK); | ||
| 3279 | |||
| 3280 | do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK); | ||
| 3281 | |||
| 2800 | return; | 3282 | return; |
| 2801 | } | 3283 | } |
| 2802 | 3284 | ||
| 2803 | void help() | 3285 | void help() |
| 2804 | { | 3286 | { |
| 2805 | fprintf(stderr, | 3287 | fprintf(outf, |
| 2806 | "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" | 3288 | "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" |
| 2807 | "\n" | 3289 | "\n" |
| 2808 | "Turbostat forks the specified COMMAND and prints statistics\n" | 3290 | "Turbostat forks the specified COMMAND and prints statistics\n" |
| @@ -2814,6 +3296,7 @@ void help() | |||
| 2814 | "--help print this help message\n" | 3296 | "--help print this help message\n" |
| 2815 | "--counter msr print 32-bit counter at address \"msr\"\n" | 3297 | "--counter msr print 32-bit counter at address \"msr\"\n" |
| 2816 | "--Counter msr print 64-bit Counter at address \"msr\"\n" | 3298 | "--Counter msr print 64-bit Counter at address \"msr\"\n" |
| 3299 | "--out file create or truncate \"file\" for all output\n" | ||
| 2817 | "--msr msr print 32-bit value at address \"msr\"\n" | 3300 | "--msr msr print 32-bit value at address \"msr\"\n" |
| 2818 | "--MSR msr print 64-bit Value at address \"msr\"\n" | 3301 | "--MSR msr print 64-bit Value at address \"msr\"\n" |
| 2819 | "--version print version information\n" | 3302 | "--version print version information\n" |
| @@ -2858,7 +3341,7 @@ void topology_probe() | |||
| 2858 | show_cpu = 1; | 3341 | show_cpu = 1; |
| 2859 | 3342 | ||
| 2860 | if (debug > 1) | 3343 | if (debug > 1) |
| 2861 | fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); | 3344 | fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); |
| 2862 | 3345 | ||
| 2863 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); | 3346 | cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); |
| 2864 | if (cpus == NULL) | 3347 | if (cpus == NULL) |
| @@ -2893,7 +3376,7 @@ void topology_probe() | |||
| 2893 | 3376 | ||
| 2894 | if (cpu_is_not_present(i)) { | 3377 | if (cpu_is_not_present(i)) { |
| 2895 | if (debug > 1) | 3378 | if (debug > 1) |
| 2896 | fprintf(stderr, "cpu%d NOT PRESENT\n", i); | 3379 | fprintf(outf, "cpu%d NOT PRESENT\n", i); |
| 2897 | continue; | 3380 | continue; |
| 2898 | } | 3381 | } |
| 2899 | cpus[i].core_id = get_core_id(i); | 3382 | cpus[i].core_id = get_core_id(i); |
| @@ -2908,26 +3391,26 @@ void topology_probe() | |||
| 2908 | if (siblings > max_siblings) | 3391 | if (siblings > max_siblings) |
| 2909 | max_siblings = siblings; | 3392 | max_siblings = siblings; |
| 2910 | if (debug > 1) | 3393 | if (debug > 1) |
| 2911 | fprintf(stderr, "cpu %d pkg %d core %d\n", | 3394 | fprintf(outf, "cpu %d pkg %d core %d\n", |
| 2912 | i, cpus[i].physical_package_id, cpus[i].core_id); | 3395 | i, cpus[i].physical_package_id, cpus[i].core_id); |
| 2913 | } | 3396 | } |
| 2914 | topo.num_cores_per_pkg = max_core_id + 1; | 3397 | topo.num_cores_per_pkg = max_core_id + 1; |
| 2915 | if (debug > 1) | 3398 | if (debug > 1) |
| 2916 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | 3399 | fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", |
| 2917 | max_core_id, topo.num_cores_per_pkg); | 3400 | max_core_id, topo.num_cores_per_pkg); |
| 2918 | if (debug && !summary_only && topo.num_cores_per_pkg > 1) | 3401 | if (debug && !summary_only && topo.num_cores_per_pkg > 1) |
| 2919 | show_core = 1; | 3402 | show_core = 1; |
| 2920 | 3403 | ||
| 2921 | topo.num_packages = max_package_id + 1; | 3404 | topo.num_packages = max_package_id + 1; |
| 2922 | if (debug > 1) | 3405 | if (debug > 1) |
| 2923 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | 3406 | fprintf(outf, "max_package_id %d, sizing for %d packages\n", |
| 2924 | max_package_id, topo.num_packages); | 3407 | max_package_id, topo.num_packages); |
| 2925 | if (debug && !summary_only && topo.num_packages > 1) | 3408 | if (debug && !summary_only && topo.num_packages > 1) |
| 2926 | show_pkg = 1; | 3409 | show_pkg = 1; |
| 2927 | 3410 | ||
| 2928 | topo.num_threads_per_core = max_siblings; | 3411 | topo.num_threads_per_core = max_siblings; |
| 2929 | if (debug > 1) | 3412 | if (debug > 1) |
| 2930 | fprintf(stderr, "max_siblings %d\n", max_siblings); | 3413 | fprintf(outf, "max_siblings %d\n", max_siblings); |
| 2931 | 3414 | ||
| 2932 | free(cpus); | 3415 | free(cpus); |
| 2933 | } | 3416 | } |
| @@ -3019,10 +3502,27 @@ void allocate_output_buffer() | |||
| 3019 | if (outp == NULL) | 3502 | if (outp == NULL) |
| 3020 | err(-1, "calloc output buffer"); | 3503 | err(-1, "calloc output buffer"); |
| 3021 | } | 3504 | } |
| 3505 | void allocate_fd_percpu(void) | ||
| 3506 | { | ||
| 3507 | fd_percpu = calloc(topo.max_cpu_num, sizeof(int)); | ||
| 3508 | if (fd_percpu == NULL) | ||
| 3509 | err(-1, "calloc fd_percpu"); | ||
| 3510 | } | ||
| 3511 | void allocate_irq_buffers(void) | ||
| 3512 | { | ||
| 3513 | irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); | ||
| 3514 | if (irq_column_2_cpu == NULL) | ||
| 3515 | err(-1, "calloc %d", topo.num_cpus); | ||
| 3022 | 3516 | ||
| 3517 | irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int)); | ||
| 3518 | if (irqs_per_cpu == NULL) | ||
| 3519 | err(-1, "calloc %d", topo.max_cpu_num); | ||
| 3520 | } | ||
| 3023 | void setup_all_buffers(void) | 3521 | void setup_all_buffers(void) |
| 3024 | { | 3522 | { |
| 3025 | topology_probe(); | 3523 | topology_probe(); |
| 3524 | allocate_irq_buffers(); | ||
| 3525 | allocate_fd_percpu(); | ||
| 3026 | allocate_counters(&thread_even, &core_even, &package_even); | 3526 | allocate_counters(&thread_even, &core_even, &package_even); |
| 3027 | allocate_counters(&thread_odd, &core_odd, &package_odd); | 3527 | allocate_counters(&thread_odd, &core_odd, &package_odd); |
| 3028 | allocate_output_buffer(); | 3528 | allocate_output_buffer(); |
| @@ -3036,7 +3536,7 @@ void set_base_cpu(void) | |||
| 3036 | err(-ENODEV, "No valid cpus found"); | 3536 | err(-ENODEV, "No valid cpus found"); |
| 3037 | 3537 | ||
| 3038 | if (debug > 1) | 3538 | if (debug > 1) |
| 3039 | fprintf(stderr, "base_cpu = %d\n", base_cpu); | 3539 | fprintf(outf, "base_cpu = %d\n", base_cpu); |
| 3040 | } | 3540 | } |
| 3041 | 3541 | ||
| 3042 | void turbostat_init() | 3542 | void turbostat_init() |
| @@ -3049,6 +3549,9 @@ void turbostat_init() | |||
| 3049 | 3549 | ||
| 3050 | 3550 | ||
| 3051 | if (debug) | 3551 | if (debug) |
| 3552 | for_all_cpus(print_hwp, ODD_COUNTERS); | ||
| 3553 | |||
| 3554 | if (debug) | ||
| 3052 | for_all_cpus(print_epb, ODD_COUNTERS); | 3555 | for_all_cpus(print_epb, ODD_COUNTERS); |
| 3053 | 3556 | ||
| 3054 | if (debug) | 3557 | if (debug) |
| @@ -3100,9 +3603,10 @@ int fork_it(char **argv) | |||
| 3100 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); | 3603 | for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); |
| 3101 | compute_average(EVEN_COUNTERS); | 3604 | compute_average(EVEN_COUNTERS); |
| 3102 | format_all_counters(EVEN_COUNTERS); | 3605 | format_all_counters(EVEN_COUNTERS); |
| 3103 | flush_stderr(); | ||
| 3104 | 3606 | ||
| 3105 | fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); | 3607 | fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); |
| 3608 | |||
| 3609 | flush_output_stderr(); | ||
| 3106 | 3610 | ||
| 3107 | return status; | 3611 | return status; |
| 3108 | } | 3612 | } |
| @@ -3119,13 +3623,13 @@ int get_and_dump_counters(void) | |||
| 3119 | if (status) | 3623 | if (status) |
| 3120 | return status; | 3624 | return status; |
| 3121 | 3625 | ||
| 3122 | flush_stdout(); | 3626 | flush_output_stdout(); |
| 3123 | 3627 | ||
| 3124 | return status; | 3628 | return status; |
| 3125 | } | 3629 | } |
| 3126 | 3630 | ||
| 3127 | void print_version() { | 3631 | void print_version() { |
| 3128 | fprintf(stderr, "turbostat version 4.8 26-Sep, 2015" | 3632 | fprintf(outf, "turbostat version 4.11 27 Feb 2016" |
| 3129 | " - Len Brown <lenb@kernel.org>\n"); | 3633 | " - Len Brown <lenb@kernel.org>\n"); |
| 3130 | } | 3634 | } |
| 3131 | 3635 | ||
| @@ -3143,6 +3647,7 @@ void cmdline(int argc, char **argv) | |||
| 3143 | {"Joules", no_argument, 0, 'J'}, | 3647 | {"Joules", no_argument, 0, 'J'}, |
| 3144 | {"MSR", required_argument, 0, 'M'}, | 3648 | {"MSR", required_argument, 0, 'M'}, |
| 3145 | {"msr", required_argument, 0, 'm'}, | 3649 | {"msr", required_argument, 0, 'm'}, |
| 3650 | {"out", required_argument, 0, 'o'}, | ||
| 3146 | {"Package", no_argument, 0, 'p'}, | 3651 | {"Package", no_argument, 0, 'p'}, |
| 3147 | {"processor", no_argument, 0, 'p'}, | 3652 | {"processor", no_argument, 0, 'p'}, |
| 3148 | {"Summary", no_argument, 0, 'S'}, | 3653 | {"Summary", no_argument, 0, 'S'}, |
| @@ -3153,7 +3658,7 @@ void cmdline(int argc, char **argv) | |||
| 3153 | 3658 | ||
| 3154 | progname = argv[0]; | 3659 | progname = argv[0]; |
| 3155 | 3660 | ||
| 3156 | while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v", | 3661 | while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v", |
| 3157 | long_options, &option_index)) != -1) { | 3662 | long_options, &option_index)) != -1) { |
| 3158 | switch (opt) { | 3663 | switch (opt) { |
| 3159 | case 'C': | 3664 | case 'C': |
| @@ -3173,7 +3678,18 @@ void cmdline(int argc, char **argv) | |||
| 3173 | help(); | 3678 | help(); |
| 3174 | exit(1); | 3679 | exit(1); |
| 3175 | case 'i': | 3680 | case 'i': |
| 3176 | interval_sec = atoi(optarg); | 3681 | { |
| 3682 | double interval = strtod(optarg, NULL); | ||
| 3683 | |||
| 3684 | if (interval < 0.001) { | ||
| 3685 | fprintf(outf, "interval %f seconds is too small\n", | ||
| 3686 | interval); | ||
| 3687 | exit(2); | ||
| 3688 | } | ||
| 3689 | |||
| 3690 | interval_ts.tv_sec = interval; | ||
| 3691 | interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; | ||
| 3692 | } | ||
| 3177 | break; | 3693 | break; |
| 3178 | case 'J': | 3694 | case 'J': |
| 3179 | rapl_joules++; | 3695 | rapl_joules++; |
| @@ -3184,6 +3700,9 @@ void cmdline(int argc, char **argv) | |||
| 3184 | case 'm': | 3700 | case 'm': |
| 3185 | sscanf(optarg, "%x", &extra_msr_offset32); | 3701 | sscanf(optarg, "%x", &extra_msr_offset32); |
| 3186 | break; | 3702 | break; |
| 3703 | case 'o': | ||
| 3704 | outf = fopen_or_die(optarg, "w"); | ||
| 3705 | break; | ||
| 3187 | case 'P': | 3706 | case 'P': |
| 3188 | show_pkg_only++; | 3707 | show_pkg_only++; |
| 3189 | break; | 3708 | break; |
| @@ -3206,6 +3725,8 @@ void cmdline(int argc, char **argv) | |||
| 3206 | 3725 | ||
| 3207 | int main(int argc, char **argv) | 3726 | int main(int argc, char **argv) |
| 3208 | { | 3727 | { |
| 3728 | outf = stderr; | ||
| 3729 | |||
| 3209 | cmdline(argc, argv); | 3730 | cmdline(argc, argv); |
| 3210 | 3731 | ||
| 3211 | if (debug) | 3732 | if (debug) |
