diff options
| -rw-r--r-- | arch/x86/include/uapi/asm/msr-index.h | 4 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 66 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 338 |
3 files changed, 313 insertions, 95 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index c8aa65d56027..f721330541cb 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
| @@ -152,6 +152,10 @@ | |||
| 152 | #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 | 152 | #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 |
| 153 | #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 | 153 | #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 |
| 154 | 154 | ||
| 155 | #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 | ||
| 156 | #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 | ||
| 157 | #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 | ||
| 158 | |||
| 155 | /* Hardware P state interface */ | 159 | /* Hardware P state interface */ |
| 156 | #define MSR_PPERF 0x0000064e | 160 | #define MSR_PPERF 0x0000064e |
| 157 | #define MSR_PERF_LIMIT_REASONS 0x0000064f | 161 | #define MSR_PERF_LIMIT_REASONS 0x0000064f |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 56bfb523c5bb..9b950699e63d 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
| @@ -12,16 +12,16 @@ turbostat \- Report processor frequency and idle statistics | |||
| 12 | .RB [ "\-i interval_sec" ] | 12 | .RB [ "\-i interval_sec" ] |
| 13 | .SH DESCRIPTION | 13 | .SH DESCRIPTION |
| 14 | \fBturbostat \fP reports processor topology, frequency, | 14 | \fBturbostat \fP reports processor topology, frequency, |
| 15 | idle power-state statistics, temperature and power on modern X86 processors. | 15 | idle power-state statistics, temperature and power on X86 processors. |
| 16 | Either \fBcommand\fP is forked and statistics are printed | 16 | There are two ways to invoke turbostat. |
| 17 | upon its completion, or statistics are printed periodically. | 17 | The first method is to supply a |
| 18 | 18 | \fBcommand\fP, which is forked and statistics are printed | |
| 19 | \fBturbostat \fP | 19 | upon its completion. |
| 20 | must be run on root, and | 20 | The second method is to omit the command, |
| 21 | minimally requires that the processor | 21 | and turbostat will print statistics every 5 seconds. |
| 22 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. | 22 | The 5-second interval can be changed using the -i option. |
| 23 | Additional information is reported depending on hardware counter support. | 23 | |
| 24 | 24 | Some information is not available on older processors. | |
| 25 | .SS Options | 25 | .SS Options |
| 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. | 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. |
| 27 | .PP | 27 | .PP |
| @@ -130,12 +130,13 @@ cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) | |||
| 130 | ... | 130 | ... |
| 131 | .fi | 131 | .fi |
| 132 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency | 132 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency |
| 133 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal | 133 | available at the minimum package voltage. The \fBTSC frequency\fP is the base |
| 134 | maximum frequency of the processor if turbo-mode were not available. This frequency | 134 | frequency of the processor -- this should match the brand string |
| 135 | in /proc/cpuinfo. This base frequency | ||
| 135 | should be sustainable on all CPUs indefinitely, given nominal power and cooling. | 136 | should be sustainable on all CPUs indefinitely, given nominal power and cooling. |
| 136 | The remaining rows show what maximum turbo frequency is possible | 137 | The remaining rows show what maximum turbo frequency is possible |
| 137 | depending on the number of idle cores. Note that this information is | 138 | depending on the number of idle cores. Note that not all information is |
| 138 | not available on all processors. | 139 | available on all processors. |
| 139 | .SH FORK EXAMPLE | 140 | .SH FORK EXAMPLE |
| 140 | If turbostat is invoked with a command, it will fork that command | 141 | If turbostat is invoked with a command, it will fork that command |
| 141 | and output the statistics gathered when the command exits. | 142 | and output the statistics gathered when the command exits. |
| @@ -176,6 +177,11 @@ not including any non-busy idle time. | |||
| 176 | 177 | ||
| 177 | .B "turbostat " | 178 | .B "turbostat " |
| 178 | must be run as root. | 179 | must be run as root. |
| 180 | Alternatively, non-root users can be enabled to run turbostat this way: | ||
| 181 | |||
| 182 | # setcap cap_sys_rawio=ep ./turbostat | ||
| 183 | |||
| 184 | # chmod +r /dev/cpu/*/msr | ||
| 179 | 185 | ||
| 180 | .B "turbostat " | 186 | .B "turbostat " |
| 181 | reads hardware counters, but doesn't write them. | 187 | reads hardware counters, but doesn't write them. |
| @@ -184,15 +190,33 @@ multiple invocations of itself. | |||
| 184 | 190 | ||
| 185 | \fBturbostat \fP | 191 | \fBturbostat \fP |
| 186 | may work poorly on Linux-2.6.20 through 2.6.29, | 192 | may work poorly on Linux-2.6.20 through 2.6.29, |
| 187 | as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF | 193 | as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF MSRs |
| 188 | in those kernels. | 194 | in those kernels. |
| 189 | 195 | ||
| 190 | If the TSC column does not make sense, then | 196 | Avg_MHz = APERF_delta/measurement_interval. This is the actual |
| 191 | the other numbers will also make no sense. | 197 | number of elapsed cycles divided by the entire sample interval -- |
| 192 | Turbostat is lightweight, and its data collection is not atomic. | 198 | including idle time. Note that this calculation is resilient |
| 193 | These issues are usually caused by an extremely short measurement | 199 | to systems lacking a non-stop TSC. |
| 194 | interval (much less than 1 second), or system activity that prevents | 200 | |
| 195 | turbostat from being able to run on all CPUS to quickly collect data. | 201 | TSC_MHz = TSC_delta/measurement_interval. |
| 202 | On a system with an invariant TSC, this value will be constant | ||
| 203 | and will closely match the base frequency value shown | ||
| 204 | in the brand string in /proc/cpuinfo. On a system where | ||
| 205 | the TSC stops in idle, TSC_MHz will drop | ||
| 206 | below the processor's base frequency. | ||
| 207 | |||
| 208 | %Busy = MPERF_delta/TSC_delta | ||
| 209 | |||
| 210 | Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval | ||
| 211 | |||
| 212 | Note that these calculations depend on TSC_delta, so they | ||
| 213 | are not reliable during intervals when TSC_MHz is not running at the base frequency. | ||
| 214 | |||
| 215 | Turbostat data collection is not atomic. | ||
| 216 | Extremely short measurement intervals (much less than 1 second), | ||
| 217 | or system activity that prevents turbostat from being able | ||
| 218 | to run on all CPUS to quickly collect data, will result in | ||
| 219 | inconsistent results. | ||
| 196 | 220 | ||
| 197 | The APERF, MPERF MSRs are defined to count non-halted cycles. | 221 | The APERF, MPERF MSRs are defined to count non-halted cycles. |
| 198 | Although it is not guaranteed by the architecture, turbostat assumes | 222 | Although it is not guaranteed by the architecture, turbostat assumes |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5b1b807265a1..a02c02f25e88 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
| @@ -38,6 +38,8 @@ | |||
| 38 | #include <ctype.h> | 38 | #include <ctype.h> |
| 39 | #include <sched.h> | 39 | #include <sched.h> |
| 40 | #include <cpuid.h> | 40 | #include <cpuid.h> |
| 41 | #include <linux/capability.h> | ||
| 42 | #include <errno.h> | ||
| 41 | 43 | ||
| 42 | char *proc_stat = "/proc/stat"; | 44 | char *proc_stat = "/proc/stat"; |
| 43 | unsigned int interval_sec = 5; /* set with -i interval_sec */ | 45 | unsigned int interval_sec = 5; /* set with -i interval_sec */ |
| @@ -59,8 +61,8 @@ unsigned int has_epb; | |||
| 59 | unsigned int units = 1000000; /* MHz etc */ | 61 | unsigned int units = 1000000; /* MHz etc */ |
| 60 | unsigned int genuine_intel; | 62 | unsigned int genuine_intel; |
| 61 | unsigned int has_invariant_tsc; | 63 | unsigned int has_invariant_tsc; |
| 62 | unsigned int do_nehalem_platform_info; | 64 | unsigned int do_nhm_platform_info; |
| 63 | unsigned int do_nehalem_turbo_ratio_limit; | 65 | unsigned int do_nhm_turbo_ratio_limit; |
| 64 | unsigned int do_ivt_turbo_ratio_limit; | 66 | unsigned int do_ivt_turbo_ratio_limit; |
| 65 | unsigned int extra_msr_offset32; | 67 | unsigned int extra_msr_offset32; |
| 66 | unsigned int extra_msr_offset64; | 68 | unsigned int extra_msr_offset64; |
| @@ -81,6 +83,9 @@ unsigned int tcc_activation_temp; | |||
| 81 | unsigned int tcc_activation_temp_override; | 83 | unsigned int tcc_activation_temp_override; |
| 82 | double rapl_power_units, rapl_energy_units, rapl_time_units; | 84 | double rapl_power_units, rapl_energy_units, rapl_time_units; |
| 83 | double rapl_joule_counter_range; | 85 | double rapl_joule_counter_range; |
| 86 | unsigned int do_core_perf_limit_reasons; | ||
| 87 | unsigned int do_gfx_perf_limit_reasons; | ||
| 88 | unsigned int do_ring_perf_limit_reasons; | ||
| 84 | 89 | ||
| 85 | #define RAPL_PKG (1 << 0) | 90 | #define RAPL_PKG (1 << 0) |
| 86 | /* 0x610 MSR_PKG_POWER_LIMIT */ | 91 | /* 0x610 MSR_PKG_POWER_LIMIT */ |
| @@ -251,15 +256,13 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
| 251 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); | 256 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); |
| 252 | fd = open(pathname, O_RDONLY); | 257 | fd = open(pathname, O_RDONLY); |
| 253 | if (fd < 0) | 258 | if (fd < 0) |
| 254 | return -1; | 259 | err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); |
| 255 | 260 | ||
| 256 | retval = pread(fd, msr, sizeof *msr, offset); | 261 | retval = pread(fd, msr, sizeof *msr, offset); |
| 257 | close(fd); | 262 | close(fd); |
| 258 | 263 | ||
| 259 | if (retval != sizeof *msr) { | 264 | if (retval != sizeof *msr) |
| 260 | fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset); | 265 | err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); |
| 261 | return -1; | ||
| 262 | } | ||
| 263 | 266 | ||
| 264 | return 0; | 267 | return 0; |
| 265 | } | 268 | } |
| @@ -281,7 +284,7 @@ void print_header(void) | |||
| 281 | outp += sprintf(outp, " CPU"); | 284 | outp += sprintf(outp, " CPU"); |
| 282 | if (has_aperf) | 285 | if (has_aperf) |
| 283 | outp += sprintf(outp, " Avg_MHz"); | 286 | outp += sprintf(outp, " Avg_MHz"); |
| 284 | if (do_nhm_cstates) | 287 | if (has_aperf) |
| 285 | outp += sprintf(outp, " %%Busy"); | 288 | outp += sprintf(outp, " %%Busy"); |
| 286 | if (has_aperf) | 289 | if (has_aperf) |
| 287 | outp += sprintf(outp, " Bzy_MHz"); | 290 | outp += sprintf(outp, " Bzy_MHz"); |
| @@ -337,7 +340,7 @@ void print_header(void) | |||
| 337 | outp += sprintf(outp, " PKG_%%"); | 340 | outp += sprintf(outp, " PKG_%%"); |
| 338 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 341 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
| 339 | outp += sprintf(outp, " RAM_%%"); | 342 | outp += sprintf(outp, " RAM_%%"); |
| 340 | } else { | 343 | } else if (do_rapl && rapl_joules) { |
| 341 | if (do_rapl & RAPL_PKG) | 344 | if (do_rapl & RAPL_PKG) |
| 342 | outp += sprintf(outp, " Pkg_J"); | 345 | outp += sprintf(outp, " Pkg_J"); |
| 343 | if (do_rapl & RAPL_CORES) | 346 | if (do_rapl & RAPL_CORES) |
| @@ -457,25 +460,25 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 457 | outp += sprintf(outp, "%8d", t->cpu_id); | 460 | outp += sprintf(outp, "%8d", t->cpu_id); |
| 458 | } | 461 | } |
| 459 | 462 | ||
| 460 | /* AvgMHz */ | 463 | /* Avg_MHz */ |
| 461 | if (has_aperf) | 464 | if (has_aperf) |
| 462 | outp += sprintf(outp, "%8.0f", | 465 | outp += sprintf(outp, "%8.0f", |
| 463 | 1.0 / units * t->aperf / interval_float); | 466 | 1.0 / units * t->aperf / interval_float); |
| 464 | 467 | ||
| 465 | /* %c0 */ | 468 | /* %Busy */ |
| 466 | if (do_nhm_cstates) { | 469 | if (has_aperf) { |
| 467 | if (!skip_c0) | 470 | if (!skip_c0) |
| 468 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); | 471 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); |
| 469 | else | 472 | else |
| 470 | outp += sprintf(outp, "********"); | 473 | outp += sprintf(outp, "********"); |
| 471 | } | 474 | } |
| 472 | 475 | ||
| 473 | /* BzyMHz */ | 476 | /* Bzy_MHz */ |
| 474 | if (has_aperf) | 477 | if (has_aperf) |
| 475 | outp += sprintf(outp, "%8.0f", | 478 | outp += sprintf(outp, "%8.0f", |
| 476 | 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); | 479 | 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); |
| 477 | 480 | ||
| 478 | /* TSC */ | 481 | /* TSC_MHz */ |
| 479 | outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); | 482 | outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); |
| 480 | 483 | ||
| 481 | /* SMI */ | 484 | /* SMI */ |
| @@ -561,7 +564,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 561 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | 564 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); |
| 562 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 565 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
| 563 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | 566 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); |
| 564 | } else { | 567 | } else if (do_rapl && rapl_joules) { |
| 565 | if (do_rapl & RAPL_PKG) | 568 | if (do_rapl & RAPL_PKG) |
| 566 | outp += sprintf(outp, fmt8, | 569 | outp += sprintf(outp, fmt8, |
| 567 | p->energy_pkg * rapl_energy_units); | 570 | p->energy_pkg * rapl_energy_units); |
| @@ -578,8 +581,8 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 578 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | 581 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); |
| 579 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 582 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
| 580 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | 583 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); |
| 581 | outp += sprintf(outp, fmt8, interval_float); | ||
| 582 | 584 | ||
| 585 | outp += sprintf(outp, fmt8, interval_float); | ||
| 583 | } | 586 | } |
| 584 | done: | 587 | done: |
| 585 | outp += sprintf(outp, "\n"); | 588 | outp += sprintf(outp, "\n"); |
| @@ -670,24 +673,26 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
| 670 | 673 | ||
| 671 | old->c1 = new->c1 - old->c1; | 674 | old->c1 = new->c1 - old->c1; |
| 672 | 675 | ||
| 673 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { | 676 | if (has_aperf) { |
| 674 | old->aperf = new->aperf - old->aperf; | 677 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { |
| 675 | old->mperf = new->mperf - old->mperf; | 678 | old->aperf = new->aperf - old->aperf; |
| 676 | } else { | 679 | old->mperf = new->mperf - old->mperf; |
| 680 | } else { | ||
| 677 | 681 | ||
| 678 | if (!aperf_mperf_unstable) { | 682 | if (!aperf_mperf_unstable) { |
| 679 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | 683 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); |
| 680 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | 684 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); |
| 681 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | 685 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); |
| 682 | 686 | ||
| 683 | aperf_mperf_unstable = 1; | 687 | aperf_mperf_unstable = 1; |
| 688 | } | ||
| 689 | /* | ||
| 690 | * mperf delta is likely a huge "positive" number | ||
| 691 | * can not use it for calculating c0 time | ||
| 692 | */ | ||
| 693 | skip_c0 = 1; | ||
| 694 | skip_c1 = 1; | ||
| 684 | } | 695 | } |
| 685 | /* | ||
| 686 | * mperf delta is likely a huge "positive" number | ||
| 687 | * can not use it for calculating c0 time | ||
| 688 | */ | ||
| 689 | skip_c0 = 1; | ||
| 690 | skip_c1 = 1; | ||
| 691 | } | 696 | } |
| 692 | 697 | ||
| 693 | 698 | ||
| @@ -1019,7 +1024,7 @@ void print_verbose_header(void) | |||
| 1019 | unsigned long long msr; | 1024 | unsigned long long msr; |
| 1020 | unsigned int ratio; | 1025 | unsigned int ratio; |
| 1021 | 1026 | ||
| 1022 | if (!do_nehalem_platform_info) | 1027 | if (!do_nhm_platform_info) |
| 1023 | return; | 1028 | return; |
| 1024 | 1029 | ||
| 1025 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); | 1030 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); |
| @@ -1132,7 +1137,7 @@ print_nhm_turbo_ratio_limits: | |||
| 1132 | } | 1137 | } |
| 1133 | fprintf(stderr, ")\n"); | 1138 | fprintf(stderr, ")\n"); |
| 1134 | 1139 | ||
| 1135 | if (!do_nehalem_turbo_ratio_limit) | 1140 | if (!do_nhm_turbo_ratio_limit) |
| 1136 | return; | 1141 | return; |
| 1137 | 1142 | ||
| 1138 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | 1143 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
| @@ -1178,6 +1183,7 @@ print_nhm_turbo_ratio_limits: | |||
| 1178 | if (ratio) | 1183 | if (ratio) |
| 1179 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", | 1184 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", |
| 1180 | ratio, bclk, ratio * bclk); | 1185 | ratio, bclk, ratio * bclk); |
| 1186 | |||
| 1181 | } | 1187 | } |
| 1182 | 1188 | ||
| 1183 | void free_all_buffers(void) | 1189 | void free_all_buffers(void) |
| @@ -1458,17 +1464,60 @@ void check_dev_msr() | |||
| 1458 | struct stat sb; | 1464 | struct stat sb; |
| 1459 | 1465 | ||
| 1460 | if (stat("/dev/cpu/0/msr", &sb)) | 1466 | if (stat("/dev/cpu/0/msr", &sb)) |
| 1461 | err(-5, "no /dev/cpu/0/msr\n" | 1467 | err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); |
| 1462 | "Try \"# modprobe msr\""); | ||
| 1463 | } | 1468 | } |
| 1464 | 1469 | ||
| 1465 | void check_super_user() | 1470 | void check_permissions() |
| 1466 | { | 1471 | { |
| 1467 | if (getuid() != 0) | 1472 | struct __user_cap_header_struct cap_header_data; |
| 1468 | errx(-6, "must be root"); | 1473 | cap_user_header_t cap_header = &cap_header_data; |
| 1474 | struct __user_cap_data_struct cap_data_data; | ||
| 1475 | cap_user_data_t cap_data = &cap_data_data; | ||
| 1476 | extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); | ||
| 1477 | int do_exit = 0; | ||
| 1478 | |||
| 1479 | /* check for CAP_SYS_RAWIO */ | ||
| 1480 | cap_header->pid = getpid(); | ||
| 1481 | cap_header->version = _LINUX_CAPABILITY_VERSION; | ||
| 1482 | if (capget(cap_header, cap_data) < 0) | ||
| 1483 | err(-6, "capget(2) failed"); | ||
| 1484 | |||
| 1485 | if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) { | ||
| 1486 | do_exit++; | ||
| 1487 | warnx("capget(CAP_SYS_RAWIO) failed," | ||
| 1488 | " try \"# setcap cap_sys_rawio=ep %s\"", progname); | ||
| 1489 | } | ||
| 1490 | |||
| 1491 | /* test file permissions */ | ||
| 1492 | if (euidaccess("/dev/cpu/0/msr", R_OK)) { | ||
| 1493 | do_exit++; | ||
| 1494 | warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | /* if all else fails, thell them to be root */ | ||
| 1498 | if (do_exit) | ||
| 1499 | if (getuid() != 0) | ||
| 1500 | warnx("... or simply run as root"); | ||
| 1501 | |||
| 1502 | if (do_exit) | ||
| 1503 | exit(-6); | ||
| 1469 | } | 1504 | } |
| 1470 | 1505 | ||
| 1471 | int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) | 1506 | /* |
| 1507 | * NHM adds support for additional MSRs: | ||
| 1508 | * | ||
| 1509 | * MSR_SMI_COUNT 0x00000034 | ||
| 1510 | * | ||
| 1511 | * MSR_NHM_PLATFORM_INFO 0x000000ce | ||
| 1512 | * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 | ||
| 1513 | * | ||
| 1514 | * MSR_PKG_C3_RESIDENCY 0x000003f8 | ||
| 1515 | * MSR_PKG_C6_RESIDENCY 0x000003f9 | ||
| 1516 | * MSR_CORE_C3_RESIDENCY 0x000003fc | ||
| 1517 | * MSR_CORE_C6_RESIDENCY 0x000003fd | ||
| 1518 | * | ||
| 1519 | */ | ||
| 1520 | int has_nhm_msrs(unsigned int family, unsigned int model) | ||
| 1472 | { | 1521 | { |
| 1473 | if (!genuine_intel) | 1522 | if (!genuine_intel) |
| 1474 | return 0; | 1523 | return 0; |
| @@ -1495,13 +1544,27 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
| 1495 | case 0x3D: /* BDW */ | 1544 | case 0x3D: /* BDW */ |
| 1496 | case 0x4F: /* BDX */ | 1545 | case 0x4F: /* BDX */ |
| 1497 | case 0x56: /* BDX-DE */ | 1546 | case 0x56: /* BDX-DE */ |
| 1498 | return 1; | ||
| 1499 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ | 1547 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ |
| 1500 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ | 1548 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ |
| 1549 | return 1; | ||
| 1501 | default: | 1550 | default: |
| 1502 | return 0; | 1551 | return 0; |
| 1503 | } | 1552 | } |
| 1504 | } | 1553 | } |
| 1554 | int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) | ||
| 1555 | { | ||
| 1556 | if (!has_nhm_msrs(family, model)) | ||
| 1557 | return 0; | ||
| 1558 | |||
| 1559 | switch (model) { | ||
| 1560 | /* Nehalem compatible, but do not include turbo-ratio limit support */ | ||
| 1561 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ | ||
| 1562 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ | ||
| 1563 | return 0; | ||
| 1564 | default: | ||
| 1565 | return 1; | ||
| 1566 | } | ||
| 1567 | } | ||
| 1505 | int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) | 1568 | int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) |
| 1506 | { | 1569 | { |
| 1507 | if (!genuine_intel) | 1570 | if (!genuine_intel) |
| @@ -1564,6 +1627,103 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 1564 | return 0; | 1627 | return 0; |
| 1565 | } | 1628 | } |
| 1566 | 1629 | ||
| 1630 | /* | ||
| 1631 | * print_perf_limit() | ||
| 1632 | */ | ||
| 1633 | int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 1634 | { | ||
| 1635 | unsigned long long msr; | ||
| 1636 | int cpu; | ||
| 1637 | |||
| 1638 | cpu = t->cpu_id; | ||
| 1639 | |||
| 1640 | /* per-package */ | ||
| 1641 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 1642 | return 0; | ||
| 1643 | |||
| 1644 | if (cpu_migrate(cpu)) { | ||
| 1645 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
| 1646 | return -1; | ||
| 1647 | } | ||
| 1648 | |||
| 1649 | if (do_core_perf_limit_reasons) { | ||
| 1650 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); | ||
| 1651 | fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | ||
| 1652 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", | ||
| 1653 | (msr & 1 << 0) ? "PROCHOT, " : "", | ||
| 1654 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
| 1655 | (msr & 1 << 2) ? "bit2, " : "", | ||
| 1656 | (msr & 1 << 4) ? "Graphics, " : "", | ||
| 1657 | (msr & 1 << 5) ? "Auto-HWP, " : "", | ||
| 1658 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
| 1659 | (msr & 1 << 8) ? "Amps, " : "", | ||
| 1660 | (msr & 1 << 9) ? "CorePwr, " : "", | ||
| 1661 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
| 1662 | (msr & 1 << 11) ? "PkgPwrL2, " : "", | ||
| 1663 | (msr & 1 << 12) ? "MultiCoreTurbo, " : "", | ||
| 1664 | (msr & 1 << 13) ? "Transitions, " : "", | ||
| 1665 | (msr & 1 << 14) ? "bit14, " : "", | ||
| 1666 | (msr & 1 << 15) ? "bit15, " : ""); | ||
| 1667 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", | ||
| 1668 | (msr & 1 << 16) ? "PROCHOT, " : "", | ||
| 1669 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
| 1670 | (msr & 1 << 18) ? "bit18, " : "", | ||
| 1671 | (msr & 1 << 20) ? "Graphics, " : "", | ||
| 1672 | (msr & 1 << 21) ? "Auto-HWP, " : "", | ||
| 1673 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
| 1674 | (msr & 1 << 24) ? "Amps, " : "", | ||
| 1675 | (msr & 1 << 25) ? "CorePwr, " : "", | ||
| 1676 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
| 1677 | (msr & 1 << 27) ? "PkgPwrL2, " : "", | ||
| 1678 | (msr & 1 << 28) ? "MultiCoreTurbo, " : "", | ||
| 1679 | (msr & 1 << 29) ? "Transitions, " : "", | ||
| 1680 | (msr & 1 << 30) ? "bit30, " : "", | ||
| 1681 | (msr & 1 << 31) ? "bit31, " : ""); | ||
| 1682 | |||
| 1683 | } | ||
| 1684 | if (do_gfx_perf_limit_reasons) { | ||
| 1685 | get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); | ||
| 1686 | fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | ||
| 1687 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", | ||
| 1688 | (msr & 1 << 0) ? "PROCHOT, " : "", | ||
| 1689 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
| 1690 | (msr & 1 << 4) ? "Graphics, " : "", | ||
| 1691 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
| 1692 | (msr & 1 << 8) ? "Amps, " : "", | ||
| 1693 | (msr & 1 << 9) ? "GFXPwr, " : "", | ||
| 1694 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
| 1695 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | ||
| 1696 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", | ||
| 1697 | (msr & 1 << 16) ? "PROCHOT, " : "", | ||
| 1698 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
| 1699 | (msr & 1 << 20) ? "Graphics, " : "", | ||
| 1700 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
| 1701 | (msr & 1 << 24) ? "Amps, " : "", | ||
| 1702 | (msr & 1 << 25) ? "GFXPwr, " : "", | ||
| 1703 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
| 1704 | (msr & 1 << 27) ? "PkgPwrL2, " : ""); | ||
| 1705 | } | ||
| 1706 | if (do_ring_perf_limit_reasons) { | ||
| 1707 | get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); | ||
| 1708 | fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | ||
| 1709 | fprintf(stderr, " (Active: %s%s%s%s%s%s)", | ||
| 1710 | (msr & 1 << 0) ? "PROCHOT, " : "", | ||
| 1711 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
| 1712 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
| 1713 | (msr & 1 << 8) ? "Amps, " : "", | ||
| 1714 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
| 1715 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | ||
| 1716 | fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", | ||
| 1717 | (msr & 1 << 16) ? "PROCHOT, " : "", | ||
| 1718 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
| 1719 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
| 1720 | (msr & 1 << 24) ? "Amps, " : "", | ||
| 1721 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
| 1722 | (msr & 1 << 27) ? "PkgPwrL2, " : ""); | ||
| 1723 | } | ||
| 1724 | return 0; | ||
| 1725 | } | ||
| 1726 | |||
| 1567 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | 1727 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ |
| 1568 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | 1728 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ |
| 1569 | 1729 | ||
| @@ -1653,6 +1813,27 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
| 1653 | return; | 1813 | return; |
| 1654 | } | 1814 | } |
| 1655 | 1815 | ||
| 1816 | void perf_limit_reasons_probe(family, model) | ||
| 1817 | { | ||
| 1818 | if (!genuine_intel) | ||
| 1819 | return; | ||
| 1820 | |||
| 1821 | if (family != 6) | ||
| 1822 | return; | ||
| 1823 | |||
| 1824 | switch (model) { | ||
| 1825 | case 0x3C: /* HSW */ | ||
| 1826 | case 0x45: /* HSW */ | ||
| 1827 | case 0x46: /* HSW */ | ||
| 1828 | do_gfx_perf_limit_reasons = 1; | ||
| 1829 | case 0x3F: /* HSX */ | ||
| 1830 | do_core_perf_limit_reasons = 1; | ||
| 1831 | do_ring_perf_limit_reasons = 1; | ||
| 1832 | default: | ||
| 1833 | return; | ||
| 1834 | } | ||
| 1835 | } | ||
| 1836 | |||
| 1656 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 1837 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
| 1657 | { | 1838 | { |
| 1658 | unsigned long long msr; | 1839 | unsigned long long msr; |
| @@ -1842,8 +2023,15 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 1842 | return 0; | 2023 | return 0; |
| 1843 | } | 2024 | } |
| 1844 | 2025 | ||
| 2026 | /* | ||
| 2027 | * SNB adds support for additional MSRs: | ||
| 2028 | * | ||
| 2029 | * MSR_PKG_C7_RESIDENCY 0x000003fa | ||
| 2030 | * MSR_CORE_C7_RESIDENCY 0x000003fe | ||
| 2031 | * MSR_PKG_C2_RESIDENCY 0x0000060d | ||
| 2032 | */ | ||
| 1845 | 2033 | ||
| 1846 | int is_snb(unsigned int family, unsigned int model) | 2034 | int has_snb_msrs(unsigned int family, unsigned int model) |
| 1847 | { | 2035 | { |
| 1848 | if (!genuine_intel) | 2036 | if (!genuine_intel) |
| 1849 | return 0; | 2037 | return 0; |
| @@ -1865,7 +2053,14 @@ int is_snb(unsigned int family, unsigned int model) | |||
| 1865 | return 0; | 2053 | return 0; |
| 1866 | } | 2054 | } |
| 1867 | 2055 | ||
| 1868 | int has_c8_c9_c10(unsigned int family, unsigned int model) | 2056 | /* |
| 2057 | * HSW adds support for additional MSRs: | ||
| 2058 | * | ||
| 2059 | * MSR_PKG_C8_RESIDENCY 0x00000630 | ||
| 2060 | * MSR_PKG_C9_RESIDENCY 0x00000631 | ||
| 2061 | * MSR_PKG_C10_RESIDENCY 0x00000632 | ||
| 2062 | */ | ||
| 2063 | int has_hsw_msrs(unsigned int family, unsigned int model) | ||
| 1869 | { | 2064 | { |
| 1870 | if (!genuine_intel) | 2065 | if (!genuine_intel) |
| 1871 | return 0; | 2066 | return 0; |
| @@ -1917,7 +2112,7 @@ double slm_bclk(void) | |||
| 1917 | 2112 | ||
| 1918 | double discover_bclk(unsigned int family, unsigned int model) | 2113 | double discover_bclk(unsigned int family, unsigned int model) |
| 1919 | { | 2114 | { |
| 1920 | if (is_snb(family, model)) | 2115 | if (has_snb_msrs(family, model)) |
| 1921 | return 100.00; | 2116 | return 100.00; |
| 1922 | else if (is_slm(family, model)) | 2117 | else if (is_slm(family, model)) |
| 1923 | return slm_bclk(); | 2118 | return slm_bclk(); |
| @@ -1965,7 +2160,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
| 1965 | } | 2160 | } |
| 1966 | 2161 | ||
| 1967 | /* Temperature Target MSR is Nehalem and newer only */ | 2162 | /* Temperature Target MSR is Nehalem and newer only */ |
| 1968 | if (!do_nehalem_platform_info) | 2163 | if (!do_nhm_platform_info) |
| 1969 | goto guess; | 2164 | goto guess; |
| 1970 | 2165 | ||
| 1971 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) | 2166 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) |
| @@ -2029,18 +2224,15 @@ void check_cpuid() | |||
| 2029 | ebx = ecx = edx = 0; | 2224 | ebx = ecx = edx = 0; |
| 2030 | __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); | 2225 | __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); |
| 2031 | 2226 | ||
| 2032 | if (max_level < 0x80000007) | 2227 | if (max_level >= 0x80000007) { |
| 2033 | errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level); | ||
| 2034 | 2228 | ||
| 2035 | /* | 2229 | /* |
| 2036 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 | 2230 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 |
| 2037 | * this check is valid for both Intel and AMD | 2231 | * this check is valid for both Intel and AMD |
| 2038 | */ | 2232 | */ |
| 2039 | __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | 2233 | __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); |
| 2040 | has_invariant_tsc = edx & (1 << 8); | 2234 | has_invariant_tsc = edx & (1 << 8); |
| 2041 | 2235 | } | |
| 2042 | if (!has_invariant_tsc) | ||
| 2043 | errx(1, "No invariant TSC"); | ||
| 2044 | 2236 | ||
| 2045 | /* | 2237 | /* |
| 2046 | * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 | 2238 | * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 |
| @@ -2054,26 +2246,22 @@ void check_cpuid() | |||
| 2054 | has_epb = ecx & (1 << 3); | 2246 | has_epb = ecx & (1 << 3); |
| 2055 | 2247 | ||
| 2056 | if (verbose) | 2248 | if (verbose) |
| 2057 | fprintf(stderr, "CPUID(6): %s%s%s%s\n", | 2249 | fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", |
| 2058 | has_aperf ? "APERF" : "No APERF!", | 2250 | has_aperf ? "" : "No ", |
| 2059 | do_dts ? ", DTS" : "", | 2251 | do_dts ? "" : "No ", |
| 2060 | do_ptm ? ", PTM": "", | 2252 | do_ptm ? "" : "No ", |
| 2061 | has_epb ? ", EPB": ""); | 2253 | has_epb ? "" : "No "); |
| 2062 | 2254 | ||
| 2063 | if (!has_aperf) | 2255 | do_nhm_platform_info = do_nhm_cstates = do_smi = has_nhm_msrs(family, model); |
| 2064 | errx(-1, "No APERF"); | 2256 | do_snb_cstates = has_snb_msrs(family, model); |
| 2065 | 2257 | do_c8_c9_c10 = has_hsw_msrs(family, model); | |
| 2066 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; | ||
| 2067 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ | ||
| 2068 | do_smi = do_nhm_cstates; | ||
| 2069 | do_snb_cstates = is_snb(family, model); | ||
| 2070 | do_c8_c9_c10 = has_c8_c9_c10(family, model); | ||
| 2071 | do_slm_cstates = is_slm(family, model); | 2258 | do_slm_cstates = is_slm(family, model); |
| 2072 | bclk = discover_bclk(family, model); | 2259 | bclk = discover_bclk(family, model); |
| 2073 | 2260 | ||
| 2074 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); | 2261 | do_nhm_turbo_ratio_limit = has_nhm_turbo_ratio_limit(family, model); |
| 2075 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); | 2262 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); |
| 2076 | rapl_probe(family, model); | 2263 | rapl_probe(family, model); |
| 2264 | perf_limit_reasons_probe(family, model); | ||
| 2077 | 2265 | ||
| 2078 | return; | 2266 | return; |
| 2079 | } | 2267 | } |
| @@ -2299,10 +2487,9 @@ void setup_all_buffers(void) | |||
| 2299 | 2487 | ||
| 2300 | void turbostat_init() | 2488 | void turbostat_init() |
| 2301 | { | 2489 | { |
| 2302 | check_cpuid(); | ||
| 2303 | |||
| 2304 | check_dev_msr(); | 2490 | check_dev_msr(); |
| 2305 | check_super_user(); | 2491 | check_permissions(); |
| 2492 | check_cpuid(); | ||
| 2306 | 2493 | ||
| 2307 | setup_all_buffers(); | 2494 | setup_all_buffers(); |
| 2308 | 2495 | ||
| @@ -2313,6 +2500,9 @@ void turbostat_init() | |||
| 2313 | for_all_cpus(print_epb, ODD_COUNTERS); | 2500 | for_all_cpus(print_epb, ODD_COUNTERS); |
| 2314 | 2501 | ||
| 2315 | if (verbose) | 2502 | if (verbose) |
| 2503 | for_all_cpus(print_perf_limit, ODD_COUNTERS); | ||
| 2504 | |||
| 2505 | if (verbose) | ||
| 2316 | for_all_cpus(print_rapl, ODD_COUNTERS); | 2506 | for_all_cpus(print_rapl, ODD_COUNTERS); |
| 2317 | 2507 | ||
| 2318 | for_all_cpus(set_temperature_target, ODD_COUNTERS); | 2508 | for_all_cpus(set_temperature_target, ODD_COUNTERS); |
| @@ -2441,7 +2631,7 @@ int main(int argc, char **argv) | |||
| 2441 | cmdline(argc, argv); | 2631 | cmdline(argc, argv); |
| 2442 | 2632 | ||
| 2443 | if (verbose) | 2633 | if (verbose) |
| 2444 | fprintf(stderr, "turbostat v3.7 Feb 6, 2014" | 2634 | fprintf(stderr, "turbostat v3.9 23-Jan, 2015" |
| 2445 | " - Len Brown <lenb@kernel.org>\n"); | 2635 | " - Len Brown <lenb@kernel.org>\n"); |
| 2446 | 2636 | ||
| 2447 | turbostat_init(); | 2637 | turbostat_init(); |
