diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-02-10 10:11:26 -0500 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-02-10 10:11:26 -0500 |
commit | b5e82233cab43c25fc0a1c28d9136a086db4aa52 (patch) | |
tree | b98690b48f829b3f500770df4b56919f24fee8b0 /tools | |
parent | 7bc95d4ef1033af808cb0714081cb45cd4ee5669 (diff) | |
parent | 994b7f10b0ea3b70a19043f8adb3326707bdc2a9 (diff) |
Merge branch 'pm-tools'
* pm-tools:
tools/power turbostat: relax dependency on APERF_MSR
tools/power turbostat: relax dependency on invariant TSC
tools/power turbostat: decode MSR_*_PERF_LIMIT_REASONS
tools/power turbostat: relax dependency on root permission
cpupower Makefile change to help run the tool without 'make install'
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/cpupower/Makefile | 2 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 66 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 338 |
3 files changed, 310 insertions, 96 deletions
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 2e2ba2efa0d9..3ed7c0476d48 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile | |||
@@ -209,7 +209,7 @@ $(OUTPUT)%.o: %.c | |||
209 | 209 | ||
210 | $(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) | 210 | $(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ) |
211 | $(ECHO) " CC " $@ | 211 | $(ECHO) " CC " $@ |
212 | $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@ | 212 | $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -Wl,-rpath=./ -lrt -lpci -L$(OUTPUT) -o $@ |
213 | $(QUIET) $(STRIPCMD) $@ | 213 | $(QUIET) $(STRIPCMD) $@ |
214 | 214 | ||
215 | $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) | 215 | $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 56bfb523c5bb..9b950699e63d 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -12,16 +12,16 @@ turbostat \- Report processor frequency and idle statistics | |||
12 | .RB [ "\-i interval_sec" ] | 12 | .RB [ "\-i interval_sec" ] |
13 | .SH DESCRIPTION | 13 | .SH DESCRIPTION |
14 | \fBturbostat \fP reports processor topology, frequency, | 14 | \fBturbostat \fP reports processor topology, frequency, |
15 | idle power-state statistics, temperature and power on modern X86 processors. | 15 | idle power-state statistics, temperature and power on X86 processors. |
16 | Either \fBcommand\fP is forked and statistics are printed | 16 | There are two ways to invoke turbostat. |
17 | upon its completion, or statistics are printed periodically. | 17 | The first method is to supply a |
18 | 18 | \fBcommand\fP, which is forked and statistics are printed | |
19 | \fBturbostat \fP | 19 | upon its completion. |
20 | must be run on root, and | 20 | The second method is to omit the command, |
21 | minimally requires that the processor | 21 | and turbostat will print statistics every 5 seconds. |
22 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. | 22 | The 5-second interval can be changed using the -i option. |
23 | Additional information is reported depending on hardware counter support. | 23 | |
24 | 24 | Some information is not available on older processors. |
25 | .SS Options | 25 | .SS Options |
26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. | 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. |
27 | .PP | 27 | .PP |
@@ -130,12 +130,13 @@ cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) | |||
130 | ... | 130 | ... |
131 | .fi | 131 | .fi |
132 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency | 132 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency |
133 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal | 133 | available at the minimum package voltage. The \fBTSC frequency\fP is the base |
134 | maximum frequency of the processor if turbo-mode were not available. This frequency | 134 | frequency of the processor -- this should match the brand string |
135 | in /proc/cpuinfo. This base frequency | ||
135 | should be sustainable on all CPUs indefinitely, given nominal power and cooling. | 136 | should be sustainable on all CPUs indefinitely, given nominal power and cooling. |
136 | The remaining rows show what maximum turbo frequency is possible | 137 | The remaining rows show what maximum turbo frequency is possible |
137 | depending on the number of idle cores. Note that this information is | 138 | depending on the number of idle cores. Note that not all information is |
138 | not available on all processors. | 139 | available on all processors. |
139 | .SH FORK EXAMPLE | 140 | .SH FORK EXAMPLE |
140 | If turbostat is invoked with a command, it will fork that command | 141 | If turbostat is invoked with a command, it will fork that command |
141 | and output the statistics gathered when the command exits. | 142 | and output the statistics gathered when the command exits. |
@@ -176,6 +177,11 @@ not including any non-busy idle time. | |||
176 | 177 | ||
177 | .B "turbostat " | 178 | .B "turbostat " |
178 | must be run as root. | 179 | must be run as root. |
180 | Alternatively, non-root users can be enabled to run turbostat this way: | ||
181 | |||
182 | # setcap cap_sys_rawio=ep ./turbostat | ||
183 | |||
184 | # chmod +r /dev/cpu/*/msr | ||
179 | 185 | ||
180 | .B "turbostat " | 186 | .B "turbostat " |
181 | reads hardware counters, but doesn't write them. | 187 | reads hardware counters, but doesn't write them. |
@@ -184,15 +190,33 @@ multiple invocations of itself. | |||
184 | 190 | ||
185 | \fBturbostat \fP | 191 | \fBturbostat \fP |
186 | may work poorly on Linux-2.6.20 through 2.6.29, | 192 | may work poorly on Linux-2.6.20 through 2.6.29, |
187 | as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF | 193 | as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF MSRs |
188 | in those kernels. | 194 | in those kernels. |
189 | 195 | ||
190 | If the TSC column does not make sense, then | 196 | Avg_MHz = APERF_delta/measurement_interval. This is the actual |
191 | the other numbers will also make no sense. | 197 | number of elapsed cycles divided by the entire sample interval -- |
192 | Turbostat is lightweight, and its data collection is not atomic. | 198 | including idle time. Note that this calculation is resilient |
193 | These issues are usually caused by an extremely short measurement | 199 | to systems lacking a non-stop TSC. |
194 | interval (much less than 1 second), or system activity that prevents | 200 | |
195 | turbostat from being able to run on all CPUS to quickly collect data. | 201 | TSC_MHz = TSC_delta/measurement_interval. |
202 | On a system with an invariant TSC, this value will be constant | ||
203 | and will closely match the base frequency value shown | ||
204 | in the brand string in /proc/cpuinfo. On a system where | ||
205 | the TSC stops in idle, TSC_MHz will drop | ||
206 | below the processor's base frequency. | ||
207 | |||
208 | %Busy = MPERF_delta/TSC_delta | ||
209 | |||
210 | Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval | ||
211 | |||
212 | Note that these calculations depend on TSC_delta, so they | ||
213 | are not reliable during intervals when TSC_MHz is not running at the base frequency. | ||
214 | |||
215 | Turbostat data collection is not atomic. | ||
216 | Extremely short measurement intervals (much less than 1 second), | ||
217 | or system activity that prevents turbostat from being able | ||
218 | to run on all CPUS to quickly collect data, will result in | ||
219 | inconsistent results. | ||
196 | 220 | ||
197 | The APERF, MPERF MSRs are defined to count non-halted cycles. | 221 | The APERF, MPERF MSRs are defined to count non-halted cycles. |
198 | Although it is not guaranteed by the architecture, turbostat assumes | 222 | Although it is not guaranteed by the architecture, turbostat assumes |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5b1b807265a1..a02c02f25e88 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -38,6 +38,8 @@ | |||
38 | #include <ctype.h> | 38 | #include <ctype.h> |
39 | #include <sched.h> | 39 | #include <sched.h> |
40 | #include <cpuid.h> | 40 | #include <cpuid.h> |
41 | #include <linux/capability.h> | ||
42 | #include <errno.h> | ||
41 | 43 | ||
42 | char *proc_stat = "/proc/stat"; | 44 | char *proc_stat = "/proc/stat"; |
43 | unsigned int interval_sec = 5; /* set with -i interval_sec */ | 45 | unsigned int interval_sec = 5; /* set with -i interval_sec */ |
@@ -59,8 +61,8 @@ unsigned int has_epb; | |||
59 | unsigned int units = 1000000; /* MHz etc */ | 61 | unsigned int units = 1000000; /* MHz etc */ |
60 | unsigned int genuine_intel; | 62 | unsigned int genuine_intel; |
61 | unsigned int has_invariant_tsc; | 63 | unsigned int has_invariant_tsc; |
62 | unsigned int do_nehalem_platform_info; | 64 | unsigned int do_nhm_platform_info; |
63 | unsigned int do_nehalem_turbo_ratio_limit; | 65 | unsigned int do_nhm_turbo_ratio_limit; |
64 | unsigned int do_ivt_turbo_ratio_limit; | 66 | unsigned int do_ivt_turbo_ratio_limit; |
65 | unsigned int extra_msr_offset32; | 67 | unsigned int extra_msr_offset32; |
66 | unsigned int extra_msr_offset64; | 68 | unsigned int extra_msr_offset64; |
@@ -81,6 +83,9 @@ unsigned int tcc_activation_temp; | |||
81 | unsigned int tcc_activation_temp_override; | 83 | unsigned int tcc_activation_temp_override; |
82 | double rapl_power_units, rapl_energy_units, rapl_time_units; | 84 | double rapl_power_units, rapl_energy_units, rapl_time_units; |
83 | double rapl_joule_counter_range; | 85 | double rapl_joule_counter_range; |
86 | unsigned int do_core_perf_limit_reasons; | ||
87 | unsigned int do_gfx_perf_limit_reasons; | ||
88 | unsigned int do_ring_perf_limit_reasons; | ||
84 | 89 | ||
85 | #define RAPL_PKG (1 << 0) | 90 | #define RAPL_PKG (1 << 0) |
86 | /* 0x610 MSR_PKG_POWER_LIMIT */ | 91 | /* 0x610 MSR_PKG_POWER_LIMIT */ |
@@ -251,15 +256,13 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr) | |||
251 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); | 256 | sprintf(pathname, "/dev/cpu/%d/msr", cpu); |
252 | fd = open(pathname, O_RDONLY); | 257 | fd = open(pathname, O_RDONLY); |
253 | if (fd < 0) | 258 | if (fd < 0) |
254 | return -1; | 259 | err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); |
255 | 260 | ||
256 | retval = pread(fd, msr, sizeof *msr, offset); | 261 | retval = pread(fd, msr, sizeof *msr, offset); |
257 | close(fd); | 262 | close(fd); |
258 | 263 | ||
259 | if (retval != sizeof *msr) { | 264 | if (retval != sizeof *msr) |
260 | fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset); | 265 | err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); |
261 | return -1; | ||
262 | } | ||
263 | 266 | ||
264 | return 0; | 267 | return 0; |
265 | } | 268 | } |
@@ -281,7 +284,7 @@ void print_header(void) | |||
281 | outp += sprintf(outp, " CPU"); | 284 | outp += sprintf(outp, " CPU"); |
282 | if (has_aperf) | 285 | if (has_aperf) |
283 | outp += sprintf(outp, " Avg_MHz"); | 286 | outp += sprintf(outp, " Avg_MHz"); |
284 | if (do_nhm_cstates) | 287 | if (has_aperf) |
285 | outp += sprintf(outp, " %%Busy"); | 288 | outp += sprintf(outp, " %%Busy"); |
286 | if (has_aperf) | 289 | if (has_aperf) |
287 | outp += sprintf(outp, " Bzy_MHz"); | 290 | outp += sprintf(outp, " Bzy_MHz"); |
@@ -337,7 +340,7 @@ void print_header(void) | |||
337 | outp += sprintf(outp, " PKG_%%"); | 340 | outp += sprintf(outp, " PKG_%%"); |
338 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 341 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
339 | outp += sprintf(outp, " RAM_%%"); | 342 | outp += sprintf(outp, " RAM_%%"); |
340 | } else { | 343 | } else if (do_rapl && rapl_joules) { |
341 | if (do_rapl & RAPL_PKG) | 344 | if (do_rapl & RAPL_PKG) |
342 | outp += sprintf(outp, " Pkg_J"); | 345 | outp += sprintf(outp, " Pkg_J"); |
343 | if (do_rapl & RAPL_CORES) | 346 | if (do_rapl & RAPL_CORES) |
@@ -457,25 +460,25 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
457 | outp += sprintf(outp, "%8d", t->cpu_id); | 460 | outp += sprintf(outp, "%8d", t->cpu_id); |
458 | } | 461 | } |
459 | 462 | ||
460 | /* AvgMHz */ | 463 | /* Avg_MHz */ |
461 | if (has_aperf) | 464 | if (has_aperf) |
462 | outp += sprintf(outp, "%8.0f", | 465 | outp += sprintf(outp, "%8.0f", |
463 | 1.0 / units * t->aperf / interval_float); | 466 | 1.0 / units * t->aperf / interval_float); |
464 | 467 | ||
465 | /* %c0 */ | 468 | /* %Busy */ |
466 | if (do_nhm_cstates) { | 469 | if (has_aperf) { |
467 | if (!skip_c0) | 470 | if (!skip_c0) |
468 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); | 471 | outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); |
469 | else | 472 | else |
470 | outp += sprintf(outp, "********"); | 473 | outp += sprintf(outp, "********"); |
471 | } | 474 | } |
472 | 475 | ||
473 | /* BzyMHz */ | 476 | /* Bzy_MHz */ |
474 | if (has_aperf) | 477 | if (has_aperf) |
475 | outp += sprintf(outp, "%8.0f", | 478 | outp += sprintf(outp, "%8.0f", |
476 | 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); | 479 | 1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); |
477 | 480 | ||
478 | /* TSC */ | 481 | /* TSC_MHz */ |
479 | outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); | 482 | outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); |
480 | 483 | ||
481 | /* SMI */ | 484 | /* SMI */ |
@@ -561,7 +564,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
561 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | 564 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); |
562 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 565 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
563 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | 566 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); |
564 | } else { | 567 | } else if (do_rapl && rapl_joules) { |
565 | if (do_rapl & RAPL_PKG) | 568 | if (do_rapl & RAPL_PKG) |
566 | outp += sprintf(outp, fmt8, | 569 | outp += sprintf(outp, fmt8, |
567 | p->energy_pkg * rapl_energy_units); | 570 | p->energy_pkg * rapl_energy_units); |
@@ -578,8 +581,8 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
578 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | 581 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); |
579 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 582 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
580 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | 583 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); |
581 | outp += sprintf(outp, fmt8, interval_float); | ||
582 | 584 | ||
585 | outp += sprintf(outp, fmt8, interval_float); | ||
583 | } | 586 | } |
584 | done: | 587 | done: |
585 | outp += sprintf(outp, "\n"); | 588 | outp += sprintf(outp, "\n"); |
@@ -670,24 +673,26 @@ delta_thread(struct thread_data *new, struct thread_data *old, | |||
670 | 673 | ||
671 | old->c1 = new->c1 - old->c1; | 674 | old->c1 = new->c1 - old->c1; |
672 | 675 | ||
673 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { | 676 | if (has_aperf) { |
674 | old->aperf = new->aperf - old->aperf; | 677 | if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { |
675 | old->mperf = new->mperf - old->mperf; | 678 | old->aperf = new->aperf - old->aperf; |
676 | } else { | 679 | old->mperf = new->mperf - old->mperf; |
680 | } else { | ||
677 | 681 | ||
678 | if (!aperf_mperf_unstable) { | 682 | if (!aperf_mperf_unstable) { |
679 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); | 683 | fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname); |
680 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); | 684 | fprintf(stderr, "* Frequency results do not cover entire interval *\n"); |
681 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); | 685 | fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); |
682 | 686 | ||
683 | aperf_mperf_unstable = 1; | 687 | aperf_mperf_unstable = 1; |
688 | } | ||
689 | /* | ||
690 | * mperf delta is likely a huge "positive" number | ||
691 | * can not use it for calculating c0 time | ||
692 | */ | ||
693 | skip_c0 = 1; | ||
694 | skip_c1 = 1; | ||
684 | } | 695 | } |
685 | /* | ||
686 | * mperf delta is likely a huge "positive" number | ||
687 | * can not use it for calculating c0 time | ||
688 | */ | ||
689 | skip_c0 = 1; | ||
690 | skip_c1 = 1; | ||
691 | } | 696 | } |
692 | 697 | ||
693 | 698 | ||
@@ -1019,7 +1024,7 @@ void print_verbose_header(void) | |||
1019 | unsigned long long msr; | 1024 | unsigned long long msr; |
1020 | unsigned int ratio; | 1025 | unsigned int ratio; |
1021 | 1026 | ||
1022 | if (!do_nehalem_platform_info) | 1027 | if (!do_nhm_platform_info) |
1023 | return; | 1028 | return; |
1024 | 1029 | ||
1025 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); | 1030 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); |
@@ -1132,7 +1137,7 @@ print_nhm_turbo_ratio_limits: | |||
1132 | } | 1137 | } |
1133 | fprintf(stderr, ")\n"); | 1138 | fprintf(stderr, ")\n"); |
1134 | 1139 | ||
1135 | if (!do_nehalem_turbo_ratio_limit) | 1140 | if (!do_nhm_turbo_ratio_limit) |
1136 | return; | 1141 | return; |
1137 | 1142 | ||
1138 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | 1143 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
@@ -1178,6 +1183,7 @@ print_nhm_turbo_ratio_limits: | |||
1178 | if (ratio) | 1183 | if (ratio) |
1179 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", | 1184 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", |
1180 | ratio, bclk, ratio * bclk); | 1185 | ratio, bclk, ratio * bclk); |
1186 | |||
1181 | } | 1187 | } |
1182 | 1188 | ||
1183 | void free_all_buffers(void) | 1189 | void free_all_buffers(void) |
@@ -1458,17 +1464,60 @@ void check_dev_msr() | |||
1458 | struct stat sb; | 1464 | struct stat sb; |
1459 | 1465 | ||
1460 | if (stat("/dev/cpu/0/msr", &sb)) | 1466 | if (stat("/dev/cpu/0/msr", &sb)) |
1461 | err(-5, "no /dev/cpu/0/msr\n" | 1467 | err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); |
1462 | "Try \"# modprobe msr\""); | ||
1463 | } | 1468 | } |
1464 | 1469 | ||
1465 | void check_super_user() | 1470 | void check_permissions() |
1466 | { | 1471 | { |
1467 | if (getuid() != 0) | 1472 | struct __user_cap_header_struct cap_header_data; |
1468 | errx(-6, "must be root"); | 1473 | cap_user_header_t cap_header = &cap_header_data; |
1474 | struct __user_cap_data_struct cap_data_data; | ||
1475 | cap_user_data_t cap_data = &cap_data_data; | ||
1476 | extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); | ||
1477 | int do_exit = 0; | ||
1478 | |||
1479 | /* check for CAP_SYS_RAWIO */ | ||
1480 | cap_header->pid = getpid(); | ||
1481 | cap_header->version = _LINUX_CAPABILITY_VERSION; | ||
1482 | if (capget(cap_header, cap_data) < 0) | ||
1483 | err(-6, "capget(2) failed"); | ||
1484 | |||
1485 | if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) { | ||
1486 | do_exit++; | ||
1487 | warnx("capget(CAP_SYS_RAWIO) failed," | ||
1488 | " try \"# setcap cap_sys_rawio=ep %s\"", progname); | ||
1489 | } | ||
1490 | |||
1491 | /* test file permissions */ | ||
1492 | if (euidaccess("/dev/cpu/0/msr", R_OK)) { | ||
1493 | do_exit++; | ||
1494 | warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); | ||
1495 | } | ||
1496 | |||
1497 | /* if all else fails, tell them to be root */ | ||
1498 | if (do_exit) | ||
1499 | if (getuid() != 0) | ||
1500 | warnx("... or simply run as root"); | ||
1501 | |||
1502 | if (do_exit) | ||
1503 | exit(-6); | ||
1469 | } | 1504 | } |
1470 | 1505 | ||
1471 | int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) | 1506 | /* |
1507 | * NHM adds support for additional MSRs: | ||
1508 | * | ||
1509 | * MSR_SMI_COUNT 0x00000034 | ||
1510 | * | ||
1511 | * MSR_NHM_PLATFORM_INFO 0x000000ce | ||
1512 | * MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 | ||
1513 | * | ||
1514 | * MSR_PKG_C3_RESIDENCY 0x000003f8 | ||
1515 | * MSR_PKG_C6_RESIDENCY 0x000003f9 | ||
1516 | * MSR_CORE_C3_RESIDENCY 0x000003fc | ||
1517 | * MSR_CORE_C6_RESIDENCY 0x000003fd | ||
1518 | * | ||
1519 | */ | ||
1520 | int has_nhm_msrs(unsigned int family, unsigned int model) | ||
1472 | { | 1521 | { |
1473 | if (!genuine_intel) | 1522 | if (!genuine_intel) |
1474 | return 0; | 1523 | return 0; |
@@ -1495,13 +1544,27 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1495 | case 0x3D: /* BDW */ | 1544 | case 0x3D: /* BDW */ |
1496 | case 0x4F: /* BDX */ | 1545 | case 0x4F: /* BDX */ |
1497 | case 0x56: /* BDX-DE */ | 1546 | case 0x56: /* BDX-DE */ |
1498 | return 1; | ||
1499 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ | 1547 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ |
1500 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ | 1548 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ |
1549 | return 1; | ||
1501 | default: | 1550 | default: |
1502 | return 0; | 1551 | return 0; |
1503 | } | 1552 | } |
1504 | } | 1553 | } |
1554 | int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model) | ||
1555 | { | ||
1556 | if (!has_nhm_msrs(family, model)) | ||
1557 | return 0; | ||
1558 | |||
1559 | switch (model) { | ||
1560 | /* Nehalem compatible, but do not include turbo-ratio limit support */ | ||
1561 | case 0x2E: /* Nehalem-EX Xeon - Beckton */ | ||
1562 | case 0x2F: /* Westmere-EX Xeon - Eagleton */ | ||
1563 | return 0; | ||
1564 | default: | ||
1565 | return 1; | ||
1566 | } | ||
1567 | } | ||
1505 | int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) | 1568 | int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) |
1506 | { | 1569 | { |
1507 | if (!genuine_intel) | 1570 | if (!genuine_intel) |
@@ -1564,6 +1627,103 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1564 | return 0; | 1627 | return 0; |
1565 | } | 1628 | } |
1566 | 1629 | ||
1630 | /* | ||
1631 | * print_perf_limit() | ||
1632 | */ | ||
1633 | int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1634 | { | ||
1635 | unsigned long long msr; | ||
1636 | int cpu; | ||
1637 | |||
1638 | cpu = t->cpu_id; | ||
1639 | |||
1640 | /* per-package */ | ||
1641 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1642 | return 0; | ||
1643 | |||
1644 | if (cpu_migrate(cpu)) { | ||
1645 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1646 | return -1; | ||
1647 | } | ||
1648 | |||
1649 | if (do_core_perf_limit_reasons) { | ||
1650 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); | ||
1651 | fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | ||
1652 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", | ||
1653 | (msr & 1 << 0) ? "PROCHOT, " : "", | ||
1654 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
1655 | (msr & 1 << 2) ? "bit2, " : "", | ||
1656 | (msr & 1 << 4) ? "Graphics, " : "", | ||
1657 | (msr & 1 << 5) ? "Auto-HWP, " : "", | ||
1658 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
1659 | (msr & 1 << 8) ? "Amps, " : "", | ||
1660 | (msr & 1 << 9) ? "CorePwr, " : "", | ||
1661 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
1662 | (msr & 1 << 11) ? "PkgPwrL2, " : "", | ||
1663 | (msr & 1 << 12) ? "MultiCoreTurbo, " : "", | ||
1664 | (msr & 1 << 13) ? "Transitions, " : "", | ||
1665 | (msr & 1 << 14) ? "bit14, " : "", | ||
1666 | (msr & 1 << 15) ? "bit15, " : ""); | ||
1667 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", | ||
1668 | (msr & 1 << 16) ? "PROCHOT, " : "", | ||
1669 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
1670 | (msr & 1 << 18) ? "bit18, " : "", | ||
1671 | (msr & 1 << 20) ? "Graphics, " : "", | ||
1672 | (msr & 1 << 21) ? "Auto-HWP, " : "", | ||
1673 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
1674 | (msr & 1 << 24) ? "Amps, " : "", | ||
1675 | (msr & 1 << 25) ? "CorePwr, " : "", | ||
1676 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
1677 | (msr & 1 << 27) ? "PkgPwrL2, " : "", | ||
1678 | (msr & 1 << 28) ? "MultiCoreTurbo, " : "", | ||
1679 | (msr & 1 << 29) ? "Transitions, " : "", | ||
1680 | (msr & 1 << 30) ? "bit30, " : "", | ||
1681 | (msr & 1 << 31) ? "bit31, " : ""); | ||
1682 | |||
1683 | } | ||
1684 | if (do_gfx_perf_limit_reasons) { | ||
1685 | get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); | ||
1686 | fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | ||
1687 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)", | ||
1688 | (msr & 1 << 0) ? "PROCHOT, " : "", | ||
1689 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
1690 | (msr & 1 << 4) ? "Graphics, " : "", | ||
1691 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
1692 | (msr & 1 << 8) ? "Amps, " : "", | ||
1693 | (msr & 1 << 9) ? "GFXPwr, " : "", | ||
1694 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
1695 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | ||
1696 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n", | ||
1697 | (msr & 1 << 16) ? "PROCHOT, " : "", | ||
1698 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
1699 | (msr & 1 << 20) ? "Graphics, " : "", | ||
1700 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
1701 | (msr & 1 << 24) ? "Amps, " : "", | ||
1702 | (msr & 1 << 25) ? "GFXPwr, " : "", | ||
1703 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
1704 | (msr & 1 << 27) ? "PkgPwrL2, " : ""); | ||
1705 | } | ||
1706 | if (do_ring_perf_limit_reasons) { | ||
1707 | get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); | ||
1708 | fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | ||
1709 | fprintf(stderr, " (Active: %s%s%s%s%s%s)", | ||
1710 | (msr & 1 << 0) ? "PROCHOT, " : "", | ||
1711 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
1712 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
1713 | (msr & 1 << 8) ? "Amps, " : "", | ||
1714 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
1715 | (msr & 1 << 11) ? "PkgPwrL2, " : ""); | ||
1716 | fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n", | ||
1717 | (msr & 1 << 16) ? "PROCHOT, " : "", | ||
1718 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
1719 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
1720 | (msr & 1 << 24) ? "Amps, " : "", | ||
1721 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
1722 | (msr & 1 << 27) ? "PkgPwrL2, " : ""); | ||
1723 | } | ||
1724 | return 0; | ||
1725 | } | ||
1726 | |||
1567 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | 1727 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ |
1568 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | 1728 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ |
1569 | 1729 | ||
@@ -1653,6 +1813,27 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
1653 | return; | 1813 | return; |
1654 | } | 1814 | } |
1655 | 1815 | ||
1816 | void perf_limit_reasons_probe(family, model) | ||
1817 | { | ||
1818 | if (!genuine_intel) | ||
1819 | return; | ||
1820 | |||
1821 | if (family != 6) | ||
1822 | return; | ||
1823 | |||
1824 | switch (model) { | ||
1825 | case 0x3C: /* HSW */ | ||
1826 | case 0x45: /* HSW */ | ||
1827 | case 0x46: /* HSW */ | ||
1828 | do_gfx_perf_limit_reasons = 1; | ||
1829 | case 0x3F: /* HSX */ | ||
1830 | do_core_perf_limit_reasons = 1; | ||
1831 | do_ring_perf_limit_reasons = 1; | ||
1832 | default: | ||
1833 | return; | ||
1834 | } | ||
1835 | } | ||
1836 | |||
1656 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 1837 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
1657 | { | 1838 | { |
1658 | unsigned long long msr; | 1839 | unsigned long long msr; |
@@ -1842,8 +2023,15 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1842 | return 0; | 2023 | return 0; |
1843 | } | 2024 | } |
1844 | 2025 | ||
2026 | /* | ||
2027 | * SNB adds support for additional MSRs: | ||
2028 | * | ||
2029 | * MSR_PKG_C7_RESIDENCY 0x000003fa | ||
2030 | * MSR_CORE_C7_RESIDENCY 0x000003fe | ||
2031 | * MSR_PKG_C2_RESIDENCY 0x0000060d | ||
2032 | */ | ||
1845 | 2033 | ||
1846 | int is_snb(unsigned int family, unsigned int model) | 2034 | int has_snb_msrs(unsigned int family, unsigned int model) |
1847 | { | 2035 | { |
1848 | if (!genuine_intel) | 2036 | if (!genuine_intel) |
1849 | return 0; | 2037 | return 0; |
@@ -1865,7 +2053,14 @@ int is_snb(unsigned int family, unsigned int model) | |||
1865 | return 0; | 2053 | return 0; |
1866 | } | 2054 | } |
1867 | 2055 | ||
1868 | int has_c8_c9_c10(unsigned int family, unsigned int model) | 2056 | /* |
2057 | * HSW adds support for additional MSRs: | ||
2058 | * | ||
2059 | * MSR_PKG_C8_RESIDENCY 0x00000630 | ||
2060 | * MSR_PKG_C9_RESIDENCY 0x00000631 | ||
2061 | * MSR_PKG_C10_RESIDENCY 0x00000632 | ||
2062 | */ | ||
2063 | int has_hsw_msrs(unsigned int family, unsigned int model) | ||
1869 | { | 2064 | { |
1870 | if (!genuine_intel) | 2065 | if (!genuine_intel) |
1871 | return 0; | 2066 | return 0; |
@@ -1917,7 +2112,7 @@ double slm_bclk(void) | |||
1917 | 2112 | ||
1918 | double discover_bclk(unsigned int family, unsigned int model) | 2113 | double discover_bclk(unsigned int family, unsigned int model) |
1919 | { | 2114 | { |
1920 | if (is_snb(family, model)) | 2115 | if (has_snb_msrs(family, model)) |
1921 | return 100.00; | 2116 | return 100.00; |
1922 | else if (is_slm(family, model)) | 2117 | else if (is_slm(family, model)) |
1923 | return slm_bclk(); | 2118 | return slm_bclk(); |
@@ -1965,7 +2160,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk | |||
1965 | } | 2160 | } |
1966 | 2161 | ||
1967 | /* Temperature Target MSR is Nehalem and newer only */ | 2162 | /* Temperature Target MSR is Nehalem and newer only */ |
1968 | if (!do_nehalem_platform_info) | 2163 | if (!do_nhm_platform_info) |
1969 | goto guess; | 2164 | goto guess; |
1970 | 2165 | ||
1971 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) | 2166 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) |
@@ -2029,18 +2224,15 @@ void check_cpuid() | |||
2029 | ebx = ecx = edx = 0; | 2224 | ebx = ecx = edx = 0; |
2030 | __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); | 2225 | __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); |
2031 | 2226 | ||
2032 | if (max_level < 0x80000007) | 2227 | if (max_level >= 0x80000007) { |
2033 | errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level); | ||
2034 | 2228 | ||
2035 | /* | 2229 | /* |
2036 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 | 2230 | * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 |
2037 | * this check is valid for both Intel and AMD | 2231 | * this check is valid for both Intel and AMD |
2038 | */ | 2232 | */ |
2039 | __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); | 2233 | __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); |
2040 | has_invariant_tsc = edx & (1 << 8); | 2234 | has_invariant_tsc = edx & (1 << 8); |
2041 | 2235 | } | |
2042 | if (!has_invariant_tsc) | ||
2043 | errx(1, "No invariant TSC"); | ||
2044 | 2236 | ||
2045 | /* | 2237 | /* |
2046 | * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 | 2238 | * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 |
@@ -2054,26 +2246,22 @@ void check_cpuid() | |||
2054 | has_epb = ecx & (1 << 3); | 2246 | has_epb = ecx & (1 << 3); |
2055 | 2247 | ||
2056 | if (verbose) | 2248 | if (verbose) |
2057 | fprintf(stderr, "CPUID(6): %s%s%s%s\n", | 2249 | fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n", |
2058 | has_aperf ? "APERF" : "No APERF!", | 2250 | has_aperf ? "" : "No ", |
2059 | do_dts ? ", DTS" : "", | 2251 | do_dts ? "" : "No ", |
2060 | do_ptm ? ", PTM": "", | 2252 | do_ptm ? "" : "No ", |
2061 | has_epb ? ", EPB": ""); | 2253 | has_epb ? "" : "No "); |
2062 | 2254 | ||
2063 | if (!has_aperf) | 2255 | do_nhm_platform_info = do_nhm_cstates = do_smi = has_nhm_msrs(family, model); |
2064 | errx(-1, "No APERF"); | 2256 | do_snb_cstates = has_snb_msrs(family, model); |
2065 | 2257 | do_c8_c9_c10 = has_hsw_msrs(family, model); | |
2066 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; | ||
2067 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ | ||
2068 | do_smi = do_nhm_cstates; | ||
2069 | do_snb_cstates = is_snb(family, model); | ||
2070 | do_c8_c9_c10 = has_c8_c9_c10(family, model); | ||
2071 | do_slm_cstates = is_slm(family, model); | 2258 | do_slm_cstates = is_slm(family, model); |
2072 | bclk = discover_bclk(family, model); | 2259 | bclk = discover_bclk(family, model); |
2073 | 2260 | ||
2074 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); | 2261 | do_nhm_turbo_ratio_limit = has_nhm_turbo_ratio_limit(family, model); |
2075 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); | 2262 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); |
2076 | rapl_probe(family, model); | 2263 | rapl_probe(family, model); |
2264 | perf_limit_reasons_probe(family, model); | ||
2077 | 2265 | ||
2078 | return; | 2266 | return; |
2079 | } | 2267 | } |
@@ -2299,10 +2487,9 @@ void setup_all_buffers(void) | |||
2299 | 2487 | ||
2300 | void turbostat_init() | 2488 | void turbostat_init() |
2301 | { | 2489 | { |
2302 | check_cpuid(); | ||
2303 | |||
2304 | check_dev_msr(); | 2490 | check_dev_msr(); |
2305 | check_super_user(); | 2491 | check_permissions(); |
2492 | check_cpuid(); | ||
2306 | 2493 | ||
2307 | setup_all_buffers(); | 2494 | setup_all_buffers(); |
2308 | 2495 | ||
@@ -2313,6 +2500,9 @@ void turbostat_init() | |||
2313 | for_all_cpus(print_epb, ODD_COUNTERS); | 2500 | for_all_cpus(print_epb, ODD_COUNTERS); |
2314 | 2501 | ||
2315 | if (verbose) | 2502 | if (verbose) |
2503 | for_all_cpus(print_perf_limit, ODD_COUNTERS); | ||
2504 | |||
2505 | if (verbose) | ||
2316 | for_all_cpus(print_rapl, ODD_COUNTERS); | 2506 | for_all_cpus(print_rapl, ODD_COUNTERS); |
2317 | 2507 | ||
2318 | for_all_cpus(set_temperature_target, ODD_COUNTERS); | 2508 | for_all_cpus(set_temperature_target, ODD_COUNTERS); |
@@ -2441,7 +2631,7 @@ int main(int argc, char **argv) | |||
2441 | cmdline(argc, argv); | 2631 | cmdline(argc, argv); |
2442 | 2632 | ||
2443 | if (verbose) | 2633 | if (verbose) |
2444 | fprintf(stderr, "turbostat v3.7 Feb 6, 2014" | 2634 | fprintf(stderr, "turbostat v3.9 23-Jan, 2015" |
2445 | " - Len Brown <lenb@kernel.org>\n"); | 2635 | " - Len Brown <lenb@kernel.org>\n"); |
2446 | 2636 | ||
2447 | turbostat_init(); | 2637 | turbostat_init(); |