diff options
Diffstat (limited to 'tools/power')
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 103 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 643 |
2 files changed, 690 insertions, 56 deletions
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e4d0690cccf9..0d7dc2cfefb5 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -11,16 +11,16 @@ turbostat \- Report processor frequency and idle statistics | |||
11 | .RB [ Options ] | 11 | .RB [ Options ] |
12 | .RB [ "\-i interval_sec" ] | 12 | .RB [ "\-i interval_sec" ] |
13 | .SH DESCRIPTION | 13 | .SH DESCRIPTION |
14 | \fBturbostat \fP reports processor topology, frequency | 14 | \fBturbostat \fP reports processor topology, frequency, |
15 | and idle power state statistics on modern X86 processors. | 15 | idle power-state statistics, temperature and power on modern X86 processors. |
16 | Either \fBcommand\fP is forked and statistics are printed | 16 | Either \fBcommand\fP is forked and statistics are printed |
17 | upon its completion, or statistics are printed periodically. | 17 | upon its completion, or statistics are printed periodically. |
18 | 18 | ||
19 | \fBturbostat \fP | 19 | \fBturbostat \fP |
20 | requires that the processor | 20 | must be run as root, and |
21 | minimally requires that the processor | ||
21 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. | 22 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. |
22 | \fBturbostat \fP will report idle cpu power state residency | 23 | Additional information is reported depending on hardware counter support. |
23 | on processors that additionally support C-state residency counters. | ||
24 | 24 | ||
25 | .SS Options | 25 | .SS Options |
26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. | 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. |
@@ -57,7 +57,15 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T | |||
57 | \fBGHz\fP average clock rate while the CPU was in c0 state. | 57 | \fBGHz\fP average clock rate while the CPU was in c0 state. |
58 | \fBTSC\fP average GHz that the TSC ran during the entire interval. | 58 | \fBTSC\fP average GHz that the TSC ran during the entire interval. |
59 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. | 59 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. |
60 | \fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. | ||
61 | \fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor. | ||
60 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. | 62 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. |
63 | \fBPkg_W\fP Watts consumed by the whole package. | ||
64 | \fBCor_W\fP Watts consumed by the core part of the package. | ||
65 | \fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors. | ||
66 | \fBRAM_W\fP Watts consumed by the DRAM DIMMs -- available only on server processors. | ||
67 | \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. | ||
68 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. | ||
61 | .fi | 69 | .fi |
62 | .PP | 70 | .PP |
63 | .SH EXAMPLE | 71 | .SH EXAMPLE |
@@ -66,50 +74,73 @@ Without any parameters, turbostat prints out counters every 5 seconds. | |||
66 | for turbostat to fork). | 74 | for turbostat to fork). |
67 | 75 | ||
68 | The first row of statistics is a summary for the entire system. | 76 | The first row of statistics is a summary for the entire system. |
69 | Note that the summary is a weighted average. | 77 | For residency % columns, the summary is a weighted average. |
78 | For Temperature columns, the summary is the column maximum. | ||
79 | For Watts columns, the summary is a system total. | ||
70 | Subsequent rows show per-CPU statistics. | 80 | Subsequent rows show per-CPU statistics. |
71 | 81 | ||
72 | .nf | 82 | .nf |
73 | [root@x980]# ./turbostat | 83 | [root@sandy]# ./turbostat |
74 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 84 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %c7 CTMP PTMP %pc2 %pc3 %pc6 %pc7 Pkg_W Cor_W GFX_W |
75 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 | 85 | 0.06 0.80 2.29 0.11 0.00 0.00 99.83 47 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 |
76 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 | 86 | 0 0 0.07 0.80 2.29 0.07 0.00 0.00 99.86 40 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 |
77 | 0 6 0.05 1.62 3.38 10.34 | 87 | 0 4 0.03 0.80 2.29 0.12 |
78 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 | 88 | 1 1 0.04 0.80 2.29 0.25 0.01 0.00 99.71 40 |
79 | 1 8 0.03 1.62 3.38 0.06 | 89 | 1 5 0.16 0.80 2.29 0.13 |
80 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 | 90 | 2 2 0.05 0.80 2.29 0.06 0.01 0.00 99.88 40 |
81 | 2 10 0.02 1.62 3.38 0.29 | 91 | 2 6 0.03 0.80 2.29 0.08 |
82 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 | 92 | 3 3 0.05 0.80 2.29 0.08 0.00 0.00 99.87 47 |
83 | 8 7 0.01 1.62 3.38 0.06 | 93 | 3 7 0.04 0.84 2.29 0.09 |
84 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 | ||
85 | 9 9 0.02 1.62 3.38 0.60 | ||
86 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 | ||
87 | 10 11 0.02 1.62 3.38 0.02 | ||
88 | .fi | 94 | .fi |
89 | .SH SUMMARY EXAMPLE | 95 | .SH SUMMARY EXAMPLE |
90 | The "-S" option prints the column headers just once, | 96 | The "-S" option prints the column headers just once, |
91 | and then the one line system summary for each sample interval. | 97 | and then the one line system summary for each sample interval. |
92 | 98 | ||
93 | .nf | 99 | .nf |
94 | [root@x980]# ./turbostat -s | 100 | [root@wsm]# turbostat -S |
95 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 101 | %c0 GHz TSC %c1 %c3 %c6 CTMP %pc3 %pc6 |
96 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 | 102 | 1.40 2.81 3.38 10.78 43.47 44.35 42 13.67 2.09 |
97 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 | 103 | 1.34 2.90 3.38 11.48 58.96 28.23 41 19.89 0.15 |
98 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 | 104 | 1.55 2.72 3.38 26.73 37.66 34.07 42 2.53 2.80 |
99 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | 105 | 1.37 2.83 3.38 16.95 60.05 21.63 42 5.76 0.20 |
100 | .fi | 106 | .fi |
101 | .SH VERBOSE EXAMPLE | 107 | .SH VERBOSE EXAMPLE |
102 | The "-v" option adds verbosity to the output: | 108 | The "-v" option adds verbosity to the output: |
103 | 109 | ||
104 | .nf | 110 | .nf |
105 | GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) | 111 | [root@ivy]# turbostat -v |
106 | 12 * 133 = 1600 MHz max efficiency | 112 | turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> |
107 | 25 * 133 = 3333 MHz TSC frequency | 113 | CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) |
108 | 26 * 133 = 3467 MHz max turbo 4 active cores | 114 | CPUID(6): APERF, DTS, PTM, EPB |
109 | 26 * 133 = 3467 MHz max turbo 3 active cores | 115 | RAPL: 851 sec. Joule Counter Range |
110 | 27 * 133 = 3600 MHz max turbo 2 active cores | 116 | cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 |
111 | 27 * 133 = 3600 MHz max turbo 1 active cores | 117 | 16 * 100 = 1600 MHz max efficiency |
112 | 118 | 35 * 100 = 3500 MHz TSC frequency | |
119 | cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) | ||
120 | cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 | ||
121 | 37 * 100 = 3700 MHz max turbo 4 active cores | ||
122 | 38 * 100 = 3800 MHz max turbo 3 active cores | ||
123 | 39 * 100 = 3900 MHz max turbo 2 active cores | ||
124 | 39 * 100 = 3900 MHz max turbo 1 active cores | ||
125 | cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) | ||
126 | cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) | ||
127 | cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) | ||
128 | cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) | ||
129 | cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) | ||
130 | cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) | ||
131 | cpu0: MSR_PP0_POLICY: 0 | ||
132 | cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) | ||
133 | cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | ||
134 | cpu0: MSR_PP1_POLICY: 0 | ||
135 | cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) | ||
136 | cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | ||
137 | cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) | ||
138 | cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) | ||
139 | cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) | ||
140 | cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) | ||
141 | cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) | ||
142 | cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) | ||
143 | ... | ||
113 | .fi | 144 | .fi |
114 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency | 145 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency |
115 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal | 146 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal |
@@ -142,7 +173,7 @@ cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | |||
142 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 | 173 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
143 | 10 11 0.16 2.88 3.38 3.40 | 174 | 10 11 0.16 2.88 3.38 3.40 |
144 | .fi | 175 | .fi |
145 | Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit | 176 | Above the cycle soaker drives cpu7 up its 3.6 GHz turbo limit |
146 | while the other processors are generally in various states of idle. | 177 | while the other processors are generally in various states of idle. |
147 | 178 | ||
148 | Note that cpu1 and cpu7 are HT siblings within core8. | 179 | Note that cpu1 and cpu7 are HT siblings within core8. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index cb031472bede..ce6d46038f74 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -39,12 +39,15 @@ | |||
39 | char *proc_stat = "/proc/stat"; | 39 | char *proc_stat = "/proc/stat"; |
40 | unsigned int interval_sec = 5; /* set with -i interval_sec */ | 40 | unsigned int interval_sec = 5; /* set with -i interval_sec */ |
41 | unsigned int verbose; /* set with -v */ | 41 | unsigned int verbose; /* set with -v */ |
42 | unsigned int rapl_verbose; /* set with -R */ | ||
43 | unsigned int thermal_verbose; /* set with -T */ | ||
42 | unsigned int summary_only; /* set with -s */ | 44 | unsigned int summary_only; /* set with -s */ |
43 | unsigned int skip_c0; | 45 | unsigned int skip_c0; |
44 | unsigned int skip_c1; | 46 | unsigned int skip_c1; |
45 | unsigned int do_nhm_cstates; | 47 | unsigned int do_nhm_cstates; |
46 | unsigned int do_snb_cstates; | 48 | unsigned int do_snb_cstates; |
47 | unsigned int has_aperf; | 49 | unsigned int has_aperf; |
50 | unsigned int has_epb; | ||
48 | unsigned int units = 1000000000; /* Ghz etc */ | 51 | unsigned int units = 1000000000; /* Ghz etc */ |
49 | unsigned int genuine_intel; | 52 | unsigned int genuine_intel; |
50 | unsigned int has_invariant_tsc; | 53 | unsigned int has_invariant_tsc; |
@@ -62,6 +65,23 @@ unsigned int show_cpu; | |||
62 | unsigned int show_pkg_only; | 65 | unsigned int show_pkg_only; |
63 | unsigned int show_core_only; | 66 | unsigned int show_core_only; |
64 | char *output_buffer, *outp; | 67 | char *output_buffer, *outp; |
68 | unsigned int do_rapl; | ||
69 | unsigned int do_dts; | ||
70 | unsigned int do_ptm; | ||
71 | unsigned int tcc_activation_temp; | ||
72 | unsigned int tcc_activation_temp_override; | ||
73 | double rapl_power_units, rapl_energy_units, rapl_time_units; | ||
74 | double rapl_joule_counter_range; | ||
75 | |||
76 | #define RAPL_PKG (1 << 0) | ||
77 | #define RAPL_CORES (1 << 1) | ||
78 | #define RAPL_GFX (1 << 2) | ||
79 | #define RAPL_DRAM (1 << 3) | ||
80 | #define RAPL_PKG_PERF_STATUS (1 << 4) | ||
81 | #define RAPL_DRAM_PERF_STATUS (1 << 5) | ||
82 | #define TJMAX_DEFAULT 100 | ||
83 | |||
84 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) | ||
65 | 85 | ||
66 | int aperf_mperf_unstable; | 86 | int aperf_mperf_unstable; |
67 | int backwards_count; | 87 | int backwards_count; |
@@ -89,6 +109,7 @@ struct core_data { | |||
89 | unsigned long long c3; | 109 | unsigned long long c3; |
90 | unsigned long long c6; | 110 | unsigned long long c6; |
91 | unsigned long long c7; | 111 | unsigned long long c7; |
112 | unsigned int core_temp_c; | ||
92 | unsigned int core_id; | 113 | unsigned int core_id; |
93 | } *core_even, *core_odd; | 114 | } *core_even, *core_odd; |
94 | 115 | ||
@@ -98,6 +119,14 @@ struct pkg_data { | |||
98 | unsigned long long pc6; | 119 | unsigned long long pc6; |
99 | unsigned long long pc7; | 120 | unsigned long long pc7; |
100 | unsigned int package_id; | 121 | unsigned int package_id; |
122 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ | ||
123 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ | ||
124 | unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ | ||
125 | unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ | ||
126 | unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ | ||
127 | unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ | ||
128 | unsigned int pkg_temp_c; | ||
129 | |||
101 | } *package_even, *package_odd; | 130 | } *package_even, *package_odd; |
102 | 131 | ||
103 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | 132 | #define ODD_COUNTERS thread_odd, core_odd, package_odd |
@@ -235,6 +264,12 @@ void print_header(void) | |||
235 | outp += sprintf(outp, " %%c6"); | 264 | outp += sprintf(outp, " %%c6"); |
236 | if (do_snb_cstates) | 265 | if (do_snb_cstates) |
237 | outp += sprintf(outp, " %%c7"); | 266 | outp += sprintf(outp, " %%c7"); |
267 | |||
268 | if (do_dts) | ||
269 | outp += sprintf(outp, " CTMP"); | ||
270 | if (do_ptm) | ||
271 | outp += sprintf(outp, " PTMP"); | ||
272 | |||
238 | if (do_snb_cstates) | 273 | if (do_snb_cstates) |
239 | outp += sprintf(outp, " %%pc2"); | 274 | outp += sprintf(outp, " %%pc2"); |
240 | if (do_nhm_cstates) | 275 | if (do_nhm_cstates) |
@@ -244,6 +279,19 @@ void print_header(void) | |||
244 | if (do_snb_cstates) | 279 | if (do_snb_cstates) |
245 | outp += sprintf(outp, " %%pc7"); | 280 | outp += sprintf(outp, " %%pc7"); |
246 | 281 | ||
282 | if (do_rapl & RAPL_PKG) | ||
283 | outp += sprintf(outp, " Pkg_W"); | ||
284 | if (do_rapl & RAPL_CORES) | ||
285 | outp += sprintf(outp, " Cor_W"); | ||
286 | if (do_rapl & RAPL_GFX) | ||
287 | outp += sprintf(outp, " GFX_W"); | ||
288 | if (do_rapl & RAPL_DRAM) | ||
289 | outp += sprintf(outp, " RAM_W"); | ||
290 | if (do_rapl & RAPL_PKG_PERF_STATUS) | ||
291 | outp += sprintf(outp, " PKG_%%"); | ||
292 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | ||
293 | outp += sprintf(outp, " RAM_%%"); | ||
294 | |||
247 | outp += sprintf(outp, "\n"); | 295 | outp += sprintf(outp, "\n"); |
248 | } | 296 | } |
249 | 297 | ||
@@ -273,6 +321,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
273 | fprintf(stderr, "c3: %016llX\n", c->c3); | 321 | fprintf(stderr, "c3: %016llX\n", c->c3); |
274 | fprintf(stderr, "c6: %016llX\n", c->c6); | 322 | fprintf(stderr, "c6: %016llX\n", c->c6); |
275 | fprintf(stderr, "c7: %016llX\n", c->c7); | 323 | fprintf(stderr, "c7: %016llX\n", c->c7); |
324 | fprintf(stderr, "DTS: %dC\n", c->core_temp_c); | ||
276 | } | 325 | } |
277 | 326 | ||
278 | if (p) { | 327 | if (p) { |
@@ -281,6 +330,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
281 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | 330 | fprintf(stderr, "pc3: %016llX\n", p->pc3); |
282 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | 331 | fprintf(stderr, "pc6: %016llX\n", p->pc6); |
283 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | 332 | fprintf(stderr, "pc7: %016llX\n", p->pc7); |
333 | fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg); | ||
334 | fprintf(stderr, "Joules COR: %0X\n", p->energy_cores); | ||
335 | fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx); | ||
336 | fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram); | ||
337 | fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status); | ||
338 | fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); | ||
339 | fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c); | ||
284 | } | 340 | } |
285 | return 0; | 341 | return 0; |
286 | } | 342 | } |
@@ -290,14 +346,21 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
290 | * package: "pk" 2 columns %2d | 346 | * package: "pk" 2 columns %2d |
291 | * core: "cor" 3 columns %3d | 347 | * core: "cor" 3 columns %3d |
292 | * CPU: "CPU" 3 columns %3d | 348 | * CPU: "CPU" 3 columns %3d |
349 | * Pkg_W: %6.2 | ||
350 | * Cor_W: %6.2 | ||
351 | * GFX_W: %5.2 | ||
352 | * RAM_W: %5.2 | ||
293 | * GHz: "GHz" 3 columns %3.2 | 353 | * GHz: "GHz" 3 columns %3.2 |
294 | * TSC: "TSC" 3 columns %3.2 | 354 | * TSC: "TSC" 3 columns %3.2 |
295 | * percentage " %pc3" %6.2 | 355 | * percentage " %pc3" %6.2 |
356 | * Perf Status percentage: %5.2 | ||
357 | * "CTMP" 4 columns %4d | ||
296 | */ | 358 | */ |
297 | int format_counters(struct thread_data *t, struct core_data *c, | 359 | int format_counters(struct thread_data *t, struct core_data *c, |
298 | struct pkg_data *p) | 360 | struct pkg_data *p) |
299 | { | 361 | { |
300 | double interval_float; | 362 | double interval_float; |
363 | char *fmt5, *fmt6; | ||
301 | 364 | ||
302 | /* if showing only 1st thread in core and this isn't one, bail out */ | 365 | /* if showing only 1st thread in core and this isn't one, bail out */ |
303 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 366 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
@@ -337,7 +400,6 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
337 | if (show_cpu) | 400 | if (show_cpu) |
338 | outp += sprintf(outp, " %3d", t->cpu_id); | 401 | outp += sprintf(outp, " %3d", t->cpu_id); |
339 | } | 402 | } |
340 | |||
341 | /* %c0 */ | 403 | /* %c0 */ |
342 | if (do_nhm_cstates) { | 404 | if (do_nhm_cstates) { |
343 | if (show_pkg || show_core || show_cpu) | 405 | if (show_pkg || show_core || show_cpu) |
@@ -402,10 +464,16 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
402 | if (do_snb_cstates) | 464 | if (do_snb_cstates) |
403 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); | 465 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
404 | 466 | ||
467 | if (do_dts) | ||
468 | outp += sprintf(outp, " %4d", c->core_temp_c); | ||
469 | |||
405 | /* print per-package data only for 1st core in package */ | 470 | /* print per-package data only for 1st core in package */ |
406 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 471 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
407 | goto done; | 472 | goto done; |
408 | 473 | ||
474 | if (do_ptm) | ||
475 | outp += sprintf(outp, " %4d", p->pkg_temp_c); | ||
476 | |||
409 | if (do_snb_cstates) | 477 | if (do_snb_cstates) |
410 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); | 478 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
411 | if (do_nhm_cstates) | 479 | if (do_nhm_cstates) |
@@ -414,6 +482,32 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
414 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); | 482 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
415 | if (do_snb_cstates) | 483 | if (do_snb_cstates) |
416 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); | 484 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
485 | |||
486 | /* | ||
487 | * If measurement interval exceeds minimum RAPL Joule Counter range, | ||
488 | * indicate that results are suspect by printing "**" in fraction place. | ||
489 | */ | ||
490 | if (interval_float < rapl_joule_counter_range) { | ||
491 | fmt5 = " %5.2f"; | ||
492 | fmt6 = " %6.2f"; | ||
493 | } else { | ||
494 | fmt5 = " %3.0f**"; | ||
495 | fmt6 = " %4.0f**"; | ||
496 | } | ||
497 | |||
498 | if (do_rapl & RAPL_PKG) | ||
499 | outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float); | ||
500 | if (do_rapl & RAPL_CORES) | ||
501 | outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float); | ||
502 | if (do_rapl & RAPL_GFX) | ||
503 | outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float); | ||
504 | if (do_rapl & RAPL_DRAM) | ||
505 | outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float); | ||
506 | if (do_rapl & RAPL_PKG_PERF_STATUS ) | ||
507 | outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | ||
508 | if (do_rapl & RAPL_DRAM_PERF_STATUS ) | ||
509 | outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | ||
510 | |||
417 | done: | 511 | done: |
418 | outp += sprintf(outp, "\n"); | 512 | outp += sprintf(outp, "\n"); |
419 | 513 | ||
@@ -450,6 +544,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ | |||
450 | for_all_cpus(format_counters, t, c, p); | 544 | for_all_cpus(format_counters, t, c, p); |
451 | } | 545 | } |
452 | 546 | ||
547 | #define DELTA_WRAP32(new, old) \ | ||
548 | if (new > old) { \ | ||
549 | old = new - old; \ | ||
550 | } else { \ | ||
551 | old = 0x100000000 + new - old; \ | ||
552 | } | ||
553 | |||
453 | void | 554 | void |
454 | delta_package(struct pkg_data *new, struct pkg_data *old) | 555 | delta_package(struct pkg_data *new, struct pkg_data *old) |
455 | { | 556 | { |
@@ -457,6 +558,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old) | |||
457 | old->pc3 = new->pc3 - old->pc3; | 558 | old->pc3 = new->pc3 - old->pc3; |
458 | old->pc6 = new->pc6 - old->pc6; | 559 | old->pc6 = new->pc6 - old->pc6; |
459 | old->pc7 = new->pc7 - old->pc7; | 560 | old->pc7 = new->pc7 - old->pc7; |
561 | old->pkg_temp_c = new->pkg_temp_c; | ||
562 | |||
563 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); | ||
564 | DELTA_WRAP32(new->energy_cores, old->energy_cores); | ||
565 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); | ||
566 | DELTA_WRAP32(new->energy_dram, old->energy_dram); | ||
567 | DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); | ||
568 | DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); | ||
460 | } | 569 | } |
461 | 570 | ||
462 | void | 571 | void |
@@ -465,6 +574,7 @@ delta_core(struct core_data *new, struct core_data *old) | |||
465 | old->c3 = new->c3 - old->c3; | 574 | old->c3 = new->c3 - old->c3; |
466 | old->c6 = new->c6 - old->c6; | 575 | old->c6 = new->c6 - old->c6; |
467 | old->c7 = new->c7 - old->c7; | 576 | old->c7 = new->c7 - old->c7; |
577 | old->core_temp_c = new->core_temp_c; | ||
468 | } | 578 | } |
469 | 579 | ||
470 | /* | 580 | /* |
@@ -571,11 +681,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
571 | c->c3 = 0; | 681 | c->c3 = 0; |
572 | c->c6 = 0; | 682 | c->c6 = 0; |
573 | c->c7 = 0; | 683 | c->c7 = 0; |
684 | c->core_temp_c = 0; | ||
574 | 685 | ||
575 | p->pc2 = 0; | 686 | p->pc2 = 0; |
576 | p->pc3 = 0; | 687 | p->pc3 = 0; |
577 | p->pc6 = 0; | 688 | p->pc6 = 0; |
578 | p->pc7 = 0; | 689 | p->pc7 = 0; |
690 | |||
691 | p->energy_pkg = 0; | ||
692 | p->energy_dram = 0; | ||
693 | p->energy_cores = 0; | ||
694 | p->energy_gfx = 0; | ||
695 | p->rapl_pkg_perf_status = 0; | ||
696 | p->rapl_dram_perf_status = 0; | ||
697 | p->pkg_temp_c = 0; | ||
579 | } | 698 | } |
580 | int sum_counters(struct thread_data *t, struct core_data *c, | 699 | int sum_counters(struct thread_data *t, struct core_data *c, |
581 | struct pkg_data *p) | 700 | struct pkg_data *p) |
@@ -596,6 +715,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
596 | average.cores.c6 += c->c6; | 715 | average.cores.c6 += c->c6; |
597 | average.cores.c7 += c->c7; | 716 | average.cores.c7 += c->c7; |
598 | 717 | ||
718 | average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); | ||
719 | |||
599 | /* sum per-pkg values only for 1st core in pkg */ | 720 | /* sum per-pkg values only for 1st core in pkg */ |
600 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 721 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
601 | return 0; | 722 | return 0; |
@@ -605,6 +726,15 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
605 | average.packages.pc6 += p->pc6; | 726 | average.packages.pc6 += p->pc6; |
606 | average.packages.pc7 += p->pc7; | 727 | average.packages.pc7 += p->pc7; |
607 | 728 | ||
729 | average.packages.energy_pkg += p->energy_pkg; | ||
730 | average.packages.energy_dram += p->energy_dram; | ||
731 | average.packages.energy_cores += p->energy_cores; | ||
732 | average.packages.energy_gfx += p->energy_gfx; | ||
733 | |||
734 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); | ||
735 | |||
736 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; | ||
737 | average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; | ||
608 | return 0; | 738 | return 0; |
609 | } | 739 | } |
610 | /* | 740 | /* |
@@ -656,6 +786,7 @@ static unsigned long long rdtsc(void) | |||
656 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 786 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
657 | { | 787 | { |
658 | int cpu = t->cpu_id; | 788 | int cpu = t->cpu_id; |
789 | unsigned long long msr; | ||
659 | 790 | ||
660 | if (cpu_migrate(cpu)) { | 791 | if (cpu_migrate(cpu)) { |
661 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | 792 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); |
@@ -672,9 +803,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
672 | } | 803 | } |
673 | 804 | ||
674 | if (extra_delta_offset32) { | 805 | if (extra_delta_offset32) { |
675 | if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) | 806 | if (get_msr(cpu, extra_delta_offset32, &msr)) |
676 | return -5; | 807 | return -5; |
677 | t->extra_delta32 &= 0xFFFFFFFF; | 808 | t->extra_delta32 = msr & 0xFFFFFFFF; |
678 | } | 809 | } |
679 | 810 | ||
680 | if (extra_delta_offset64) | 811 | if (extra_delta_offset64) |
@@ -682,9 +813,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
682 | return -5; | 813 | return -5; |
683 | 814 | ||
684 | if (extra_msr_offset32) { | 815 | if (extra_msr_offset32) { |
685 | if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) | 816 | if (get_msr(cpu, extra_msr_offset32, &msr)) |
686 | return -5; | 817 | return -5; |
687 | t->extra_msr32 &= 0xFFFFFFFF; | 818 | t->extra_msr32 = msr & 0xFFFFFFFF; |
688 | } | 819 | } |
689 | 820 | ||
690 | if (extra_msr_offset64) | 821 | if (extra_msr_offset64) |
@@ -706,6 +837,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
706 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | 837 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) |
707 | return -8; | 838 | return -8; |
708 | 839 | ||
840 | if (do_dts) { | ||
841 | if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) | ||
842 | return -9; | ||
843 | c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | ||
844 | } | ||
845 | |||
846 | |||
709 | /* collect package counters only for 1st core in package */ | 847 | /* collect package counters only for 1st core in package */ |
710 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 848 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
711 | return 0; | 849 | return 0; |
@@ -722,6 +860,41 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
722 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | 860 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) |
723 | return -12; | 861 | return -12; |
724 | } | 862 | } |
863 | if (do_rapl & RAPL_PKG) { | ||
864 | if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) | ||
865 | return -13; | ||
866 | p->energy_pkg = msr & 0xFFFFFFFF; | ||
867 | } | ||
868 | if (do_rapl & RAPL_CORES) { | ||
869 | if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) | ||
870 | return -14; | ||
871 | p->energy_cores = msr & 0xFFFFFFFF; | ||
872 | } | ||
873 | if (do_rapl & RAPL_DRAM) { | ||
874 | if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) | ||
875 | return -15; | ||
876 | p->energy_dram = msr & 0xFFFFFFFF; | ||
877 | } | ||
878 | if (do_rapl & RAPL_GFX) { | ||
879 | if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) | ||
880 | return -16; | ||
881 | p->energy_gfx = msr & 0xFFFFFFFF; | ||
882 | } | ||
883 | if (do_rapl & RAPL_PKG_PERF_STATUS) { | ||
884 | if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) | ||
885 | return -16; | ||
886 | p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; | ||
887 | } | ||
888 | if (do_rapl & RAPL_DRAM_PERF_STATUS) { | ||
889 | if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) | ||
890 | return -16; | ||
891 | p->rapl_dram_perf_status = msr & 0xFFFFFFFF; | ||
892 | } | ||
893 | if (do_ptm) { | ||
894 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) | ||
895 | return -17; | ||
896 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | ||
897 | } | ||
725 | return 0; | 898 | return 0; |
726 | } | 899 | } |
727 | 900 | ||
@@ -735,8 +908,8 @@ void print_verbose_header(void) | |||
735 | 908 | ||
736 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); | 909 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); |
737 | 910 | ||
738 | if (verbose > 1) | 911 | if (verbose) |
739 | fprintf(stderr, "MSR_NHM_PLATFORM_INFO: 0x%llx\n", msr); | 912 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); |
740 | 913 | ||
741 | ratio = (msr >> 40) & 0xFF; | 914 | ratio = (msr >> 40) & 0xFF; |
742 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", | 915 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", |
@@ -751,8 +924,8 @@ void print_verbose_header(void) | |||
751 | 924 | ||
752 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); | 925 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); |
753 | 926 | ||
754 | if (verbose > 1) | 927 | if (verbose) |
755 | fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); | 928 | fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
756 | 929 | ||
757 | ratio = (msr >> 56) & 0xFF; | 930 | ratio = (msr >> 56) & 0xFF; |
758 | if (ratio) | 931 | if (ratio) |
@@ -795,14 +968,56 @@ void print_verbose_header(void) | |||
795 | ratio, bclk, ratio * bclk); | 968 | ratio, bclk, ratio * bclk); |
796 | 969 | ||
797 | print_nhm_turbo_ratio_limits: | 970 | print_nhm_turbo_ratio_limits: |
971 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | ||
972 | |||
973 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
974 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
975 | |||
976 | fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); | ||
977 | |||
978 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", | ||
979 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | ||
980 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | ||
981 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | ||
982 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | ||
983 | (msr & (1 << 15)) ? "" : "UN", | ||
984 | (unsigned int)msr & 7); | ||
985 | |||
986 | |||
987 | switch(msr & 0x7) { | ||
988 | case 0: | ||
989 | fprintf(stderr, "pc0"); | ||
990 | break; | ||
991 | case 1: | ||
992 | fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); | ||
993 | break; | ||
994 | case 2: | ||
995 | fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); | ||
996 | break; | ||
997 | case 3: | ||
998 | fprintf(stderr, "pc6"); | ||
999 | break; | ||
1000 | case 4: | ||
1001 | fprintf(stderr, "pc7"); | ||
1002 | break; | ||
1003 | case 5: | ||
1004 | fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); | ||
1005 | break; | ||
1006 | case 7: | ||
1007 | fprintf(stderr, "unlimited"); | ||
1008 | break; | ||
1009 | default: | ||
1010 | fprintf(stderr, "invalid"); | ||
1011 | } | ||
1012 | fprintf(stderr, ")\n"); | ||
798 | 1013 | ||
799 | if (!do_nehalem_turbo_ratio_limit) | 1014 | if (!do_nehalem_turbo_ratio_limit) |
800 | return; | 1015 | return; |
801 | 1016 | ||
802 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | 1017 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
803 | 1018 | ||
804 | if (verbose > 1) | 1019 | if (verbose) |
805 | fprintf(stderr, "MSR_NHM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); | 1020 | fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
806 | 1021 | ||
807 | ratio = (msr >> 56) & 0xFF; | 1022 | ratio = (msr >> 56) & 0xFF; |
808 | if (ratio) | 1023 | if (ratio) |
@@ -1205,6 +1420,299 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1205 | } | 1420 | } |
1206 | } | 1421 | } |
1207 | 1422 | ||
1423 | /* | ||
1424 | * print_epb() | ||
1425 | * Decode the ENERGY_PERF_BIAS MSR | ||
1426 | */ | ||
1427 | int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1428 | { | ||
1429 | unsigned long long msr; | ||
1430 | char *epb_string; | ||
1431 | int cpu; | ||
1432 | |||
1433 | if (!has_epb) | ||
1434 | return 0; | ||
1435 | |||
1436 | cpu = t->cpu_id; | ||
1437 | |||
1438 | /* EPB is per-package */ | ||
1439 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1440 | return 0; | ||
1441 | |||
1442 | if (cpu_migrate(cpu)) { | ||
1443 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1444 | return -1; | ||
1445 | } | ||
1446 | |||
1447 | if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) | ||
1448 | return 0; | ||
1449 | |||
1450 | switch (msr & 0x7) { | ||
1451 | case ENERGY_PERF_BIAS_PERFORMANCE: | ||
1452 | epb_string = "performance"; | ||
1453 | break; | ||
1454 | case ENERGY_PERF_BIAS_NORMAL: | ||
1455 | epb_string = "balanced"; | ||
1456 | break; | ||
1457 | case ENERGY_PERF_BIAS_POWERSAVE: | ||
1458 | epb_string = "powersave"; | ||
1459 | break; | ||
1460 | default: | ||
1461 | epb_string = "custom"; | ||
1462 | break; | ||
1463 | } | ||
1464 | fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); | ||
1465 | |||
1466 | return 0; | ||
1467 | } | ||
1468 | |||
1469 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | ||
1470 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | ||
1471 | |||
1472 | /* | ||
1473 | * rapl_probe() | ||
1474 | * | ||
1475 | * sets do_rapl | ||
1476 | */ | ||
1477 | void rapl_probe(unsigned int family, unsigned int model) | ||
1478 | { | ||
1479 | unsigned long long msr; | ||
1480 | double tdp; | ||
1481 | |||
1482 | if (!genuine_intel) | ||
1483 | return; | ||
1484 | |||
1485 | if (family != 6) | ||
1486 | return; | ||
1487 | |||
1488 | switch (model) { | ||
1489 | case 0x2A: | ||
1490 | case 0x3A: | ||
1491 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; | ||
1492 | break; | ||
1493 | case 0x2D: | ||
1494 | case 0x3E: | ||
1495 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; | ||
1496 | break; | ||
1497 | default: | ||
1498 | return; | ||
1499 | } | ||
1500 | |||
1501 | /* units on package 0, verify later other packages match */ | ||
1502 | if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) | ||
1503 | return; | ||
1504 | |||
1505 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
1506 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
1507 | rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); | ||
1508 | |||
1509 | /* get TDP to determine energy counter range */ | ||
1510 | if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) | ||
1511 | return; | ||
1512 | |||
1513 | tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; | ||
1514 | |||
1515 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; | ||
1516 | |||
1517 | if (verbose) | ||
1518 | fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); | ||
1519 | |||
1520 | return; | ||
1521 | } | ||
1522 | |||
1523 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1524 | { | ||
1525 | unsigned long long msr; | ||
1526 | unsigned int dts; | ||
1527 | int cpu; | ||
1528 | |||
1529 | if (!(do_dts || do_ptm)) | ||
1530 | return 0; | ||
1531 | |||
1532 | cpu = t->cpu_id; | ||
1533 | |||
1534 | /* DTS is per-core, no need to print for each thread */ | ||
1535 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
1536 | return 0; | ||
1537 | |||
1538 | if (cpu_migrate(cpu)) { | ||
1539 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1540 | return -1; | ||
1541 | } | ||
1542 | |||
1543 | if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { | ||
1544 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) | ||
1545 | return 0; | ||
1546 | |||
1547 | dts = (msr >> 16) & 0x7F; | ||
1548 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", | ||
1549 | cpu, msr, tcc_activation_temp - dts); | ||
1550 | |||
1551 | #ifdef THERM_DEBUG | ||
1552 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) | ||
1553 | return 0; | ||
1554 | |||
1555 | dts = (msr >> 16) & 0x7F; | ||
1556 | dts2 = (msr >> 8) & 0x7F; | ||
1557 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | ||
1558 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | ||
1559 | #endif | ||
1560 | } | ||
1561 | |||
1562 | |||
1563 | if (do_dts) { | ||
1564 | unsigned int resolution; | ||
1565 | |||
1566 | if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) | ||
1567 | return 0; | ||
1568 | |||
1569 | dts = (msr >> 16) & 0x7F; | ||
1570 | resolution = (msr >> 27) & 0xF; | ||
1571 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", | ||
1572 | cpu, msr, tcc_activation_temp - dts, resolution); | ||
1573 | |||
1574 | #ifdef THERM_DEBUG | ||
1575 | if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) | ||
1576 | return 0; | ||
1577 | |||
1578 | dts = (msr >> 16) & 0x7F; | ||
1579 | dts2 = (msr >> 8) & 0x7F; | ||
1580 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | ||
1581 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | ||
1582 | #endif | ||
1583 | } | ||
1584 | |||
1585 | return 0; | ||
1586 | } | ||
1587 | |||
1588 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) | ||
1589 | { | ||
1590 | fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", | ||
1591 | cpu, label, | ||
1592 | ((msr >> 15) & 1) ? "EN" : "DIS", | ||
1593 | ((msr >> 0) & 0x7FFF) * rapl_power_units, | ||
1594 | (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, | ||
1595 | (((msr >> 16) & 1) ? "EN" : "DIS")); | ||
1596 | |||
1597 | return; | ||
1598 | } | ||
1599 | |||
1600 | int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1601 | { | ||
1602 | unsigned long long msr; | ||
1603 | int cpu; | ||
1604 | double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; | ||
1605 | |||
1606 | if (!do_rapl) | ||
1607 | return 0; | ||
1608 | |||
1609 | /* RAPL counters are per package, so print only for 1st thread/package */ | ||
1610 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1611 | return 0; | ||
1612 | |||
1613 | cpu = t->cpu_id; | ||
1614 | if (cpu_migrate(cpu)) { | ||
1615 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1616 | return -1; | ||
1617 | } | ||
1618 | |||
1619 | if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) | ||
1620 | return -1; | ||
1621 | |||
1622 | local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
1623 | local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
1624 | local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); | ||
1625 | |||
1626 | if (local_rapl_power_units != rapl_power_units) | ||
1627 | fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); | ||
1628 | if (local_rapl_energy_units != rapl_energy_units) | ||
1629 | fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); | ||
1630 | if (local_rapl_time_units != rapl_time_units) | ||
1631 | fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); | ||
1632 | |||
1633 | if (verbose) { | ||
1634 | fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " | ||
1635 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, | ||
1636 | local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); | ||
1637 | } | ||
1638 | if (do_rapl & RAPL_PKG) { | ||
1639 | if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) | ||
1640 | return -5; | ||
1641 | |||
1642 | |||
1643 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | ||
1644 | cpu, msr, | ||
1645 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1646 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1647 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1648 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | ||
1649 | |||
1650 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) | ||
1651 | return -9; | ||
1652 | |||
1653 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1654 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); | ||
1655 | |||
1656 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); | ||
1657 | fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", | ||
1658 | cpu, | ||
1659 | ((msr >> 47) & 1) ? "EN" : "DIS", | ||
1660 | ((msr >> 32) & 0x7FFF) * rapl_power_units, | ||
1661 | (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, | ||
1662 | ((msr >> 48) & 1) ? "EN" : "DIS"); | ||
1663 | } | ||
1664 | |||
1665 | if (do_rapl & RAPL_DRAM) { | ||
1666 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) | ||
1667 | return -6; | ||
1668 | |||
1669 | |||
1670 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | ||
1671 | cpu, msr, | ||
1672 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1673 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1674 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1675 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | ||
1676 | |||
1677 | |||
1678 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) | ||
1679 | return -9; | ||
1680 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1681 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
1682 | |||
1683 | print_power_limit_msr(cpu, msr, "DRAM Limit"); | ||
1684 | } | ||
1685 | if (do_rapl & RAPL_CORES) { | ||
1686 | if (verbose) { | ||
1687 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) | ||
1688 | return -7; | ||
1689 | |||
1690 | fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); | ||
1691 | |||
1692 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) | ||
1693 | return -9; | ||
1694 | fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1695 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
1696 | print_power_limit_msr(cpu, msr, "Cores Limit"); | ||
1697 | } | ||
1698 | } | ||
1699 | if (do_rapl & RAPL_GFX) { | ||
1700 | if (verbose) { | ||
1701 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) | ||
1702 | return -8; | ||
1703 | |||
1704 | fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); | ||
1705 | |||
1706 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) | ||
1707 | return -9; | ||
1708 | fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1709 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
1710 | print_power_limit_msr(cpu, msr, "GFX Limit"); | ||
1711 | } | ||
1712 | } | ||
1713 | return 0; | ||
1714 | } | ||
1715 | |||
1208 | 1716 | ||
1209 | int is_snb(unsigned int family, unsigned int model) | 1717 | int is_snb(unsigned int family, unsigned int model) |
1210 | { | 1718 | { |
@@ -1229,6 +1737,72 @@ double discover_bclk(unsigned int family, unsigned int model) | |||
1229 | return 133.33; | 1737 | return 133.33; |
1230 | } | 1738 | } |
1231 | 1739 | ||
1740 | /* | ||
1741 | * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where | ||
1742 | * the Thermal Control Circuit (TCC) activates. | ||
1743 | * This is usually equal to tjMax. | ||
1744 | * | ||
1745 | * Older processors do not have this MSR, so there we guess, | ||
1746 | * but also allow cmdline over-ride with -T. | ||
1747 | * | ||
1748 | * Several MSR temperature values are in units of degrees-C | ||
1749 | * below this value, including the Digital Thermal Sensor (DTS), | ||
1750 | * Package Thermal Management Sensor (PTM), and thermal event thresholds. | ||
1751 | */ | ||
1752 | int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1753 | { | ||
1754 | unsigned long long msr; | ||
1755 | unsigned int target_c_local; | ||
1756 | int cpu; | ||
1757 | |||
1758 | /* tcc_activation_temp is used only for dts or ptm */ | ||
1759 | if (!(do_dts || do_ptm)) | ||
1760 | return 0; | ||
1761 | |||
1762 | /* this is a per-package concept */ | ||
1763 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1764 | return 0; | ||
1765 | |||
1766 | cpu = t->cpu_id; | ||
1767 | if (cpu_migrate(cpu)) { | ||
1768 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1769 | return -1; | ||
1770 | } | ||
1771 | |||
1772 | if (tcc_activation_temp_override != 0) { | ||
1773 | tcc_activation_temp = tcc_activation_temp_override; | ||
1774 | fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", | ||
1775 | cpu, tcc_activation_temp); | ||
1776 | return 0; | ||
1777 | } | ||
1778 | |||
1779 | /* Temperature Target MSR is Nehalem and newer only */ | ||
1780 | if (!do_nehalem_platform_info) | ||
1781 | goto guess; | ||
1782 | |||
1783 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) | ||
1784 | goto guess; | ||
1785 | |||
1786 | target_c_local = (msr >> 16) & 0x7F; | ||
1787 | |||
1788 | if (verbose) | ||
1789 | fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", | ||
1790 | cpu, msr, target_c_local); | ||
1791 | |||
1792 | if (target_c_local < 85 || target_c_local > 120) | ||
1793 | goto guess; | ||
1794 | |||
1795 | tcc_activation_temp = target_c_local; | ||
1796 | |||
1797 | return 0; | ||
1798 | |||
1799 | guess: | ||
1800 | tcc_activation_temp = TJMAX_DEFAULT; | ||
1801 | fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", | ||
1802 | cpu, tcc_activation_temp); | ||
1803 | |||
1804 | return 0; | ||
1805 | } | ||
1232 | void check_cpuid() | 1806 | void check_cpuid() |
1233 | { | 1807 | { |
1234 | unsigned int eax, ebx, ecx, edx, max_level; | 1808 | unsigned int eax, ebx, ecx, edx, max_level; |
@@ -1242,7 +1816,7 @@ void check_cpuid() | |||
1242 | genuine_intel = 1; | 1816 | genuine_intel = 1; |
1243 | 1817 | ||
1244 | if (verbose) | 1818 | if (verbose) |
1245 | fprintf(stderr, "%.4s%.4s%.4s ", | 1819 | fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", |
1246 | (char *)&ebx, (char *)&edx, (char *)&ecx); | 1820 | (char *)&ebx, (char *)&edx, (char *)&ecx); |
1247 | 1821 | ||
1248 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); | 1822 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); |
@@ -1293,10 +1867,19 @@ void check_cpuid() | |||
1293 | 1867 | ||
1294 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); | 1868 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); |
1295 | has_aperf = ecx & (1 << 0); | 1869 | has_aperf = ecx & (1 << 0); |
1296 | if (!has_aperf) { | 1870 | do_dts = eax & (1 << 0); |
1297 | fprintf(stderr, "No APERF MSR\n"); | 1871 | do_ptm = eax & (1 << 6); |
1298 | exit(1); | 1872 | has_epb = ecx & (1 << 3); |
1299 | } | 1873 | |
1874 | if (verbose) | ||
1875 | fprintf(stderr, "CPUID(6): %s%s%s%s\n", | ||
1876 | has_aperf ? "APERF" : "No APERF!", | ||
1877 | do_dts ? ", DTS" : "", | ||
1878 | do_ptm ? ", PTM": "", | ||
1879 | has_epb ? ", EPB": ""); | ||
1880 | |||
1881 | if (!has_aperf) | ||
1882 | exit(-1); | ||
1300 | 1883 | ||
1301 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; | 1884 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; |
1302 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ | 1885 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ |
@@ -1305,12 +1888,15 @@ void check_cpuid() | |||
1305 | 1888 | ||
1306 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); | 1889 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); |
1307 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); | 1890 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); |
1891 | rapl_probe(family, model); | ||
1892 | |||
1893 | return; | ||
1308 | } | 1894 | } |
1309 | 1895 | ||
1310 | 1896 | ||
1311 | void usage() | 1897 | void usage() |
1312 | { | 1898 | { |
1313 | fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", | 1899 | fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", |
1314 | progname); | 1900 | progname); |
1315 | exit(1); | 1901 | exit(1); |
1316 | } | 1902 | } |
@@ -1546,6 +2132,17 @@ void turbostat_init() | |||
1546 | 2132 | ||
1547 | if (verbose) | 2133 | if (verbose) |
1548 | print_verbose_header(); | 2134 | print_verbose_header(); |
2135 | |||
2136 | if (verbose) | ||
2137 | for_all_cpus(print_epb, ODD_COUNTERS); | ||
2138 | |||
2139 | if (verbose) | ||
2140 | for_all_cpus(print_rapl, ODD_COUNTERS); | ||
2141 | |||
2142 | for_all_cpus(set_temperature_target, ODD_COUNTERS); | ||
2143 | |||
2144 | if (verbose) | ||
2145 | for_all_cpus(print_thermal, ODD_COUNTERS); | ||
1549 | } | 2146 | } |
1550 | 2147 | ||
1551 | int fork_it(char **argv) | 2148 | int fork_it(char **argv) |
@@ -1602,7 +2199,7 @@ void cmdline(int argc, char **argv) | |||
1602 | 2199 | ||
1603 | progname = argv[0]; | 2200 | progname = argv[0]; |
1604 | 2201 | ||
1605 | while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { | 2202 | while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) { |
1606 | switch (opt) { | 2203 | switch (opt) { |
1607 | case 'p': | 2204 | case 'p': |
1608 | show_core_only++; | 2205 | show_core_only++; |
@@ -1634,6 +2231,12 @@ void cmdline(int argc, char **argv) | |||
1634 | case 'M': | 2231 | case 'M': |
1635 | sscanf(optarg, "%x", &extra_msr_offset64); | 2232 | sscanf(optarg, "%x", &extra_msr_offset64); |
1636 | break; | 2233 | break; |
2234 | case 'R': | ||
2235 | rapl_verbose++; | ||
2236 | break; | ||
2237 | case 'T': | ||
2238 | tcc_activation_temp_override = atoi(optarg); | ||
2239 | break; | ||
1637 | default: | 2240 | default: |
1638 | usage(); | 2241 | usage(); |
1639 | } | 2242 | } |
@@ -1644,8 +2247,8 @@ int main(int argc, char **argv) | |||
1644 | { | 2247 | { |
1645 | cmdline(argc, argv); | 2248 | cmdline(argc, argv); |
1646 | 2249 | ||
1647 | if (verbose > 1) | 2250 | if (verbose) |
1648 | fprintf(stderr, "turbostat v2.1 October 6, 2012" | 2251 | fprintf(stderr, "turbostat v3.0 November 23, 2012" |
1649 | " - Len Brown <lenb@kernel.org>\n"); | 2252 | " - Len Brown <lenb@kernel.org>\n"); |
1650 | 2253 | ||
1651 | turbostat_init(); | 2254 | turbostat_init(); |