diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-19 17:31:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-04-19 17:31:41 -0400 |
commit | 09d51602cf84a1264946711dd4ea0dddbac599a1 (patch) | |
tree | 41d96f89a1071659ff768b733115c7873a2f2778 /tools | |
parent | 6162e4b0bedeb3dac2ba0a5e1b1f56db107d97ec (diff) | |
parent | e9257f5fa48cc296d7eed35acf9f2ad195184122 (diff) |
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull turbostat update from Len Brown:
"Updates to the turbostat utility.
Just one kernel dependency in this batch -- added a #define to
msr-index.h"
* 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux:
tools/power turbostat: correct dumped pkg-cstate-limit value
tools/power turbostat: calculate TSC frequency from CPUID(0x15) on SKL
tools/power turbostat: correct DRAM RAPL units on recent Xeon processors
tools/power turbostat: Initial Skylake support
tools/power turbostat: Use $(CURDIR) instead of $(PWD) and add support for O= option in Makefile
tools/power turbostat: modprobe msr, if needed
tools/power turbostat: dump MSR_TURBO_RATIO_LIMIT2
tools/power turbostat: use new MSR_TURBO_RATIO_LIMIT names
x86 msr-index: define MSR_TURBO_RATIO_LIMIT,1,2
tools/power turbostat: label base frequency
tools/power turbostat: update PERF_LIMIT_REASONS decoding
tools/power turbostat: simplify default output
Diffstat (limited to 'tools')
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 6 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 138 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 436 |
3 files changed, 419 insertions, 161 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d1b3a361e526..4039854560d0 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
@@ -1,8 +1,12 @@ | |||
1 | CC = $(CROSS_COMPILE)gcc | 1 | CC = $(CROSS_COMPILE)gcc |
2 | BUILD_OUTPUT := $(PWD) | 2 | BUILD_OUTPUT := $(CURDIR) |
3 | PREFIX := /usr | 3 | PREFIX := /usr |
4 | DESTDIR := | 4 | DESTDIR := |
5 | 5 | ||
6 | ifeq ("$(origin O)", "command line") | ||
7 | BUILD_OUTPUT := $(O) | ||
8 | endif | ||
9 | |||
6 | turbostat : turbostat.c | 10 | turbostat : turbostat.c |
7 | CFLAGS += -Wall | 11 | CFLAGS += -Wall |
8 | CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' | 12 | CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' |
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index feea7ad9500b..05b8fc38dc8b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -20,9 +20,11 @@ upon its completion. | |||
20 | The second method is to omit the command, | 20 | The second method is to omit the command, |
21 | and turbostat displays statistics every 5 seconds. | 21 | and turbostat displays statistics every 5 seconds. |
22 | The 5-second interval can be changed using the --interval option. | 22 | The 5-second interval can be changed using the --interval option. |
23 | 23 | .PP | |
24 | Some information is not available on older processors. | 24 | Some information is not available on older processors. |
25 | .SS Options | 25 | .SS Options |
26 | Options can be specified with a single or double '-', and only as much of the option | ||
27 | name as is needed to disambiguate it from the others need be given. Note that options are case-sensitive. | ||
26 | \fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. | 28 | \fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. |
27 | .PP | 29 | .PP |
28 | \fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. | 30 | \fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. |
@@ -55,16 +57,20 @@ more than once may also enable internal turbostat debug information. | |||
55 | The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, | 57 | The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, |
56 | displays the statistics gathered since it was forked. | 58 | displays the statistics gathered since it was forked. |
57 | .PP | 59 | .PP |
58 | .SH FIELD DESCRIPTIONS | 60 | .SH DEFAULT FIELD DESCRIPTIONS |
59 | .nf | 61 | .nf |
60 | \fBPackage\fP processor package number. | 62 | \fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. |
61 | \fBCore\fP processor core number. | ||
62 | \fBCPU\fP Linux CPU (logical processor) number. | ||
63 | Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. | ||
64 | \fBAVG_MHz\fP number of cycles executed divided by time elapsed. | 63 | \fBAVG_MHz\fP number of cycles executed divided by time elapsed. |
65 | \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. | 64 | \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. |
66 | \fBBzy_MHz\fP average clock rate while the CPU was busy (in "C0" state). | 65 | \fBBzy_MHz\fP average clock rate while the CPU was busy (in "C0" state). |
67 | \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. | 66 | \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. |
67 | .fi | ||
68 | .PP | ||
69 | .SH DEBUG FIELD DESCRIPTIONS | ||
70 | .nf | ||
71 | \fBPackage\fP processor package number. | ||
72 | \fBCore\fP processor core number. | ||
73 | Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). | ||
68 | \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. | 74 | \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. |
69 | \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. | 75 | \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. |
70 | \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. | 76 | \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. |
@@ -81,63 +87,76 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T | |||
81 | Without any parameters, turbostat displays statistics every 5 seconds. | 87 | Without any parameters, turbostat displays statistics every 5 seconds. |
82 | (override interval with "-i sec" option, or specify a command | 88 | (override interval with "-i sec" option, or specify a command |
83 | for turbostat to fork). | 89 | for turbostat to fork). |
90 | .nf | ||
91 | [root@hsw]# ./turbostat | ||
92 | CPU Avg_MHz %Busy Bzy_MHz TSC_MHz | ||
93 | - 488 12.51 3898 3498 | ||
94 | 0 0 0.01 3885 3498 | ||
95 | 4 3897 99.99 3898 3498 | ||
96 | 1 0 0.00 3861 3498 | ||
97 | 5 0 0.00 3882 3498 | ||
98 | 2 1 0.02 3894 3498 | ||
99 | 6 2 0.06 3898 3498 | ||
100 | 3 0 0.00 3849 3498 | ||
101 | 7 0 0.00 3877 3498 | ||
102 | |||
103 | .fi | ||
104 | .SH DEBUG EXAMPLE | ||
105 | The "--debug" option prints additional system information before measurements: | ||
84 | 106 | ||
85 | The first row of statistics is a summary for the entire system. | 107 | The first row of statistics is a summary for the entire system. |
86 | For residency % columns, the summary is a weighted average. | 108 | For residency % columns, the summary is a weighted average. |
87 | For Temperature columns, the summary is the column maximum. | 109 | For Temperature columns, the summary is the column maximum. |
88 | For Watts columns, the summary is a system total. | 110 | For Watts columns, the summary is a system total. |
89 | Subsequent rows show per-CPU statistics. | 111 | Subsequent rows show per-CPU statistics. |
90 | |||
91 | .nf | ||
92 | [root@ivy]# ./turbostat | ||
93 | Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt | ||
94 | - - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 | ||
95 | 0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 | ||
96 | 0 4 1 0.07 1596 3492 0 0.79 | ||
97 | 1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23 | ||
98 | 1 5 5 0.28 1596 3492 0 0.95 | ||
99 | 2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23 | ||
100 | 2 6 2 0.10 1597 3492 0 0.97 | ||
101 | 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23 | ||
102 | 3 7 5 0.31 1596 3492 0 0.33 | ||
103 | .fi | ||
104 | .SH DEBUG EXAMPLE | ||
105 | The "--debug" option prints additional system information before measurements: | ||
106 | |||
107 | .nf | 112 | .nf |
108 | turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org> | 113 | turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org> |
109 | CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) | 114 | CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) |
110 | CPUID(6): APERF, DTS, PTM, EPB | 115 | CPUID(6): APERF, DTS, PTM, EPB |
111 | RAPL: 851 sec. Joule Counter Range, at 77 Watts | 116 | RAPL: 3121 sec. Joule Counter Range, at 84 Watts |
112 | cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 | 117 | cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 |
113 | 16 * 100 = 1600 MHz max efficiency | 118 | 8 * 100 = 800 MHz max efficiency |
114 | 35 * 100 = 3500 MHz TSC frequency | 119 | 35 * 100 = 3500 MHz TSC frequency |
115 | cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled) | 120 | cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) |
116 | cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n) | 121 | cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) |
117 | cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 | 122 | cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 |
118 | 37 * 100 = 3700 MHz max turbo 4 active cores | 123 | 37 * 100 = 3700 MHz max turbo 4 active cores |
119 | 38 * 100 = 3800 MHz max turbo 3 active cores | 124 | 38 * 100 = 3800 MHz max turbo 3 active cores |
120 | 39 * 100 = 3900 MHz max turbo 2 active cores | 125 | 39 * 100 = 3900 MHz max turbo 2 active cores |
121 | 39 * 100 = 3900 MHz max turbo 1 active cores | 126 | 39 * 100 = 3900 MHz max turbo 1 active cores |
122 | cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) | 127 | cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) |
123 | cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) | 128 | cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) |
124 | cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) | 129 | cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) |
125 | cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked) | 130 | cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) |
126 | cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) | 131 | cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) |
127 | cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled) | 132 | cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.) |
133 | cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked) | ||
134 | cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled) | ||
135 | cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled) | ||
128 | cpu0: MSR_PP0_POLICY: 0 | 136 | cpu0: MSR_PP0_POLICY: 0 |
129 | cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) | 137 | cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) |
130 | cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | 138 | cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) |
131 | cpu0: MSR_PP1_POLICY: 0 | 139 | cpu0: MSR_PP1_POLICY: 0 |
132 | cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) | 140 | cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) |
133 | cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | 141 | cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) |
134 | cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) | 142 | cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C) |
135 | cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) | 143 | cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C) |
136 | cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1) | 144 | cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) |
137 | cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1) | 145 | cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) |
138 | cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1) | 146 | cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) |
139 | cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) | 147 | cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) |
140 | ... | 148 | Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt |
149 | - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 | ||
150 | 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 | ||
151 | 0 4 3897 99.98 3898 3498 0 0.02 | ||
152 | 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32 | ||
153 | 1 5 0 0.00 3885 3498 0 0.21 | ||
154 | 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32 | ||
155 | 2 6 2 0.06 3896 3498 0 0.80 | ||
156 | 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28 | ||
157 | 3 7 0 0.00 3879 3498 0 0.04 | ||
158 | ^C | ||
159 | |||
141 | .fi | 160 | .fi |
142 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency | 161 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency |
143 | available at the minimum package voltage. The \fBTSC frequency\fP is the base | 162 | available at the minimum package voltage. The \fBTSC frequency\fP is the base |
@@ -147,6 +166,9 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. | |||
147 | The remaining rows show what maximum turbo frequency is possible | 166 | The remaining rows show what maximum turbo frequency is possible |
148 | depending on the number of idle cores. Note that not all information is | 167 | depending on the number of idle cores. Note that not all information is |
149 | available on all processors. | 168 | available on all processors. |
169 | .PP | ||
170 | The --debug option adds additional columns to the measurement output, including CPU idle power-state residency and processor temperature sensor readings. | ||
171 | See the field definitions above. | ||
150 | .SH FORK EXAMPLE | 172 | .SH FORK EXAMPLE |
151 | If turbostat is invoked with a command, it will fork that command | 173 | If turbostat is invoked with a command, it will fork that command |
152 | and output the statistics gathered when the command exits. | 174 | and output the statistics gathered when the command exits. |
@@ -154,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds | |||
154 | until ^C while the other CPUs are mostly idle: | 176 | until ^C while the other CPUs are mostly idle: |
155 | 177 | ||
156 | .nf | 178 | .nf |
157 | root@ivy: turbostat cat /dev/zero > /dev/null | 179 | root@hsw: turbostat cat /dev/zero > /dev/null |
158 | ^C | 180 | ^C |
159 | Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt | 181 | CPU Avg_MHz %Busy Bzy_MHz TSC_MHz |
160 | - - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 | 182 | - 482 12.51 3854 3498 |
161 | 0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 | 183 | 0 0 0.01 1960 3498 |
162 | 0 4 9 0.24 3829 3492 0 1.15 | 184 | 4 0 0.00 2128 3498 |
163 | 1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36 | 185 | 1 0 0.00 3003 3498 |
164 | 1 5 3880 99.82 3888 3492 0 0.18 | 186 | 5 3854 99.98 3855 3498 |
165 | 2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28 | 187 | 2 0 0.01 3504 3498 |
166 | 2 6 12 0.32 3823 3492 0 0.89 | 188 | 6 3 0.08 3884 3498 |
167 | 3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30 | 189 | 3 0 0.00 2553 3498 |
168 | 3 7 4 0.11 3827 3492 0 0.94 | 190 | 7 0 0.00 2126 3498 |
169 | 30.372243 sec | 191 | 10.783983 sec |
170 | 192 | ||
171 | .fi | 193 | .fi |
172 | Above the cycle soaker drives cpu5 up to its 3.8 GHz turbo limit | 194 | Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit. |
173 | while the other processors are generally in various states of idle. | 195 | The first row shows the average MHz and %Busy across all the processors in the system. |
174 | |||
175 | Note that cpu1 and cpu5 are HT siblings within core1. | ||
176 | As cpu5 is very busy, it prevents its sibling, cpu1, | ||
177 | from entering a c-state deeper than c1. | ||
178 | 196 | ||
179 | Note that the Avg_MHz column reflects the total number of cycles executed | 197 | Note that the Avg_MHz column reflects the total number of cycles executed |
180 | divided by the measurement interval. If the %Busy column is 100%, | 198 | divided by the measurement interval. If the %Busy column is 100%, |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2d089cac8580..bac98ca3d4ca 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -57,6 +57,7 @@ unsigned int do_pc3; | |||
57 | unsigned int do_pc6; | 57 | unsigned int do_pc6; |
58 | unsigned int do_pc7; | 58 | unsigned int do_pc7; |
59 | unsigned int do_c8_c9_c10; | 59 | unsigned int do_c8_c9_c10; |
60 | unsigned int do_skl_residency; | ||
60 | unsigned int do_slm_cstates; | 61 | unsigned int do_slm_cstates; |
61 | unsigned int use_c1_residency_msr; | 62 | unsigned int use_c1_residency_msr; |
62 | unsigned int has_aperf; | 63 | unsigned int has_aperf; |
@@ -65,8 +66,6 @@ unsigned int units = 1000000; /* MHz etc */ | |||
65 | unsigned int genuine_intel; | 66 | unsigned int genuine_intel; |
66 | unsigned int has_invariant_tsc; | 67 | unsigned int has_invariant_tsc; |
67 | unsigned int do_nhm_platform_info; | 68 | unsigned int do_nhm_platform_info; |
68 | unsigned int do_nhm_turbo_ratio_limit; | ||
69 | unsigned int do_ivt_turbo_ratio_limit; | ||
70 | unsigned int extra_msr_offset32; | 69 | unsigned int extra_msr_offset32; |
71 | unsigned int extra_msr_offset64; | 70 | unsigned int extra_msr_offset64; |
72 | unsigned int extra_delta_offset32; | 71 | unsigned int extra_delta_offset32; |
@@ -84,11 +83,14 @@ unsigned int do_dts; | |||
84 | unsigned int do_ptm; | 83 | unsigned int do_ptm; |
85 | unsigned int tcc_activation_temp; | 84 | unsigned int tcc_activation_temp; |
86 | unsigned int tcc_activation_temp_override; | 85 | unsigned int tcc_activation_temp_override; |
87 | double rapl_power_units, rapl_energy_units, rapl_time_units; | 86 | double rapl_power_units, rapl_time_units; |
87 | double rapl_dram_energy_units, rapl_energy_units; | ||
88 | double rapl_joule_counter_range; | 88 | double rapl_joule_counter_range; |
89 | unsigned int do_core_perf_limit_reasons; | 89 | unsigned int do_core_perf_limit_reasons; |
90 | unsigned int do_gfx_perf_limit_reasons; | 90 | unsigned int do_gfx_perf_limit_reasons; |
91 | unsigned int do_ring_perf_limit_reasons; | 91 | unsigned int do_ring_perf_limit_reasons; |
92 | unsigned int crystal_hz; | ||
93 | unsigned long long tsc_hz; | ||
92 | 94 | ||
93 | #define RAPL_PKG (1 << 0) | 95 | #define RAPL_PKG (1 << 0) |
94 | /* 0x610 MSR_PKG_POWER_LIMIT */ | 96 | /* 0x610 MSR_PKG_POWER_LIMIT */ |
@@ -101,18 +103,18 @@ unsigned int do_ring_perf_limit_reasons; | |||
101 | #define RAPL_DRAM (1 << 3) | 103 | #define RAPL_DRAM (1 << 3) |
102 | /* 0x618 MSR_DRAM_POWER_LIMIT */ | 104 | /* 0x618 MSR_DRAM_POWER_LIMIT */ |
103 | /* 0x619 MSR_DRAM_ENERGY_STATUS */ | 105 | /* 0x619 MSR_DRAM_ENERGY_STATUS */ |
104 | /* 0x61c MSR_DRAM_POWER_INFO */ | ||
105 | #define RAPL_DRAM_PERF_STATUS (1 << 4) | 106 | #define RAPL_DRAM_PERF_STATUS (1 << 4) |
106 | /* 0x61b MSR_DRAM_PERF_STATUS */ | 107 | /* 0x61b MSR_DRAM_PERF_STATUS */ |
108 | #define RAPL_DRAM_POWER_INFO (1 << 5) | ||
109 | /* 0x61c MSR_DRAM_POWER_INFO */ | ||
107 | 110 | ||
108 | #define RAPL_CORES (1 << 5) | 111 | #define RAPL_CORES (1 << 6) |
109 | /* 0x638 MSR_PP0_POWER_LIMIT */ | 112 | /* 0x638 MSR_PP0_POWER_LIMIT */ |
110 | /* 0x639 MSR_PP0_ENERGY_STATUS */ | 113 | /* 0x639 MSR_PP0_ENERGY_STATUS */ |
111 | #define RAPL_CORE_POLICY (1 << 6) | 114 | #define RAPL_CORE_POLICY (1 << 7) |
112 | /* 0x63a MSR_PP0_POLICY */ | 115 | /* 0x63a MSR_PP0_POLICY */ |
113 | 116 | ||
114 | 117 | #define RAPL_GFX (1 << 8) | |
115 | #define RAPL_GFX (1 << 7) | ||
116 | /* 0x640 MSR_PP1_POWER_LIMIT */ | 118 | /* 0x640 MSR_PP1_POWER_LIMIT */ |
117 | /* 0x641 MSR_PP1_ENERGY_STATUS */ | 119 | /* 0x641 MSR_PP1_ENERGY_STATUS */ |
118 | /* 0x642 MSR_PP1_POLICY */ | 120 | /* 0x642 MSR_PP1_POLICY */ |
@@ -159,6 +161,10 @@ struct pkg_data { | |||
159 | unsigned long long pc8; | 161 | unsigned long long pc8; |
160 | unsigned long long pc9; | 162 | unsigned long long pc9; |
161 | unsigned long long pc10; | 163 | unsigned long long pc10; |
164 | unsigned long long pkg_wtd_core_c0; | ||
165 | unsigned long long pkg_any_core_c0; | ||
166 | unsigned long long pkg_any_gfxe_c0; | ||
167 | unsigned long long pkg_both_core_gfxe_c0; | ||
162 | unsigned int package_id; | 168 | unsigned int package_id; |
163 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ | 169 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ |
164 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ | 170 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ |
@@ -292,8 +298,7 @@ void print_header(void) | |||
292 | if (has_aperf) | 298 | if (has_aperf) |
293 | outp += sprintf(outp, " Bzy_MHz"); | 299 | outp += sprintf(outp, " Bzy_MHz"); |
294 | outp += sprintf(outp, " TSC_MHz"); | 300 | outp += sprintf(outp, " TSC_MHz"); |
295 | if (do_smi) | 301 | |
296 | outp += sprintf(outp, " SMI"); | ||
297 | if (extra_delta_offset32) | 302 | if (extra_delta_offset32) |
298 | outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); | 303 | outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); |
299 | if (extra_delta_offset64) | 304 | if (extra_delta_offset64) |
@@ -302,6 +307,13 @@ void print_header(void) | |||
302 | outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); | 307 | outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); |
303 | if (extra_msr_offset64) | 308 | if (extra_msr_offset64) |
304 | outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); | 309 | outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); |
310 | |||
311 | if (!debug) | ||
312 | goto done; | ||
313 | |||
314 | if (do_smi) | ||
315 | outp += sprintf(outp, " SMI"); | ||
316 | |||
305 | if (do_nhm_cstates) | 317 | if (do_nhm_cstates) |
306 | outp += sprintf(outp, " CPU%%c1"); | 318 | outp += sprintf(outp, " CPU%%c1"); |
307 | if (do_nhm_cstates && !do_slm_cstates) | 319 | if (do_nhm_cstates && !do_slm_cstates) |
@@ -316,6 +328,13 @@ void print_header(void) | |||
316 | if (do_ptm) | 328 | if (do_ptm) |
317 | outp += sprintf(outp, " PkgTmp"); | 329 | outp += sprintf(outp, " PkgTmp"); |
318 | 330 | ||
331 | if (do_skl_residency) { | ||
332 | outp += sprintf(outp, " Totl%%C0"); | ||
333 | outp += sprintf(outp, " Any%%C0"); | ||
334 | outp += sprintf(outp, " GFX%%C0"); | ||
335 | outp += sprintf(outp, " CPUGFX%%"); | ||
336 | } | ||
337 | |||
319 | if (do_pc2) | 338 | if (do_pc2) |
320 | outp += sprintf(outp, " Pkg%%pc2"); | 339 | outp += sprintf(outp, " Pkg%%pc2"); |
321 | if (do_pc3) | 340 | if (do_pc3) |
@@ -359,6 +378,7 @@ void print_header(void) | |||
359 | outp += sprintf(outp, " time"); | 378 | outp += sprintf(outp, " time"); |
360 | 379 | ||
361 | } | 380 | } |
381 | done: | ||
362 | outp += sprintf(outp, "\n"); | 382 | outp += sprintf(outp, "\n"); |
363 | } | 383 | } |
364 | 384 | ||
@@ -396,6 +416,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
396 | 416 | ||
397 | if (p) { | 417 | if (p) { |
398 | outp += sprintf(outp, "package: %d\n", p->package_id); | 418 | outp += sprintf(outp, "package: %d\n", p->package_id); |
419 | |||
420 | outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); | ||
421 | outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); | ||
422 | outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); | ||
423 | outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); | ||
424 | |||
399 | outp += sprintf(outp, "pc2: %016llX\n", p->pc2); | 425 | outp += sprintf(outp, "pc2: %016llX\n", p->pc2); |
400 | if (do_pc3) | 426 | if (do_pc3) |
401 | outp += sprintf(outp, "pc3: %016llX\n", p->pc3); | 427 | outp += sprintf(outp, "pc3: %016llX\n", p->pc3); |
@@ -487,10 +513,6 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
487 | /* TSC_MHz */ | 513 | /* TSC_MHz */ |
488 | outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); | 514 | outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); |
489 | 515 | ||
490 | /* SMI */ | ||
491 | if (do_smi) | ||
492 | outp += sprintf(outp, "%8d", t->smi_count); | ||
493 | |||
494 | /* delta */ | 516 | /* delta */ |
495 | if (extra_delta_offset32) | 517 | if (extra_delta_offset32) |
496 | outp += sprintf(outp, " %11llu", t->extra_delta32); | 518 | outp += sprintf(outp, " %11llu", t->extra_delta32); |
@@ -506,6 +528,13 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
506 | if (extra_msr_offset64) | 528 | if (extra_msr_offset64) |
507 | outp += sprintf(outp, " 0x%016llx", t->extra_msr64); | 529 | outp += sprintf(outp, " 0x%016llx", t->extra_msr64); |
508 | 530 | ||
531 | if (!debug) | ||
532 | goto done; | ||
533 | |||
534 | /* SMI */ | ||
535 | if (do_smi) | ||
536 | outp += sprintf(outp, "%8d", t->smi_count); | ||
537 | |||
509 | if (do_nhm_cstates) { | 538 | if (do_nhm_cstates) { |
510 | if (!skip_c1) | 539 | if (!skip_c1) |
511 | outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); | 540 | outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); |
@@ -531,9 +560,18 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
531 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 560 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
532 | goto done; | 561 | goto done; |
533 | 562 | ||
563 | /* PkgTmp */ | ||
534 | if (do_ptm) | 564 | if (do_ptm) |
535 | outp += sprintf(outp, "%8d", p->pkg_temp_c); | 565 | outp += sprintf(outp, "%8d", p->pkg_temp_c); |
536 | 566 | ||
567 | /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ | ||
568 | if (do_skl_residency) { | ||
569 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); | ||
570 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); | ||
571 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); | ||
572 | outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); | ||
573 | } | ||
574 | |||
537 | if (do_pc2) | 575 | if (do_pc2) |
538 | outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); | 576 | outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); |
539 | if (do_pc3) | 577 | if (do_pc3) |
@@ -565,7 +603,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
565 | if (do_rapl & RAPL_GFX) | 603 | if (do_rapl & RAPL_GFX) |
566 | outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); | 604 | outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); |
567 | if (do_rapl & RAPL_DRAM) | 605 | if (do_rapl & RAPL_DRAM) |
568 | outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float); | 606 | outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); |
569 | if (do_rapl & RAPL_PKG_PERF_STATUS) | 607 | if (do_rapl & RAPL_PKG_PERF_STATUS) |
570 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | 608 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); |
571 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 609 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
@@ -582,7 +620,7 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
582 | p->energy_gfx * rapl_energy_units); | 620 | p->energy_gfx * rapl_energy_units); |
583 | if (do_rapl & RAPL_DRAM) | 621 | if (do_rapl & RAPL_DRAM) |
584 | outp += sprintf(outp, fmt8, | 622 | outp += sprintf(outp, fmt8, |
585 | p->energy_dram * rapl_energy_units); | 623 | p->energy_dram * rapl_dram_energy_units); |
586 | if (do_rapl & RAPL_PKG_PERF_STATUS) | 624 | if (do_rapl & RAPL_PKG_PERF_STATUS) |
587 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | 625 | outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); |
588 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | 626 | if (do_rapl & RAPL_DRAM_PERF_STATUS) |
@@ -636,6 +674,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ | |||
636 | void | 674 | void |
637 | delta_package(struct pkg_data *new, struct pkg_data *old) | 675 | delta_package(struct pkg_data *new, struct pkg_data *old) |
638 | { | 676 | { |
677 | |||
678 | if (do_skl_residency) { | ||
679 | old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; | ||
680 | old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; | ||
681 | old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; | ||
682 | old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; | ||
683 | } | ||
639 | old->pc2 = new->pc2 - old->pc2; | 684 | old->pc2 = new->pc2 - old->pc2; |
640 | if (do_pc3) | 685 | if (do_pc3) |
641 | old->pc3 = new->pc3 - old->pc3; | 686 | old->pc3 = new->pc3 - old->pc3; |
@@ -782,6 +827,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
782 | c->c7 = 0; | 827 | c->c7 = 0; |
783 | c->core_temp_c = 0; | 828 | c->core_temp_c = 0; |
784 | 829 | ||
830 | p->pkg_wtd_core_c0 = 0; | ||
831 | p->pkg_any_core_c0 = 0; | ||
832 | p->pkg_any_gfxe_c0 = 0; | ||
833 | p->pkg_both_core_gfxe_c0 = 0; | ||
834 | |||
785 | p->pc2 = 0; | 835 | p->pc2 = 0; |
786 | if (do_pc3) | 836 | if (do_pc3) |
787 | p->pc3 = 0; | 837 | p->pc3 = 0; |
@@ -826,6 +876,13 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
826 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 876 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
827 | return 0; | 877 | return 0; |
828 | 878 | ||
879 | if (do_skl_residency) { | ||
880 | average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; | ||
881 | average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; | ||
882 | average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; | ||
883 | average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; | ||
884 | } | ||
885 | |||
829 | average.packages.pc2 += p->pc2; | 886 | average.packages.pc2 += p->pc2; |
830 | if (do_pc3) | 887 | if (do_pc3) |
831 | average.packages.pc3 += p->pc3; | 888 | average.packages.pc3 += p->pc3; |
@@ -873,6 +930,13 @@ void compute_average(struct thread_data *t, struct core_data *c, | |||
873 | average.cores.c6 /= topo.num_cores; | 930 | average.cores.c6 /= topo.num_cores; |
874 | average.cores.c7 /= topo.num_cores; | 931 | average.cores.c7 /= topo.num_cores; |
875 | 932 | ||
933 | if (do_skl_residency) { | ||
934 | average.packages.pkg_wtd_core_c0 /= topo.num_packages; | ||
935 | average.packages.pkg_any_core_c0 /= topo.num_packages; | ||
936 | average.packages.pkg_any_gfxe_c0 /= topo.num_packages; | ||
937 | average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; | ||
938 | } | ||
939 | |||
876 | average.packages.pc2 /= topo.num_packages; | 940 | average.packages.pc2 /= topo.num_packages; |
877 | if (do_pc3) | 941 | if (do_pc3) |
878 | average.packages.pc3 /= topo.num_packages; | 942 | average.packages.pc3 /= topo.num_packages; |
@@ -979,6 +1043,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
979 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 1043 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
980 | return 0; | 1044 | return 0; |
981 | 1045 | ||
1046 | if (do_skl_residency) { | ||
1047 | if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) | ||
1048 | return -10; | ||
1049 | if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) | ||
1050 | return -11; | ||
1051 | if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) | ||
1052 | return -12; | ||
1053 | if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) | ||
1054 | return -13; | ||
1055 | } | ||
982 | if (do_pc3) | 1056 | if (do_pc3) |
983 | if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) | 1057 | if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) |
984 | return -9; | 1058 | return -9; |
@@ -1055,49 +1129,77 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
1055 | #define PCL_6R 9 /* PC6 Retention */ | 1129 | #define PCL_6R 9 /* PC6 Retention */ |
1056 | #define PCL__7 10 /* PC7 */ | 1130 | #define PCL__7 10 /* PC7 */ |
1057 | #define PCL_7S 11 /* PC7 Shrink */ | 1131 | #define PCL_7S 11 /* PC7 Shrink */ |
1058 | #define PCLUNL 12 /* Unlimited */ | 1132 | #define PCL__8 12 /* PC8 */ |
1133 | #define PCL__9 13 /* PC9 */ | ||
1134 | #define PCLUNL 14 /* Unlimited */ | ||
1059 | 1135 | ||
1060 | int pkg_cstate_limit = PCLUKN; | 1136 | int pkg_cstate_limit = PCLUKN; |
1061 | char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", | 1137 | char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", |
1062 | "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"}; | 1138 | "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"}; |
1063 | 1139 | ||
1064 | int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL}; | 1140 | int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1065 | int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL}; | 1141 | int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1066 | int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL}; | 1142 | int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1067 | int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7}; | 1143 | int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1068 | int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; | 1144 | int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1069 | int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL}; | 1145 | int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; |
1070 | 1146 | ||
1071 | void print_verbose_header(void) | 1147 | static void |
1148 | dump_nhm_platform_info(void) | ||
1072 | { | 1149 | { |
1073 | unsigned long long msr; | 1150 | unsigned long long msr; |
1074 | unsigned int ratio; | 1151 | unsigned int ratio; |
1075 | 1152 | ||
1076 | if (!do_nhm_platform_info) | ||
1077 | return; | ||
1078 | |||
1079 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); | 1153 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); |
1080 | 1154 | ||
1081 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); | 1155 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); |
1082 | 1156 | ||
1083 | ratio = (msr >> 40) & 0xFF; | 1157 | ratio = (msr >> 40) & 0xFF; |
1084 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", | 1158 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", |
1085 | ratio, bclk, ratio * bclk); | 1159 | ratio, bclk, ratio * bclk); |
1086 | 1160 | ||
1087 | ratio = (msr >> 8) & 0xFF; | 1161 | ratio = (msr >> 8) & 0xFF; |
1088 | fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", | 1162 | fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", |
1089 | ratio, bclk, ratio * bclk); | 1163 | ratio, bclk, ratio * bclk); |
1090 | 1164 | ||
1091 | get_msr(0, MSR_IA32_POWER_CTL, &msr); | 1165 | get_msr(0, MSR_IA32_POWER_CTL, &msr); |
1092 | fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", | 1166 | fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", |
1093 | msr, msr & 0x2 ? "EN" : "DIS"); | 1167 | msr, msr & 0x2 ? "EN" : "DIS"); |
1094 | 1168 | ||
1095 | if (!do_ivt_turbo_ratio_limit) | 1169 | return; |
1096 | goto print_nhm_turbo_ratio_limits; | 1170 | } |
1171 | |||
1172 | static void | ||
1173 | dump_hsw_turbo_ratio_limits(void) | ||
1174 | { | ||
1175 | unsigned long long msr; | ||
1176 | unsigned int ratio; | ||
1177 | |||
1178 | get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr); | ||
1179 | |||
1180 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); | ||
1181 | |||
1182 | ratio = (msr >> 8) & 0xFF; | ||
1183 | if (ratio) | ||
1184 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", | ||
1185 | ratio, bclk, ratio * bclk); | ||
1186 | |||
1187 | ratio = (msr >> 0) & 0xFF; | ||
1188 | if (ratio) | ||
1189 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", | ||
1190 | ratio, bclk, ratio * bclk); | ||
1191 | return; | ||
1192 | } | ||
1193 | |||
1194 | static void | ||
1195 | dump_ivt_turbo_ratio_limits(void) | ||
1196 | { | ||
1197 | unsigned long long msr; | ||
1198 | unsigned int ratio; | ||
1097 | 1199 | ||
1098 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); | 1200 | get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr); |
1099 | 1201 | ||
1100 | fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); | 1202 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); |
1101 | 1203 | ||
1102 | ratio = (msr >> 56) & 0xFF; | 1204 | ratio = (msr >> 56) & 0xFF; |
1103 | if (ratio) | 1205 | if (ratio) |
@@ -1138,30 +1240,18 @@ void print_verbose_header(void) | |||
1138 | if (ratio) | 1240 | if (ratio) |
1139 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", | 1241 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", |
1140 | ratio, bclk, ratio * bclk); | 1242 | ratio, bclk, ratio * bclk); |
1243 | return; | ||
1244 | } | ||
1141 | 1245 | ||
1142 | print_nhm_turbo_ratio_limits: | 1246 | static void |
1143 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | 1247 | dump_nhm_turbo_ratio_limits(void) |
1144 | 1248 | { | |
1145 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | 1249 | unsigned long long msr; |
1146 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | 1250 | unsigned int ratio; |
1147 | |||
1148 | fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); | ||
1149 | |||
1150 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", | ||
1151 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | ||
1152 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | ||
1153 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | ||
1154 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | ||
1155 | (msr & (1 << 15)) ? "" : "UN", | ||
1156 | (unsigned int)msr & 7, | ||
1157 | pkg_cstate_limit_strings[pkg_cstate_limit]); | ||
1158 | |||
1159 | if (!do_nhm_turbo_ratio_limit) | ||
1160 | return; | ||
1161 | 1251 | ||
1162 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); | 1252 | get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr); |
1163 | 1253 | ||
1164 | fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); | 1254 | fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
1165 | 1255 | ||
1166 | ratio = (msr >> 56) & 0xFF; | 1256 | ratio = (msr >> 56) & 0xFF; |
1167 | if (ratio) | 1257 | if (ratio) |
@@ -1202,7 +1292,30 @@ print_nhm_turbo_ratio_limits: | |||
1202 | if (ratio) | 1292 | if (ratio) |
1203 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", | 1293 | fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", |
1204 | ratio, bclk, ratio * bclk); | 1294 | ratio, bclk, ratio * bclk); |
1295 | return; | ||
1296 | } | ||
1297 | |||
1298 | static void | ||
1299 | dump_nhm_cst_cfg(void) | ||
1300 | { | ||
1301 | unsigned long long msr; | ||
1302 | |||
1303 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | ||
1304 | |||
1305 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
1306 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
1205 | 1307 | ||
1308 | fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); | ||
1309 | |||
1310 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", | ||
1311 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | ||
1312 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | ||
1313 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | ||
1314 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | ||
1315 | (msr & (1 << 15)) ? "" : "UN", | ||
1316 | (unsigned int)msr & 7, | ||
1317 | pkg_cstate_limit_strings[pkg_cstate_limit]); | ||
1318 | return; | ||
1206 | } | 1319 | } |
1207 | 1320 | ||
1208 | void free_all_buffers(void) | 1321 | void free_all_buffers(void) |
@@ -1483,7 +1596,8 @@ void check_dev_msr() | |||
1483 | struct stat sb; | 1596 | struct stat sb; |
1484 | 1597 | ||
1485 | if (stat("/dev/cpu/0/msr", &sb)) | 1598 | if (stat("/dev/cpu/0/msr", &sb)) |
1486 | err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); | 1599 | if (system("/sbin/modprobe msr > /dev/null 2>&1")) |
1600 | err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); | ||
1487 | } | 1601 | } |
1488 | 1602 | ||
1489 | void check_permissions() | 1603 | void check_permissions() |
@@ -1573,6 +1687,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) | |||
1573 | case 0x47: /* BDW */ | 1687 | case 0x47: /* BDW */ |
1574 | case 0x4F: /* BDX */ | 1688 | case 0x4F: /* BDX */ |
1575 | case 0x56: /* BDX-DE */ | 1689 | case 0x56: /* BDX-DE */ |
1690 | case 0x4E: /* SKL */ | ||
1691 | case 0x5E: /* SKL */ | ||
1576 | pkg_cstate_limits = hsw_pkg_cstate_limits; | 1692 | pkg_cstate_limits = hsw_pkg_cstate_limits; |
1577 | break; | 1693 | break; |
1578 | case 0x37: /* BYT */ | 1694 | case 0x37: /* BYT */ |
@@ -1590,7 +1706,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) | |||
1590 | } | 1706 | } |
1591 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | 1707 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); |
1592 | 1708 | ||
1593 | pkg_cstate_limit = pkg_cstate_limits[msr & 0x7]; | 1709 | pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; |
1594 | 1710 | ||
1595 | return 1; | 1711 | return 1; |
1596 | } | 1712 | } |
@@ -1615,12 +1731,49 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1615 | 1731 | ||
1616 | switch (model) { | 1732 | switch (model) { |
1617 | case 0x3E: /* IVB Xeon */ | 1733 | case 0x3E: /* IVB Xeon */ |
1734 | case 0x3F: /* HSW Xeon */ | ||
1735 | return 1; | ||
1736 | default: | ||
1737 | return 0; | ||
1738 | } | ||
1739 | } | ||
1740 | int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) | ||
1741 | { | ||
1742 | if (!genuine_intel) | ||
1743 | return 0; | ||
1744 | |||
1745 | if (family != 6) | ||
1746 | return 0; | ||
1747 | |||
1748 | switch (model) { | ||
1749 | case 0x3F: /* HSW Xeon */ | ||
1618 | return 1; | 1750 | return 1; |
1619 | default: | 1751 | default: |
1620 | return 0; | 1752 | return 0; |
1621 | } | 1753 | } |
1622 | } | 1754 | } |
1623 | 1755 | ||
1756 | static void | ||
1757 | dump_cstate_pstate_config_info(family, model) | ||
1758 | { | ||
1759 | if (!do_nhm_platform_info) | ||
1760 | return; | ||
1761 | |||
1762 | dump_nhm_platform_info(); | ||
1763 | |||
1764 | if (has_hsw_turbo_ratio_limit(family, model)) | ||
1765 | dump_hsw_turbo_ratio_limits(); | ||
1766 | |||
1767 | if (has_ivt_turbo_ratio_limit(family, model)) | ||
1768 | dump_ivt_turbo_ratio_limits(); | ||
1769 | |||
1770 | if (has_nhm_turbo_ratio_limit(family, model)) | ||
1771 | dump_nhm_turbo_ratio_limits(); | ||
1772 | |||
1773 | dump_nhm_cst_cfg(); | ||
1774 | } | ||
1775 | |||
1776 | |||
1624 | /* | 1777 | /* |
1625 | * print_epb() | 1778 | * print_epb() |
1626 | * Decode the ENERGY_PERF_BIAS MSR | 1779 | * Decode the ENERGY_PERF_BIAS MSR |
@@ -1690,35 +1843,35 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data | |||
1690 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); | 1843 | get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); |
1691 | fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); | 1844 | fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); |
1692 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", | 1845 | fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", |
1693 | (msr & 1 << 0) ? "PROCHOT, " : "", | 1846 | (msr & 1 << 15) ? "bit15, " : "", |
1694 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
1695 | (msr & 1 << 2) ? "bit2, " : "", | ||
1696 | (msr & 1 << 4) ? "Graphics, " : "", | ||
1697 | (msr & 1 << 5) ? "Auto-HWP, " : "", | ||
1698 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
1699 | (msr & 1 << 8) ? "Amps, " : "", | ||
1700 | (msr & 1 << 9) ? "CorePwr, " : "", | ||
1701 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
1702 | (msr & 1 << 11) ? "PkgPwrL2, " : "", | ||
1703 | (msr & 1 << 12) ? "MultiCoreTurbo, " : "", | ||
1704 | (msr & 1 << 13) ? "Transitions, " : "", | ||
1705 | (msr & 1 << 14) ? "bit14, " : "", | 1847 | (msr & 1 << 14) ? "bit14, " : "", |
1706 | (msr & 1 << 15) ? "bit15, " : ""); | 1848 | (msr & 1 << 13) ? "Transitions, " : "", |
1849 | (msr & 1 << 12) ? "MultiCoreTurbo, " : "", | ||
1850 | (msr & 1 << 11) ? "PkgPwrL2, " : "", | ||
1851 | (msr & 1 << 10) ? "PkgPwrL1, " : "", | ||
1852 | (msr & 1 << 9) ? "CorePwr, " : "", | ||
1853 | (msr & 1 << 8) ? "Amps, " : "", | ||
1854 | (msr & 1 << 6) ? "VR-Therm, " : "", | ||
1855 | (msr & 1 << 5) ? "Auto-HWP, " : "", | ||
1856 | (msr & 1 << 4) ? "Graphics, " : "", | ||
1857 | (msr & 1 << 2) ? "bit2, " : "", | ||
1858 | (msr & 1 << 1) ? "ThermStatus, " : "", | ||
1859 | (msr & 1 << 0) ? "PROCHOT, " : ""); | ||
1707 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", | 1860 | fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", |
1708 | (msr & 1 << 16) ? "PROCHOT, " : "", | 1861 | (msr & 1 << 31) ? "bit31, " : "", |
1709 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
1710 | (msr & 1 << 18) ? "bit18, " : "", | ||
1711 | (msr & 1 << 20) ? "Graphics, " : "", | ||
1712 | (msr & 1 << 21) ? "Auto-HWP, " : "", | ||
1713 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
1714 | (msr & 1 << 24) ? "Amps, " : "", | ||
1715 | (msr & 1 << 25) ? "CorePwr, " : "", | ||
1716 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
1717 | (msr & 1 << 27) ? "PkgPwrL2, " : "", | ||
1718 | (msr & 1 << 28) ? "MultiCoreTurbo, " : "", | ||
1719 | (msr & 1 << 29) ? "Transitions, " : "", | ||
1720 | (msr & 1 << 30) ? "bit30, " : "", | 1862 | (msr & 1 << 30) ? "bit30, " : "", |
1721 | (msr & 1 << 31) ? "bit31, " : ""); | 1863 | (msr & 1 << 29) ? "Transitions, " : "", |
1864 | (msr & 1 << 28) ? "MultiCoreTurbo, " : "", | ||
1865 | (msr & 1 << 27) ? "PkgPwrL2, " : "", | ||
1866 | (msr & 1 << 26) ? "PkgPwrL1, " : "", | ||
1867 | (msr & 1 << 25) ? "CorePwr, " : "", | ||
1868 | (msr & 1 << 24) ? "Amps, " : "", | ||
1869 | (msr & 1 << 22) ? "VR-Therm, " : "", | ||
1870 | (msr & 1 << 21) ? "Auto-HWP, " : "", | ||
1871 | (msr & 1 << 20) ? "Graphics, " : "", | ||
1872 | (msr & 1 << 18) ? "bit18, " : "", | ||
1873 | (msr & 1 << 17) ? "ThermStatus, " : "", | ||
1874 | (msr & 1 << 16) ? "PROCHOT, " : ""); | ||
1722 | 1875 | ||
1723 | } | 1876 | } |
1724 | if (do_gfx_perf_limit_reasons) { | 1877 | if (do_gfx_perf_limit_reasons) { |
@@ -1784,6 +1937,25 @@ double get_tdp(model) | |||
1784 | } | 1937 | } |
1785 | } | 1938 | } |
1786 | 1939 | ||
1940 | /* | ||
1941 | * rapl_dram_energy_units_probe() | ||
1942 | * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. | ||
1943 | */ | ||
1944 | static double | ||
1945 | rapl_dram_energy_units_probe(int model, double rapl_energy_units) | ||
1946 | { | ||
1947 | /* only called for genuine_intel, family 6 */ | ||
1948 | |||
1949 | switch (model) { | ||
1950 | case 0x3F: /* HSX */ | ||
1951 | case 0x4F: /* BDX */ | ||
1952 | case 0x56: /* BDX-DE */ | ||
1953 | return (rapl_dram_energy_units = 15.3 / 1000000); | ||
1954 | default: | ||
1955 | return (rapl_energy_units); | ||
1956 | } | ||
1957 | } | ||
1958 | |||
1787 | 1959 | ||
1788 | /* | 1960 | /* |
1789 | * rapl_probe() | 1961 | * rapl_probe() |
@@ -1812,14 +1984,18 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
1812 | case 0x47: /* BDW */ | 1984 | case 0x47: /* BDW */ |
1813 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; | 1985 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; |
1814 | break; | 1986 | break; |
1987 | case 0x4E: /* SKL */ | ||
1988 | case 0x5E: /* SKL */ | ||
1989 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; | ||
1990 | break; | ||
1815 | case 0x3F: /* HSX */ | 1991 | case 0x3F: /* HSX */ |
1816 | case 0x4F: /* BDX */ | 1992 | case 0x4F: /* BDX */ |
1817 | case 0x56: /* BDX-DE */ | 1993 | case 0x56: /* BDX-DE */ |
1818 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; | 1994 | do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; |
1819 | break; | 1995 | break; |
1820 | case 0x2D: | 1996 | case 0x2D: |
1821 | case 0x3E: | 1997 | case 0x3E: |
1822 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; | 1998 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; |
1823 | break; | 1999 | break; |
1824 | case 0x37: /* BYT */ | 2000 | case 0x37: /* BYT */ |
1825 | case 0x4D: /* AVN */ | 2001 | case 0x4D: /* AVN */ |
@@ -1839,6 +2015,8 @@ void rapl_probe(unsigned int family, unsigned int model) | |||
1839 | else | 2015 | else |
1840 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | 2016 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); |
1841 | 2017 | ||
2018 | rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); | ||
2019 | |||
1842 | time_unit = msr >> 16 & 0xF; | 2020 | time_unit = msr >> 16 & 0xF; |
1843 | if (time_unit == 0) | 2021 | if (time_unit == 0) |
1844 | time_unit = 0xA; | 2022 | time_unit = 0xA; |
@@ -2009,19 +2187,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
2009 | ((msr >> 48) & 1) ? "EN" : "DIS"); | 2187 | ((msr >> 48) & 1) ? "EN" : "DIS"); |
2010 | } | 2188 | } |
2011 | 2189 | ||
2012 | if (do_rapl & RAPL_DRAM) { | 2190 | if (do_rapl & RAPL_DRAM_POWER_INFO) { |
2013 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) | 2191 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) |
2014 | return -6; | 2192 | return -6; |
2015 | 2193 | ||
2016 | |||
2017 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | 2194 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", |
2018 | cpu, msr, | 2195 | cpu, msr, |
2019 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2196 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
2020 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2197 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
2021 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | 2198 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, |
2022 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | 2199 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); |
2023 | 2200 | } | |
2024 | 2201 | if (do_rapl & RAPL_DRAM) { | |
2025 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) | 2202 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) |
2026 | return -9; | 2203 | return -9; |
2027 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", | 2204 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", |
@@ -2090,6 +2267,8 @@ int has_snb_msrs(unsigned int family, unsigned int model) | |||
2090 | case 0x47: /* BDW */ | 2267 | case 0x47: /* BDW */ |
2091 | case 0x4F: /* BDX */ | 2268 | case 0x4F: /* BDX */ |
2092 | case 0x56: /* BDX-DE */ | 2269 | case 0x56: /* BDX-DE */ |
2270 | case 0x4E: /* SKL */ | ||
2271 | case 0x5E: /* SKL */ | ||
2093 | return 1; | 2272 | return 1; |
2094 | } | 2273 | } |
2095 | return 0; | 2274 | return 0; |
@@ -2110,11 +2289,35 @@ int has_hsw_msrs(unsigned int family, unsigned int model) | |||
2110 | switch (model) { | 2289 | switch (model) { |
2111 | case 0x45: /* HSW */ | 2290 | case 0x45: /* HSW */ |
2112 | case 0x3D: /* BDW */ | 2291 | case 0x3D: /* BDW */ |
2292 | case 0x4E: /* SKL */ | ||
2293 | case 0x5E: /* SKL */ | ||
2113 | return 1; | 2294 | return 1; |
2114 | } | 2295 | } |
2115 | return 0; | 2296 | return 0; |
2116 | } | 2297 | } |
2117 | 2298 | ||
2299 | /* | ||
2300 | * SKL adds support for additional MSRS: | ||
2301 | * | ||
2302 | * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 | ||
2303 | * MSR_PKG_ANY_CORE_C0_RES 0x00000659 | ||
2304 | * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A | ||
2305 | * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B | ||
2306 | */ | ||
2307 | int has_skl_msrs(unsigned int family, unsigned int model) | ||
2308 | { | ||
2309 | if (!genuine_intel) | ||
2310 | return 0; | ||
2311 | |||
2312 | switch (model) { | ||
2313 | case 0x4E: /* SKL */ | ||
2314 | case 0x5E: /* SKL */ | ||
2315 | return 1; | ||
2316 | } | ||
2317 | return 0; | ||
2318 | } | ||
2319 | |||
2320 | |||
2118 | 2321 | ||
2119 | int is_slm(unsigned int family, unsigned int model) | 2322 | int is_slm(unsigned int family, unsigned int model) |
2120 | { | 2323 | { |
@@ -2228,7 +2431,7 @@ guess: | |||
2228 | 2431 | ||
2229 | return 0; | 2432 | return 0; |
2230 | } | 2433 | } |
2231 | void check_cpuid() | 2434 | void process_cpuid() |
2232 | { | 2435 | { |
2233 | unsigned int eax, ebx, ecx, edx, max_level; | 2436 | unsigned int eax, ebx, ecx, edx, max_level; |
2234 | unsigned int fms, family, model, stepping; | 2437 | unsigned int fms, family, model, stepping; |
@@ -2294,6 +2497,41 @@ void check_cpuid() | |||
2294 | do_ptm ? "" : "No ", | 2497 | do_ptm ? "" : "No ", |
2295 | has_epb ? "" : "No "); | 2498 | has_epb ? "" : "No "); |
2296 | 2499 | ||
2500 | if (max_level > 0x15) { | ||
2501 | unsigned int eax_crystal; | ||
2502 | unsigned int ebx_tsc; | ||
2503 | |||
2504 | /* | ||
2505 | * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz | ||
2506 | */ | ||
2507 | eax_crystal = ebx_tsc = crystal_hz = edx = 0; | ||
2508 | __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); | ||
2509 | |||
2510 | if (ebx_tsc != 0) { | ||
2511 | |||
2512 | if (debug && (ebx != 0)) | ||
2513 | fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", | ||
2514 | eax_crystal, ebx_tsc, crystal_hz); | ||
2515 | |||
2516 | if (crystal_hz == 0) | ||
2517 | switch(model) { | ||
2518 | case 0x4E: /* SKL */ | ||
2519 | case 0x5E: /* SKL */ | ||
2520 | crystal_hz = 24000000; /* 24 MHz */ | ||
2521 | break; | ||
2522 | default: | ||
2523 | crystal_hz = 0; | ||
2524 | } | ||
2525 | |||
2526 | if (crystal_hz) { | ||
2527 | tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; | ||
2528 | if (debug) | ||
2529 | fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", | ||
2530 | tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); | ||
2531 | } | ||
2532 | } | ||
2533 | } | ||
2534 | |||
2297 | do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); | 2535 | do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); |
2298 | do_snb_cstates = has_snb_msrs(family, model); | 2536 | do_snb_cstates = has_snb_msrs(family, model); |
2299 | do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); | 2537 | do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); |
@@ -2301,18 +2539,19 @@ void check_cpuid() | |||
2301 | do_pc6 = (pkg_cstate_limit >= PCL__6); | 2539 | do_pc6 = (pkg_cstate_limit >= PCL__6); |
2302 | do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); | 2540 | do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); |
2303 | do_c8_c9_c10 = has_hsw_msrs(family, model); | 2541 | do_c8_c9_c10 = has_hsw_msrs(family, model); |
2542 | do_skl_residency = has_skl_msrs(family, model); | ||
2304 | do_slm_cstates = is_slm(family, model); | 2543 | do_slm_cstates = is_slm(family, model); |
2305 | bclk = discover_bclk(family, model); | 2544 | bclk = discover_bclk(family, model); |
2306 | 2545 | ||
2307 | do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model); | ||
2308 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); | ||
2309 | rapl_probe(family, model); | 2546 | rapl_probe(family, model); |
2310 | perf_limit_reasons_probe(family, model); | 2547 | perf_limit_reasons_probe(family, model); |
2311 | 2548 | ||
2549 | if (debug) | ||
2550 | dump_cstate_pstate_config_info(); | ||
2551 | |||
2312 | return; | 2552 | return; |
2313 | } | 2553 | } |
2314 | 2554 | ||
2315 | |||
2316 | void help() | 2555 | void help() |
2317 | { | 2556 | { |
2318 | fprintf(stderr, | 2557 | fprintf(stderr, |
@@ -2428,14 +2667,14 @@ void topology_probe() | |||
2428 | if (debug > 1) | 2667 | if (debug > 1) |
2429 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", | 2668 | fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", |
2430 | max_core_id, topo.num_cores_per_pkg); | 2669 | max_core_id, topo.num_cores_per_pkg); |
2431 | if (!summary_only && topo.num_cores_per_pkg > 1) | 2670 | if (debug && !summary_only && topo.num_cores_per_pkg > 1) |
2432 | show_core = 1; | 2671 | show_core = 1; |
2433 | 2672 | ||
2434 | topo.num_packages = max_package_id + 1; | 2673 | topo.num_packages = max_package_id + 1; |
2435 | if (debug > 1) | 2674 | if (debug > 1) |
2436 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", | 2675 | fprintf(stderr, "max_package_id %d, sizing for %d packages\n", |
2437 | max_package_id, topo.num_packages); | 2676 | max_package_id, topo.num_packages); |
2438 | if (!summary_only && topo.num_packages > 1) | 2677 | if (debug && !summary_only && topo.num_packages > 1) |
2439 | show_pkg = 1; | 2678 | show_pkg = 1; |
2440 | 2679 | ||
2441 | topo.num_threads_per_core = max_siblings; | 2680 | topo.num_threads_per_core = max_siblings; |
@@ -2550,14 +2789,11 @@ void turbostat_init() | |||
2550 | { | 2789 | { |
2551 | check_dev_msr(); | 2790 | check_dev_msr(); |
2552 | check_permissions(); | 2791 | check_permissions(); |
2553 | check_cpuid(); | 2792 | process_cpuid(); |
2554 | 2793 | ||
2555 | setup_all_buffers(); | 2794 | setup_all_buffers(); |
2556 | 2795 | ||
2557 | if (debug) | 2796 | if (debug) |
2558 | print_verbose_header(); | ||
2559 | |||
2560 | if (debug) | ||
2561 | for_all_cpus(print_epb, ODD_COUNTERS); | 2797 | for_all_cpus(print_epb, ODD_COUNTERS); |
2562 | 2798 | ||
2563 | if (debug) | 2799 | if (debug) |
@@ -2634,7 +2870,7 @@ int get_and_dump_counters(void) | |||
2634 | } | 2870 | } |
2635 | 2871 | ||
2636 | void print_version() { | 2872 | void print_version() { |
2637 | fprintf(stderr, "turbostat version 4.1 10-Feb, 2015" | 2873 | fprintf(stderr, "turbostat version 4.5 2 Apr, 2015" |
2638 | " - Len Brown <lenb@kernel.org>\n"); | 2874 | " - Len Brown <lenb@kernel.org>\n"); |
2639 | } | 2875 | } |
2640 | 2876 | ||