aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-04-19 17:31:41 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-04-19 17:31:41 -0400
commit09d51602cf84a1264946711dd4ea0dddbac599a1 (patch)
tree41d96f89a1071659ff768b733115c7873a2f2778 /tools
parent6162e4b0bedeb3dac2ba0a5e1b1f56db107d97ec (diff)
parente9257f5fa48cc296d7eed35acf9f2ad195184122 (diff)
Merge branch 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux
Pull turbostat update from Len Brown: "Updates to the turbostat utility. Just one kernel dependency in this batch -- added a #define to msr-index.h" * 'turbostat' of git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux: tools/power turbostat: correct dumped pkg-cstate-limit value tools/power turbostat: calculate TSC frequency from CPUID(0x15) on SKL tools/power turbostat: correct DRAM RAPL units on recent Xeon processors tools/power turbostat: Initial Skylake support tools/power turbostat: Use $(CURDIR) instead of $(PWD) and add support for O= option in Makefile tools/power turbostat: modprobe msr, if needed tools/power turbostat: dump MSR_TURBO_RATIO_LIMIT2 tools/power turbostat: use new MSR_TURBO_RATIO_LIMIT names x86 msr-index: define MSR_TURBO_RATIO_LIMIT,1,2 tools/power turbostat: label base frequency tools/power turbostat: update PERF_LIMIT_REASONS decoding tools/power turbostat: simplify default output
Diffstat (limited to 'tools')
-rw-r--r--tools/power/x86/turbostat/Makefile6
-rw-r--r--tools/power/x86/turbostat/turbostat.8138
-rw-r--r--tools/power/x86/turbostat/turbostat.c436
3 files changed, 419 insertions, 161 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index d1b3a361e526..4039854560d0 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,8 +1,12 @@
1CC = $(CROSS_COMPILE)gcc 1CC = $(CROSS_COMPILE)gcc
2BUILD_OUTPUT := $(PWD) 2BUILD_OUTPUT := $(CURDIR)
3PREFIX := /usr 3PREFIX := /usr
4DESTDIR := 4DESTDIR :=
5 5
6ifeq ("$(origin O)", "command line")
7 BUILD_OUTPUT := $(O)
8endif
9
6turbostat : turbostat.c 10turbostat : turbostat.c
7CFLAGS += -Wall 11CFLAGS += -Wall
8CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' 12CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"'
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index feea7ad9500b..05b8fc38dc8b 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -20,9 +20,11 @@ upon its completion.
20The second method is to omit the command, 20The second method is to omit the command,
21and turbostat displays statistics every 5 seconds. 21and turbostat displays statistics every 5 seconds.
22The 5-second interval can be changed using the --interval option. 22The 5-second interval can be changed using the --interval option.
23 23.PP
24Some information is not available on older processors. 24Some information is not available on older processors.
25.SS Options 25.SS Options
26Options can be specified with a single or double '-', and only as much of the option
27name as is needed to disambiguate it from others need be given. Note that options are case-sensitive.
26\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. 28\fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter.
27.PP 29.PP
28\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. 30\fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter.
@@ -55,16 +57,20 @@ more than once may also enable internal turbostat debug information.
55The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, 57The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit,
56displays the statistics gathered since it was forked. 58displays the statistics gathered since it was forked.
57.PP 59.PP
58.SH FIELD DESCRIPTIONS 60.SH DEFAULT FIELD DESCRIPTIONS
59.nf 61.nf
60\fBPackage\fP processor package number. 62\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
61\fBCore\fP processor core number.
62\fBCPU\fP Linux CPU (logical processor) number.
63Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology.
64\fBAVG_MHz\fP number of cycles executed divided by time elapsed. 63\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
65\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. 64\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
66\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). 65\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
67\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. 66\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
67.fi
68.PP
69.SH DEBUG FIELD DESCRIPTIONS
70.nf
71\fBPackage\fP processor package number.
72\fBCore\fP processor core number.
73Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
68\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. 74\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states.
69\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. 75\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
70\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. 76\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
@@ -81,63 +87,76 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
81Without any parameters, turbostat displays statistics every 5 seconds. 87Without any parameters, turbostat displays statistics every 5 seconds.
82(override interval with "-i sec" option, or specify a command 88(override interval with "-i sec" option, or specify a command
83for turbostat to fork). 89for turbostat to fork).
90.nf
91[root@hsw]# ./turbostat
92 CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
93 - 488 12.51 3898 3498
94 0 0 0.01 3885 3498
95 4 3897 99.99 3898 3498
96 1 0 0.00 3861 3498
97 5 0 0.00 3882 3498
98 2 1 0.02 3894 3498
99 6 2 0.06 3898 3498
100 3 0 0.00 3849 3498
101 7 0 0.00 3877 3498
102
103.fi
104.SH DEBUG EXAMPLE
105The "--debug" option prints additional system information before measurements:
84 106
85The first row of statistics is a summary for the entire system. 107The first row of statistics is a summary for the entire system.
86For residency % columns, the summary is a weighted average. 108For residency % columns, the summary is a weighted average.
87For Temperature columns, the summary is the column maximum. 109For Temperature columns, the summary is the column maximum.
88For Watts columns, the summary is a system total. 110For Watts columns, the summary is a system total.
89Subsequent rows show per-CPU statistics. 111Subsequent rows show per-CPU statistics.
90
91.nf
92[root@ivy]# ./turbostat
93 Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
94 - - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00
95 0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00
96 0 4 1 0.07 1596 3492 0 0.79
97 1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23
98 1 5 5 0.28 1596 3492 0 0.95
99 2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23
100 2 6 2 0.10 1597 3492 0 0.97
101 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23
102 3 7 5 0.31 1596 3492 0 0.33
103.fi
104.SH DEBUG EXAMPLE
105The "--debug" option prints additional system information before measurements:
106
107.nf 112.nf
108turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org> 113turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org>
109CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) 114CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
110CPUID(6): APERF, DTS, PTM, EPB 115CPUID(6): APERF, DTS, PTM, EPB
111RAPL: 851 sec. Joule Counter Range, at 77 Watts 116RAPL: 3121 sec. Joule Counter Range, at 84 Watts
112cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 117cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300
11316 * 100 = 1600 MHz max efficiency 1188 * 100 = 800 MHz max efficiency
11435 * 100 = 3500 MHz TSC frequency 11935 * 100 = 3500 MHz TSC frequency
115cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled) 120cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
116cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n) 121cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
117cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 122cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
11837 * 100 = 3700 MHz max turbo 4 active cores 12337 * 100 = 3700 MHz max turbo 4 active cores
11938 * 100 = 3800 MHz max turbo 3 active cores 12438 * 100 = 3800 MHz max turbo 3 active cores
12039 * 100 = 3900 MHz max turbo 2 active cores 12539 * 100 = 3900 MHz max turbo 2 active cores
12139 * 100 = 3900 MHz max turbo 1 active cores 12639 * 100 = 3900 MHz max turbo 1 active cores
122cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) 127cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
123cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) 128cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, )
124cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) 129cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
125cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked) 130cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
126cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) 131cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
127cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled) 132cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
133cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
134cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
135cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
128cpu0: MSR_PP0_POLICY: 0 136cpu0: MSR_PP0_POLICY: 0
129cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) 137cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
130cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) 138cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
131cpu0: MSR_PP1_POLICY: 0 139cpu0: MSR_PP1_POLICY: 0
132cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) 140cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
133cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) 141cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
134cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) 142cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
135cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) 143cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C)
136cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1) 144cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
137cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1) 145cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
138cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1) 146cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
139cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) 147cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
140 ... 148 Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
149 - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
150 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
151 0 4 3897 99.98 3898 3498 0 0.02
152 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32
153 1 5 0 0.00 3885 3498 0 0.21
154 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32
155 2 6 2 0.06 3896 3498 0 0.80
156 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28
157 3 7 0 0.00 3879 3498 0 0.04
158^C
159
141.fi 160.fi
142The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency 161The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
143available at the minimum package voltage. The \fBTSC frequency\fP is the base 162available at the minimum package voltage. The \fBTSC frequency\fP is the base
@@ -147,6 +166,9 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling.
147The remaining rows show what maximum turbo frequency is possible 166The remaining rows show what maximum turbo frequency is possible
148depending on the number of idle cores. Note that not all information is 167depending on the number of idle cores. Note that not all information is
149available on all processors. 168available on all processors.
169.PP
170The --debug option adds additional columns to the measurement output, including CPU idle power-state residency and processor temperature sensor readings.
171See the field definitions above.
150.SH FORK EXAMPLE 172.SH FORK EXAMPLE
151If turbostat is invoked with a command, it will fork that command 173If turbostat is invoked with a command, it will fork that command
152and output the statistics gathered when the command exits. 174and output the statistics gathered when the command exits.
@@ -154,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
154until ^C while the other CPUs are mostly idle: 176until ^C while the other CPUs are mostly idle:
155 177
156.nf 178.nf
157root@ivy: turbostat cat /dev/zero > /dev/null 179root@hsw: turbostat cat /dev/zero > /dev/null
158^C 180^C
159 Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 181 CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
160 - - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 182 - 482 12.51 3854 3498
161 0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 183 0 0 0.01 1960 3498
162 0 4 9 0.24 3829 3492 0 1.15 184 4 0 0.00 2128 3498
163 1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36 185 1 0 0.00 3003 3498
164 1 5 3880 99.82 3888 3492 0 0.18 186 5 3854 99.98 3855 3498
165 2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28 187 2 0 0.01 3504 3498
166 2 6 12 0.32 3823 3492 0 0.89 188 6 3 0.08 3884 3498
167 3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30 189 3 0 0.00 2553 3498
168 3 7 4 0.11 3827 3492 0 0.94 190 7 0 0.00 2126 3498
16930.372243 sec 19110.783983 sec
170 192
171.fi 193.fi
172Above the cycle soaker drives cpu5 up to its 3.8 GHz turbo limit 194Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit.
173while the other processors are generally in various states of idle. 195The first row shows the average MHz and %Busy across all the processors in the system.
174
175Note that cpu1 and cpu5 are HT siblings within core1.
176As cpu5 is very busy, it prevents its sibling, cpu1,
177from entering a c-state deeper than c1.
178 196
179Note that the Avg_MHz column reflects the total number of cycles executed 197Note that the Avg_MHz column reflects the total number of cycles executed
180divided by the measurement interval. If the %Busy column is 100%, 198divided by the measurement interval. If the %Busy column is 100%,
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 2d089cac8580..bac98ca3d4ca 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -57,6 +57,7 @@ unsigned int do_pc3;
57unsigned int do_pc6; 57unsigned int do_pc6;
58unsigned int do_pc7; 58unsigned int do_pc7;
59unsigned int do_c8_c9_c10; 59unsigned int do_c8_c9_c10;
60unsigned int do_skl_residency;
60unsigned int do_slm_cstates; 61unsigned int do_slm_cstates;
61unsigned int use_c1_residency_msr; 62unsigned int use_c1_residency_msr;
62unsigned int has_aperf; 63unsigned int has_aperf;
@@ -65,8 +66,6 @@ unsigned int units = 1000000; /* MHz etc */
65unsigned int genuine_intel; 66unsigned int genuine_intel;
66unsigned int has_invariant_tsc; 67unsigned int has_invariant_tsc;
67unsigned int do_nhm_platform_info; 68unsigned int do_nhm_platform_info;
68unsigned int do_nhm_turbo_ratio_limit;
69unsigned int do_ivt_turbo_ratio_limit;
70unsigned int extra_msr_offset32; 69unsigned int extra_msr_offset32;
71unsigned int extra_msr_offset64; 70unsigned int extra_msr_offset64;
72unsigned int extra_delta_offset32; 71unsigned int extra_delta_offset32;
@@ -84,11 +83,14 @@ unsigned int do_dts;
84unsigned int do_ptm; 83unsigned int do_ptm;
85unsigned int tcc_activation_temp; 84unsigned int tcc_activation_temp;
86unsigned int tcc_activation_temp_override; 85unsigned int tcc_activation_temp_override;
87double rapl_power_units, rapl_energy_units, rapl_time_units; 86double rapl_power_units, rapl_time_units;
87double rapl_dram_energy_units, rapl_energy_units;
88double rapl_joule_counter_range; 88double rapl_joule_counter_range;
89unsigned int do_core_perf_limit_reasons; 89unsigned int do_core_perf_limit_reasons;
90unsigned int do_gfx_perf_limit_reasons; 90unsigned int do_gfx_perf_limit_reasons;
91unsigned int do_ring_perf_limit_reasons; 91unsigned int do_ring_perf_limit_reasons;
92unsigned int crystal_hz;
93unsigned long long tsc_hz;
92 94
93#define RAPL_PKG (1 << 0) 95#define RAPL_PKG (1 << 0)
94 /* 0x610 MSR_PKG_POWER_LIMIT */ 96 /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -101,18 +103,18 @@ unsigned int do_ring_perf_limit_reasons;
101#define RAPL_DRAM (1 << 3) 103#define RAPL_DRAM (1 << 3)
102 /* 0x618 MSR_DRAM_POWER_LIMIT */ 104 /* 0x618 MSR_DRAM_POWER_LIMIT */
103 /* 0x619 MSR_DRAM_ENERGY_STATUS */ 105 /* 0x619 MSR_DRAM_ENERGY_STATUS */
104 /* 0x61c MSR_DRAM_POWER_INFO */
105#define RAPL_DRAM_PERF_STATUS (1 << 4) 106#define RAPL_DRAM_PERF_STATUS (1 << 4)
106 /* 0x61b MSR_DRAM_PERF_STATUS */ 107 /* 0x61b MSR_DRAM_PERF_STATUS */
108#define RAPL_DRAM_POWER_INFO (1 << 5)
109 /* 0x61c MSR_DRAM_POWER_INFO */
107 110
108#define RAPL_CORES (1 << 5) 111#define RAPL_CORES (1 << 6)
109 /* 0x638 MSR_PP0_POWER_LIMIT */ 112 /* 0x638 MSR_PP0_POWER_LIMIT */
110 /* 0x639 MSR_PP0_ENERGY_STATUS */ 113 /* 0x639 MSR_PP0_ENERGY_STATUS */
111#define RAPL_CORE_POLICY (1 << 6) 114#define RAPL_CORE_POLICY (1 << 7)
112 /* 0x63a MSR_PP0_POLICY */ 115 /* 0x63a MSR_PP0_POLICY */
113 116
114 117#define RAPL_GFX (1 << 8)
115#define RAPL_GFX (1 << 7)
116 /* 0x640 MSR_PP1_POWER_LIMIT */ 118 /* 0x640 MSR_PP1_POWER_LIMIT */
117 /* 0x641 MSR_PP1_ENERGY_STATUS */ 119 /* 0x641 MSR_PP1_ENERGY_STATUS */
118 /* 0x642 MSR_PP1_POLICY */ 120 /* 0x642 MSR_PP1_POLICY */
@@ -159,6 +161,10 @@ struct pkg_data {
159 unsigned long long pc8; 161 unsigned long long pc8;
160 unsigned long long pc9; 162 unsigned long long pc9;
161 unsigned long long pc10; 163 unsigned long long pc10;
164 unsigned long long pkg_wtd_core_c0;
165 unsigned long long pkg_any_core_c0;
166 unsigned long long pkg_any_gfxe_c0;
167 unsigned long long pkg_both_core_gfxe_c0;
162 unsigned int package_id; 168 unsigned int package_id;
163 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ 169 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
164 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ 170 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
@@ -292,8 +298,7 @@ void print_header(void)
292 if (has_aperf) 298 if (has_aperf)
293 outp += sprintf(outp, " Bzy_MHz"); 299 outp += sprintf(outp, " Bzy_MHz");
294 outp += sprintf(outp, " TSC_MHz"); 300 outp += sprintf(outp, " TSC_MHz");
295 if (do_smi) 301
296 outp += sprintf(outp, " SMI");
297 if (extra_delta_offset32) 302 if (extra_delta_offset32)
298 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); 303 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
299 if (extra_delta_offset64) 304 if (extra_delta_offset64)
@@ -302,6 +307,13 @@ void print_header(void)
302 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); 307 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
303 if (extra_msr_offset64) 308 if (extra_msr_offset64)
304 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); 309 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
310
311 if (!debug)
312 goto done;
313
314 if (do_smi)
315 outp += sprintf(outp, " SMI");
316
305 if (do_nhm_cstates) 317 if (do_nhm_cstates)
306 outp += sprintf(outp, " CPU%%c1"); 318 outp += sprintf(outp, " CPU%%c1");
307 if (do_nhm_cstates && !do_slm_cstates) 319 if (do_nhm_cstates && !do_slm_cstates)
@@ -316,6 +328,13 @@ void print_header(void)
316 if (do_ptm) 328 if (do_ptm)
317 outp += sprintf(outp, " PkgTmp"); 329 outp += sprintf(outp, " PkgTmp");
318 330
331 if (do_skl_residency) {
332 outp += sprintf(outp, " Totl%%C0");
333 outp += sprintf(outp, " Any%%C0");
334 outp += sprintf(outp, " GFX%%C0");
335 outp += sprintf(outp, " CPUGFX%%");
336 }
337
319 if (do_pc2) 338 if (do_pc2)
320 outp += sprintf(outp, " Pkg%%pc2"); 339 outp += sprintf(outp, " Pkg%%pc2");
321 if (do_pc3) 340 if (do_pc3)
@@ -359,6 +378,7 @@ void print_header(void)
359 outp += sprintf(outp, " time"); 378 outp += sprintf(outp, " time");
360 379
361 } 380 }
381 done:
362 outp += sprintf(outp, "\n"); 382 outp += sprintf(outp, "\n");
363} 383}
364 384
@@ -396,6 +416,12 @@ int dump_counters(struct thread_data *t, struct core_data *c,
396 416
397 if (p) { 417 if (p) {
398 outp += sprintf(outp, "package: %d\n", p->package_id); 418 outp += sprintf(outp, "package: %d\n", p->package_id);
419
420 outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
421 outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
422 outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
423 outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
424
399 outp += sprintf(outp, "pc2: %016llX\n", p->pc2); 425 outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
400 if (do_pc3) 426 if (do_pc3)
401 outp += sprintf(outp, "pc3: %016llX\n", p->pc3); 427 outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
@@ -487,10 +513,6 @@ int format_counters(struct thread_data *t, struct core_data *c,
487 /* TSC_MHz */ 513 /* TSC_MHz */
488 outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); 514 outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
489 515
490 /* SMI */
491 if (do_smi)
492 outp += sprintf(outp, "%8d", t->smi_count);
493
494 /* delta */ 516 /* delta */
495 if (extra_delta_offset32) 517 if (extra_delta_offset32)
496 outp += sprintf(outp, " %11llu", t->extra_delta32); 518 outp += sprintf(outp, " %11llu", t->extra_delta32);
@@ -506,6 +528,13 @@ int format_counters(struct thread_data *t, struct core_data *c,
506 if (extra_msr_offset64) 528 if (extra_msr_offset64)
507 outp += sprintf(outp, " 0x%016llx", t->extra_msr64); 529 outp += sprintf(outp, " 0x%016llx", t->extra_msr64);
508 530
531 if (!debug)
532 goto done;
533
534 /* SMI */
535 if (do_smi)
536 outp += sprintf(outp, "%8d", t->smi_count);
537
509 if (do_nhm_cstates) { 538 if (do_nhm_cstates) {
510 if (!skip_c1) 539 if (!skip_c1)
511 outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); 540 outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc);
@@ -531,9 +560,18 @@ int format_counters(struct thread_data *t, struct core_data *c,
531 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 560 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
532 goto done; 561 goto done;
533 562
563 /* PkgTmp */
534 if (do_ptm) 564 if (do_ptm)
535 outp += sprintf(outp, "%8d", p->pkg_temp_c); 565 outp += sprintf(outp, "%8d", p->pkg_temp_c);
536 566
567 /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
568 if (do_skl_residency) {
569 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
570 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc);
571 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc);
572 outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc);
573 }
574
537 if (do_pc2) 575 if (do_pc2)
538 outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); 576 outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc);
539 if (do_pc3) 577 if (do_pc3)
@@ -565,7 +603,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
565 if (do_rapl & RAPL_GFX) 603 if (do_rapl & RAPL_GFX)
566 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); 604 outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float);
567 if (do_rapl & RAPL_DRAM) 605 if (do_rapl & RAPL_DRAM)
568 outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float); 606 outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float);
569 if (do_rapl & RAPL_PKG_PERF_STATUS) 607 if (do_rapl & RAPL_PKG_PERF_STATUS)
570 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); 608 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
571 if (do_rapl & RAPL_DRAM_PERF_STATUS) 609 if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -582,7 +620,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
582 p->energy_gfx * rapl_energy_units); 620 p->energy_gfx * rapl_energy_units);
583 if (do_rapl & RAPL_DRAM) 621 if (do_rapl & RAPL_DRAM)
584 outp += sprintf(outp, fmt8, 622 outp += sprintf(outp, fmt8,
585 p->energy_dram * rapl_energy_units); 623 p->energy_dram * rapl_dram_energy_units);
586 if (do_rapl & RAPL_PKG_PERF_STATUS) 624 if (do_rapl & RAPL_PKG_PERF_STATUS)
587 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); 625 outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
588 if (do_rapl & RAPL_DRAM_PERF_STATUS) 626 if (do_rapl & RAPL_DRAM_PERF_STATUS)
@@ -636,6 +674,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
636void 674void
637delta_package(struct pkg_data *new, struct pkg_data *old) 675delta_package(struct pkg_data *new, struct pkg_data *old)
638{ 676{
677
678 if (do_skl_residency) {
679 old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
680 old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
681 old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
682 old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
683 }
639 old->pc2 = new->pc2 - old->pc2; 684 old->pc2 = new->pc2 - old->pc2;
640 if (do_pc3) 685 if (do_pc3)
641 old->pc3 = new->pc3 - old->pc3; 686 old->pc3 = new->pc3 - old->pc3;
@@ -782,6 +827,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
782 c->c7 = 0; 827 c->c7 = 0;
783 c->core_temp_c = 0; 828 c->core_temp_c = 0;
784 829
830 p->pkg_wtd_core_c0 = 0;
831 p->pkg_any_core_c0 = 0;
832 p->pkg_any_gfxe_c0 = 0;
833 p->pkg_both_core_gfxe_c0 = 0;
834
785 p->pc2 = 0; 835 p->pc2 = 0;
786 if (do_pc3) 836 if (do_pc3)
787 p->pc3 = 0; 837 p->pc3 = 0;
@@ -826,6 +876,13 @@ int sum_counters(struct thread_data *t, struct core_data *c,
826 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 876 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
827 return 0; 877 return 0;
828 878
879 if (do_skl_residency) {
880 average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
881 average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
882 average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
883 average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
884 }
885
829 average.packages.pc2 += p->pc2; 886 average.packages.pc2 += p->pc2;
830 if (do_pc3) 887 if (do_pc3)
831 average.packages.pc3 += p->pc3; 888 average.packages.pc3 += p->pc3;
@@ -873,6 +930,13 @@ void compute_average(struct thread_data *t, struct core_data *c,
873 average.cores.c6 /= topo.num_cores; 930 average.cores.c6 /= topo.num_cores;
874 average.cores.c7 /= topo.num_cores; 931 average.cores.c7 /= topo.num_cores;
875 932
933 if (do_skl_residency) {
934 average.packages.pkg_wtd_core_c0 /= topo.num_packages;
935 average.packages.pkg_any_core_c0 /= topo.num_packages;
936 average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
937 average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
938 }
939
876 average.packages.pc2 /= topo.num_packages; 940 average.packages.pc2 /= topo.num_packages;
877 if (do_pc3) 941 if (do_pc3)
878 average.packages.pc3 /= topo.num_packages; 942 average.packages.pc3 /= topo.num_packages;
@@ -979,6 +1043,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
979 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 1043 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
980 return 0; 1044 return 0;
981 1045
1046 if (do_skl_residency) {
1047 if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
1048 return -10;
1049 if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
1050 return -11;
1051 if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
1052 return -12;
1053 if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
1054 return -13;
1055 }
982 if (do_pc3) 1056 if (do_pc3)
983 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) 1057 if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
984 return -9; 1058 return -9;
@@ -1055,49 +1129,77 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1055#define PCL_6R 9 /* PC6 Retention */ 1129#define PCL_6R 9 /* PC6 Retention */
1056#define PCL__7 10 /* PC7 */ 1130#define PCL__7 10 /* PC7 */
1057#define PCL_7S 11 /* PC7 Shrink */ 1131#define PCL_7S 11 /* PC7 Shrink */
1058#define PCLUNL 12 /* Unlimited */ 1132#define PCL__8 12 /* PC8 */
1133#define PCL__9 13 /* PC9 */
1134#define PCLUNL 14 /* Unlimited */
1059 1135
1060int pkg_cstate_limit = PCLUKN; 1136int pkg_cstate_limit = PCLUKN;
1061char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", 1137char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
1062 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "unlimited"}; 1138 "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"};
1063 1139
1064int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL}; 1140int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1065int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL}; 1141int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1066int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL}; 1142int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1067int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7}; 1143int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1068int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; 1144int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1069int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL}; 1145int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
1070 1146
1071void print_verbose_header(void) 1147static void
1148dump_nhm_platform_info(void)
1072{ 1149{
1073 unsigned long long msr; 1150 unsigned long long msr;
1074 unsigned int ratio; 1151 unsigned int ratio;
1075 1152
1076 if (!do_nhm_platform_info)
1077 return;
1078
1079 get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); 1153 get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
1080 1154
1081 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); 1155 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
1082 1156
1083 ratio = (msr >> 40) & 0xFF; 1157 ratio = (msr >> 40) & 0xFF;
1084 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 1158 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
1085 ratio, bclk, ratio * bclk); 1159 ratio, bclk, ratio * bclk);
1086 1160
1087 ratio = (msr >> 8) & 0xFF; 1161 ratio = (msr >> 8) & 0xFF;
1088 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", 1162 fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
1089 ratio, bclk, ratio * bclk); 1163 ratio, bclk, ratio * bclk);
1090 1164
1091 get_msr(0, MSR_IA32_POWER_CTL, &msr); 1165 get_msr(0, MSR_IA32_POWER_CTL, &msr);
1092 fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", 1166 fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
1093 msr, msr & 0x2 ? "EN" : "DIS"); 1167 msr, msr & 0x2 ? "EN" : "DIS");
1094 1168
1095 if (!do_ivt_turbo_ratio_limit) 1169 return;
1096 goto print_nhm_turbo_ratio_limits; 1170}
1171
1172static void
1173dump_hsw_turbo_ratio_limits(void)
1174{
1175 unsigned long long msr;
1176 unsigned int ratio;
1177
1178 get_msr(0, MSR_TURBO_RATIO_LIMIT2, &msr);
1179
1180 fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr);
1181
1182 ratio = (msr >> 8) & 0xFF;
1183 if (ratio)
1184 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
1185 ratio, bclk, ratio * bclk);
1186
1187 ratio = (msr >> 0) & 0xFF;
1188 if (ratio)
1189 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
1190 ratio, bclk, ratio * bclk);
1191 return;
1192}
1193
1194static void
1195dump_ivt_turbo_ratio_limits(void)
1196{
1197 unsigned long long msr;
1198 unsigned int ratio;
1097 1199
1098 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 1200 get_msr(0, MSR_TURBO_RATIO_LIMIT1, &msr);
1099 1201
1100 fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); 1202 fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr);
1101 1203
1102 ratio = (msr >> 56) & 0xFF; 1204 ratio = (msr >> 56) & 0xFF;
1103 if (ratio) 1205 if (ratio)
@@ -1138,30 +1240,18 @@ void print_verbose_header(void)
1138 if (ratio) 1240 if (ratio)
1139 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", 1241 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
1140 ratio, bclk, ratio * bclk); 1242 ratio, bclk, ratio * bclk);
1243 return;
1244}
1141 1245
1142print_nhm_turbo_ratio_limits: 1246static void
1143 get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 1247dump_nhm_turbo_ratio_limits(void)
1144 1248{
1145#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) 1249 unsigned long long msr;
1146#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) 1250 unsigned int ratio;
1147
1148 fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
1149
1150 fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1151 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1152 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1153 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1154 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1155 (msr & (1 << 15)) ? "" : "UN",
1156 (unsigned int)msr & 7,
1157 pkg_cstate_limit_strings[pkg_cstate_limit]);
1158
1159 if (!do_nhm_turbo_ratio_limit)
1160 return;
1161 1251
1162 get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); 1252 get_msr(0, MSR_TURBO_RATIO_LIMIT, &msr);
1163 1253
1164 fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); 1254 fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
1165 1255
1166 ratio = (msr >> 56) & 0xFF; 1256 ratio = (msr >> 56) & 0xFF;
1167 if (ratio) 1257 if (ratio)
@@ -1202,7 +1292,30 @@ print_nhm_turbo_ratio_limits:
1202 if (ratio) 1292 if (ratio)
1203 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", 1293 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
1204 ratio, bclk, ratio * bclk); 1294 ratio, bclk, ratio * bclk);
1295 return;
1296}
1297
1298static void
1299dump_nhm_cst_cfg(void)
1300{
1301 unsigned long long msr;
1302
1303 get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1304
1305#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
1306#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
1205 1307
1308 fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
1309
1310 fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
1311 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
1312 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
1313 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
1314 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
1315 (msr & (1 << 15)) ? "" : "UN",
1316 (unsigned int)msr & 7,
1317 pkg_cstate_limit_strings[pkg_cstate_limit]);
1318 return;
1206} 1319}
1207 1320
1208void free_all_buffers(void) 1321void free_all_buffers(void)
@@ -1483,7 +1596,8 @@ void check_dev_msr()
1483 struct stat sb; 1596 struct stat sb;
1484 1597
1485 if (stat("/dev/cpu/0/msr", &sb)) 1598 if (stat("/dev/cpu/0/msr", &sb))
1486 err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); 1599 if (system("/sbin/modprobe msr > /dev/null 2>&1"))
1600 err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
1487} 1601}
1488 1602
1489void check_permissions() 1603void check_permissions()
@@ -1573,6 +1687,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
1573 case 0x47: /* BDW */ 1687 case 0x47: /* BDW */
1574 case 0x4F: /* BDX */ 1688 case 0x4F: /* BDX */
1575 case 0x56: /* BDX-DE */ 1689 case 0x56: /* BDX-DE */
1690 case 0x4E: /* SKL */
1691 case 0x5E: /* SKL */
1576 pkg_cstate_limits = hsw_pkg_cstate_limits; 1692 pkg_cstate_limits = hsw_pkg_cstate_limits;
1577 break; 1693 break;
1578 case 0x37: /* BYT */ 1694 case 0x37: /* BYT */
@@ -1590,7 +1706,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
1590 } 1706 }
1591 get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); 1707 get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
1592 1708
1593 pkg_cstate_limit = pkg_cstate_limits[msr & 0x7]; 1709 pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
1594 1710
1595 return 1; 1711 return 1;
1596} 1712}
@@ -1615,12 +1731,49 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
1615 1731
1616 switch (model) { 1732 switch (model) {
1617 case 0x3E: /* IVB Xeon */ 1733 case 0x3E: /* IVB Xeon */
1734 case 0x3F: /* HSW Xeon */
1735 return 1;
1736 default:
1737 return 0;
1738 }
1739}
1740int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
1741{
1742 if (!genuine_intel)
1743 return 0;
1744
1745 if (family != 6)
1746 return 0;
1747
1748 switch (model) {
1749 case 0x3F: /* HSW Xeon */
1618 return 1; 1750 return 1;
1619 default: 1751 default:
1620 return 0; 1752 return 0;
1621 } 1753 }
1622} 1754}
1623 1755
1756static void
1757dump_cstate_pstate_config_info(family, model)
1758{
1759 if (!do_nhm_platform_info)
1760 return;
1761
1762 dump_nhm_platform_info();
1763
1764 if (has_hsw_turbo_ratio_limit(family, model))
1765 dump_hsw_turbo_ratio_limits();
1766
1767 if (has_ivt_turbo_ratio_limit(family, model))
1768 dump_ivt_turbo_ratio_limits();
1769
1770 if (has_nhm_turbo_ratio_limit(family, model))
1771 dump_nhm_turbo_ratio_limits();
1772
1773 dump_nhm_cst_cfg();
1774}
1775
1776
1624/* 1777/*
1625 * print_epb() 1778 * print_epb()
1626 * Decode the ENERGY_PERF_BIAS MSR 1779 * Decode the ENERGY_PERF_BIAS MSR
@@ -1690,35 +1843,35 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
1690 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); 1843 get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
1691 fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); 1844 fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
1692 fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", 1845 fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
1693 (msr & 1 << 0) ? "PROCHOT, " : "", 1846 (msr & 1 << 15) ? "bit15, " : "",
1694 (msr & 1 << 1) ? "ThermStatus, " : "",
1695 (msr & 1 << 2) ? "bit2, " : "",
1696 (msr & 1 << 4) ? "Graphics, " : "",
1697 (msr & 1 << 5) ? "Auto-HWP, " : "",
1698 (msr & 1 << 6) ? "VR-Therm, " : "",
1699 (msr & 1 << 8) ? "Amps, " : "",
1700 (msr & 1 << 9) ? "CorePwr, " : "",
1701 (msr & 1 << 10) ? "PkgPwrL1, " : "",
1702 (msr & 1 << 11) ? "PkgPwrL2, " : "",
1703 (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
1704 (msr & 1 << 13) ? "Transitions, " : "",
1705 (msr & 1 << 14) ? "bit14, " : "", 1847 (msr & 1 << 14) ? "bit14, " : "",
1706 (msr & 1 << 15) ? "bit15, " : ""); 1848 (msr & 1 << 13) ? "Transitions, " : "",
1849 (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
1850 (msr & 1 << 11) ? "PkgPwrL2, " : "",
1851 (msr & 1 << 10) ? "PkgPwrL1, " : "",
1852 (msr & 1 << 9) ? "CorePwr, " : "",
1853 (msr & 1 << 8) ? "Amps, " : "",
1854 (msr & 1 << 6) ? "VR-Therm, " : "",
1855 (msr & 1 << 5) ? "Auto-HWP, " : "",
1856 (msr & 1 << 4) ? "Graphics, " : "",
1857 (msr & 1 << 2) ? "bit2, " : "",
1858 (msr & 1 << 1) ? "ThermStatus, " : "",
1859 (msr & 1 << 0) ? "PROCHOT, " : "");
1707 fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", 1860 fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
1708 (msr & 1 << 16) ? "PROCHOT, " : "", 1861 (msr & 1 << 31) ? "bit31, " : "",
1709 (msr & 1 << 17) ? "ThermStatus, " : "",
1710 (msr & 1 << 18) ? "bit18, " : "",
1711 (msr & 1 << 20) ? "Graphics, " : "",
1712 (msr & 1 << 21) ? "Auto-HWP, " : "",
1713 (msr & 1 << 22) ? "VR-Therm, " : "",
1714 (msr & 1 << 24) ? "Amps, " : "",
1715 (msr & 1 << 25) ? "CorePwr, " : "",
1716 (msr & 1 << 26) ? "PkgPwrL1, " : "",
1717 (msr & 1 << 27) ? "PkgPwrL2, " : "",
1718 (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
1719 (msr & 1 << 29) ? "Transitions, " : "",
1720 (msr & 1 << 30) ? "bit30, " : "", 1862 (msr & 1 << 30) ? "bit30, " : "",
1721 (msr & 1 << 31) ? "bit31, " : ""); 1863 (msr & 1 << 29) ? "Transitions, " : "",
1864 (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
1865 (msr & 1 << 27) ? "PkgPwrL2, " : "",
1866 (msr & 1 << 26) ? "PkgPwrL1, " : "",
1867 (msr & 1 << 25) ? "CorePwr, " : "",
1868 (msr & 1 << 24) ? "Amps, " : "",
1869 (msr & 1 << 22) ? "VR-Therm, " : "",
1870 (msr & 1 << 21) ? "Auto-HWP, " : "",
1871 (msr & 1 << 20) ? "Graphics, " : "",
1872 (msr & 1 << 18) ? "bit18, " : "",
1873 (msr & 1 << 17) ? "ThermStatus, " : "",
1874 (msr & 1 << 16) ? "PROCHOT, " : "");
1722 1875
1723 } 1876 }
1724 if (do_gfx_perf_limit_reasons) { 1877 if (do_gfx_perf_limit_reasons) {
@@ -1784,6 +1937,25 @@ double get_tdp(model)
1784 } 1937 }
1785} 1938}
1786 1939
1940/*
1941 * rapl_dram_energy_units_probe()
1942 * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
1943 */
1944static double
1945rapl_dram_energy_units_probe(int model, double rapl_energy_units)
1946{
1947 /* only called for genuine_intel, family 6 */
1948
1949 switch (model) {
1950 case 0x3F: /* HSX */
1951 case 0x4F: /* BDX */
1952 case 0x56: /* BDX-DE */
1953 return (rapl_dram_energy_units = 15.3 / 1000000);
1954 default:
1955 return (rapl_energy_units);
1956 }
1957}
1958
1787 1959
1788/* 1960/*
1789 * rapl_probe() 1961 * rapl_probe()
@@ -1812,14 +1984,18 @@ void rapl_probe(unsigned int family, unsigned int model)
1812 case 0x47: /* BDW */ 1984 case 0x47: /* BDW */
1813 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; 1985 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
1814 break; 1986 break;
1987 case 0x4E: /* SKL */
1988 case 0x5E: /* SKL */
1989 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
1990 break;
1815 case 0x3F: /* HSX */ 1991 case 0x3F: /* HSX */
1816 case 0x4F: /* BDX */ 1992 case 0x4F: /* BDX */
1817 case 0x56: /* BDX-DE */ 1993 case 0x56: /* BDX-DE */
1818 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; 1994 do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
1819 break; 1995 break;
1820 case 0x2D: 1996 case 0x2D:
1821 case 0x3E: 1997 case 0x3E:
1822 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; 1998 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
1823 break; 1999 break;
1824 case 0x37: /* BYT */ 2000 case 0x37: /* BYT */
1825 case 0x4D: /* AVN */ 2001 case 0x4D: /* AVN */
@@ -1839,6 +2015,8 @@ void rapl_probe(unsigned int family, unsigned int model)
1839 else 2015 else
1840 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); 2016 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1841 2017
2018 rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
2019
1842 time_unit = msr >> 16 & 0xF; 2020 time_unit = msr >> 16 & 0xF;
1843 if (time_unit == 0) 2021 if (time_unit == 0)
1844 time_unit = 0xA; 2022 time_unit = 0xA;
@@ -2009,19 +2187,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
2009 ((msr >> 48) & 1) ? "EN" : "DIS"); 2187 ((msr >> 48) & 1) ? "EN" : "DIS");
2010 } 2188 }
2011 2189
2012 if (do_rapl & RAPL_DRAM) { 2190 if (do_rapl & RAPL_DRAM_POWER_INFO) {
2013 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) 2191 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
2014 return -6; 2192 return -6;
2015 2193
2016
2017 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", 2194 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
2018 cpu, msr, 2195 cpu, msr,
2019 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2196 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2020 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2197 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2021 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, 2198 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
2022 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); 2199 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
2023 2200 }
2024 2201 if (do_rapl & RAPL_DRAM) {
2025 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) 2202 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
2026 return -9; 2203 return -9;
2027 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", 2204 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
@@ -2090,6 +2267,8 @@ int has_snb_msrs(unsigned int family, unsigned int model)
2090 case 0x47: /* BDW */ 2267 case 0x47: /* BDW */
2091 case 0x4F: /* BDX */ 2268 case 0x4F: /* BDX */
2092 case 0x56: /* BDX-DE */ 2269 case 0x56: /* BDX-DE */
2270 case 0x4E: /* SKL */
2271 case 0x5E: /* SKL */
2093 return 1; 2272 return 1;
2094 } 2273 }
2095 return 0; 2274 return 0;
@@ -2110,11 +2289,35 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
2110 switch (model) { 2289 switch (model) {
2111 case 0x45: /* HSW */ 2290 case 0x45: /* HSW */
2112 case 0x3D: /* BDW */ 2291 case 0x3D: /* BDW */
2292 case 0x4E: /* SKL */
2293 case 0x5E: /* SKL */
2113 return 1; 2294 return 1;
2114 } 2295 }
2115 return 0; 2296 return 0;
2116} 2297}
2117 2298
2299/*
2300 * SKL adds support for additional MSRS:
2301 *
2302 * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
2303 * MSR_PKG_ANY_CORE_C0_RES 0x00000659
2304 * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
2305 * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
2306 */
2307int has_skl_msrs(unsigned int family, unsigned int model)
2308{
2309 if (!genuine_intel)
2310 return 0;
2311
2312 switch (model) {
2313 case 0x4E: /* SKL */
2314 case 0x5E: /* SKL */
2315 return 1;
2316 }
2317 return 0;
2318}
2319
2320
2118 2321
2119int is_slm(unsigned int family, unsigned int model) 2322int is_slm(unsigned int family, unsigned int model)
2120{ 2323{
@@ -2228,7 +2431,7 @@ guess:
2228 2431
2229 return 0; 2432 return 0;
2230} 2433}
2231void check_cpuid() 2434void process_cpuid()
2232{ 2435{
2233 unsigned int eax, ebx, ecx, edx, max_level; 2436 unsigned int eax, ebx, ecx, edx, max_level;
2234 unsigned int fms, family, model, stepping; 2437 unsigned int fms, family, model, stepping;
@@ -2294,6 +2497,41 @@ void check_cpuid()
2294 do_ptm ? "" : "No ", 2497 do_ptm ? "" : "No ",
2295 has_epb ? "" : "No "); 2498 has_epb ? "" : "No ");
2296 2499
2500 if (max_level > 0x15) {
2501 unsigned int eax_crystal;
2502 unsigned int ebx_tsc;
2503
2504 /*
2505 * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
2506 */
2507 eax_crystal = ebx_tsc = crystal_hz = edx = 0;
2508 __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
2509
2510 if (ebx_tsc != 0) {
2511
2512 if (debug && (ebx != 0))
2513 fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
2514 eax_crystal, ebx_tsc, crystal_hz);
2515
2516 if (crystal_hz == 0)
2517 switch(model) {
2518 case 0x4E: /* SKL */
2519 case 0x5E: /* SKL */
2520 crystal_hz = 24000000; /* 24 MHz */
2521 break;
2522 default:
2523 crystal_hz = 0;
2524 }
2525
2526 if (crystal_hz) {
2527 tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
2528 if (debug)
2529 fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
2530 tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
2531 }
2532 }
2533 }
2534
2297 do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); 2535 do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
2298 do_snb_cstates = has_snb_msrs(family, model); 2536 do_snb_cstates = has_snb_msrs(family, model);
2299 do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); 2537 do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
@@ -2301,18 +2539,19 @@ void check_cpuid()
2301 do_pc6 = (pkg_cstate_limit >= PCL__6); 2539 do_pc6 = (pkg_cstate_limit >= PCL__6);
2302 do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); 2540 do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
2303 do_c8_c9_c10 = has_hsw_msrs(family, model); 2541 do_c8_c9_c10 = has_hsw_msrs(family, model);
2542 do_skl_residency = has_skl_msrs(family, model);
2304 do_slm_cstates = is_slm(family, model); 2543 do_slm_cstates = is_slm(family, model);
2305 bclk = discover_bclk(family, model); 2544 bclk = discover_bclk(family, model);
2306 2545
2307 do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model);
2308 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
2309 rapl_probe(family, model); 2546 rapl_probe(family, model);
2310 perf_limit_reasons_probe(family, model); 2547 perf_limit_reasons_probe(family, model);
2311 2548
2549 if (debug)
2550 dump_cstate_pstate_config_info();
2551
2312 return; 2552 return;
2313} 2553}
2314 2554
2315
2316void help() 2555void help()
2317{ 2556{
2318 fprintf(stderr, 2557 fprintf(stderr,
@@ -2428,14 +2667,14 @@ void topology_probe()
2428 if (debug > 1) 2667 if (debug > 1)
2429 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", 2668 fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
2430 max_core_id, topo.num_cores_per_pkg); 2669 max_core_id, topo.num_cores_per_pkg);
2431 if (!summary_only && topo.num_cores_per_pkg > 1) 2670 if (debug && !summary_only && topo.num_cores_per_pkg > 1)
2432 show_core = 1; 2671 show_core = 1;
2433 2672
2434 topo.num_packages = max_package_id + 1; 2673 topo.num_packages = max_package_id + 1;
2435 if (debug > 1) 2674 if (debug > 1)
2436 fprintf(stderr, "max_package_id %d, sizing for %d packages\n", 2675 fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
2437 max_package_id, topo.num_packages); 2676 max_package_id, topo.num_packages);
2438 if (!summary_only && topo.num_packages > 1) 2677 if (debug && !summary_only && topo.num_packages > 1)
2439 show_pkg = 1; 2678 show_pkg = 1;
2440 2679
2441 topo.num_threads_per_core = max_siblings; 2680 topo.num_threads_per_core = max_siblings;
@@ -2550,14 +2789,11 @@ void turbostat_init()
2550{ 2789{
2551 check_dev_msr(); 2790 check_dev_msr();
2552 check_permissions(); 2791 check_permissions();
2553 check_cpuid(); 2792 process_cpuid();
2554 2793
2555 setup_all_buffers(); 2794 setup_all_buffers();
2556 2795
2557 if (debug) 2796 if (debug)
2558 print_verbose_header();
2559
2560 if (debug)
2561 for_all_cpus(print_epb, ODD_COUNTERS); 2797 for_all_cpus(print_epb, ODD_COUNTERS);
2562 2798
2563 if (debug) 2799 if (debug)
@@ -2634,7 +2870,7 @@ int get_and_dump_counters(void)
2634} 2870}
2635 2871
2636void print_version() { 2872void print_version() {
2637 fprintf(stderr, "turbostat version 4.1 10-Feb, 2015" 2873 fprintf(stderr, "turbostat version 4.5 2 Apr, 2015"
2638 " - Len Brown <lenb@kernel.org>\n"); 2874 " - Len Brown <lenb@kernel.org>\n");
2639} 2875}
2640 2876