aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h37
-rw-r--r--tools/power/x86/turbostat/Makefile21
-rw-r--r--tools/power/x86/turbostat/turbostat.8103
-rw-r--r--tools/power/x86/turbostat/turbostat.c677
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile6
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c2
6 files changed, 765 insertions, 81 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 6e930b218724..433a59fb1a74 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -35,11 +35,14 @@
35#define MSR_IA32_PERFCTR0 0x000000c1 35#define MSR_IA32_PERFCTR0 0x000000c1
36#define MSR_IA32_PERFCTR1 0x000000c2 36#define MSR_IA32_PERFCTR1 0x000000c2
37#define MSR_FSB_FREQ 0x000000cd 37#define MSR_FSB_FREQ 0x000000cd
38#define MSR_NHM_PLATFORM_INFO 0x000000ce
38 39
39#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 40#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2
40#define NHM_C3_AUTO_DEMOTE (1UL << 25) 41#define NHM_C3_AUTO_DEMOTE (1UL << 25)
41#define NHM_C1_AUTO_DEMOTE (1UL << 26) 42#define NHM_C1_AUTO_DEMOTE (1UL << 26)
42#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) 43#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
44#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
45#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
43 46
44#define MSR_MTRRcap 0x000000fe 47#define MSR_MTRRcap 0x000000fe
45#define MSR_IA32_BBL_CR_CTL 0x00000119 48#define MSR_IA32_BBL_CR_CTL 0x00000119
@@ -55,6 +58,8 @@
55 58
56#define MSR_OFFCORE_RSP_0 0x000001a6 59#define MSR_OFFCORE_RSP_0 0x000001a6
57#define MSR_OFFCORE_RSP_1 0x000001a7 60#define MSR_OFFCORE_RSP_1 0x000001a7
61#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
62#define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae
58 63
59#define MSR_LBR_SELECT 0x000001c8 64#define MSR_LBR_SELECT 0x000001c8
60#define MSR_LBR_TOS 0x000001c9 65#define MSR_LBR_TOS 0x000001c9
@@ -103,6 +108,38 @@
103#define MSR_IA32_MC0_ADDR 0x00000402 108#define MSR_IA32_MC0_ADDR 0x00000402
104#define MSR_IA32_MC0_MISC 0x00000403 109#define MSR_IA32_MC0_MISC 0x00000403
105 110
111/* C-state Residency Counters */
112#define MSR_PKG_C3_RESIDENCY 0x000003f8
113#define MSR_PKG_C6_RESIDENCY 0x000003f9
114#define MSR_PKG_C7_RESIDENCY 0x000003fa
115#define MSR_CORE_C3_RESIDENCY 0x000003fc
116#define MSR_CORE_C6_RESIDENCY 0x000003fd
117#define MSR_CORE_C7_RESIDENCY 0x000003fe
118#define MSR_PKG_C2_RESIDENCY 0x0000060d
119
120/* Run Time Average Power Limiting (RAPL) Interface */
121
122#define MSR_RAPL_POWER_UNIT 0x00000606
123
124#define MSR_PKG_POWER_LIMIT 0x00000610
125#define MSR_PKG_ENERGY_STATUS 0x00000611
126#define MSR_PKG_PERF_STATUS 0x00000613
127#define MSR_PKG_POWER_INFO 0x00000614
128
129#define MSR_DRAM_POWER_LIMIT 0x00000618
130#define MSR_DRAM_ENERGY_STATUS 0x00000619
131#define MSR_DRAM_PERF_STATUS 0x0000061b
132#define MSR_DRAM_POWER_INFO 0x0000061c
133
134#define MSR_PP0_POWER_LIMIT 0x00000638
135#define MSR_PP0_ENERGY_STATUS 0x00000639
136#define MSR_PP0_POLICY 0x0000063a
137#define MSR_PP0_PERF_STATUS 0x0000063b
138
139#define MSR_PP1_POWER_LIMIT 0x00000640
140#define MSR_PP1_ENERGY_STATUS 0x00000641
141#define MSR_PP1_POLICY 0x00000642
142
106#define MSR_AMD64_MC0_MASK 0xc0010044 143#define MSR_AMD64_MC0_MASK 0xc0010044
107 144
108#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) 145#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index f85649554191..f09641da40d4 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -1,9 +1,22 @@
1CC = $(CROSS_COMPILE)gcc
2BUILD_OUTPUT := $(PWD)
3PREFIX := /usr
4DESTDIR :=
5
1turbostat : turbostat.c 6turbostat : turbostat.c
2CFLAGS += -Wall 7CFLAGS += -Wall
8CFLAGS += -I../../../../arch/x86/include/uapi/
9
10%: %.c
11 @mkdir -p $(BUILD_OUTPUT)
12 $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
3 13
14.PHONY : clean
4clean : 15clean :
5 rm -f turbostat 16 @rm -f $(BUILD_OUTPUT)/turbostat
6 17
7install : 18install : turbostat
8 install turbostat /usr/bin/turbostat 19 install -d $(DESTDIR)$(PREFIX)/bin
9 install turbostat.8 /usr/share/man/man8 20 install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat
21 install -d $(DESTDIR)$(PREFIX)/share/man/man8
22 install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index e4d0690cccf9..0d7dc2cfefb5 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -11,16 +11,16 @@ turbostat \- Report processor frequency and idle statistics
11.RB [ Options ] 11.RB [ Options ]
12.RB [ "\-i interval_sec" ] 12.RB [ "\-i interval_sec" ]
13.SH DESCRIPTION 13.SH DESCRIPTION
14\fBturbostat \fP reports processor topology, frequency 14\fBturbostat \fP reports processor topology, frequency,
15and idle power state statistics on modern X86 processors. 15idle power-state statistics, temperature and power on modern X86 processors.
16Either \fBcommand\fP is forked and statistics are printed 16Either \fBcommand\fP is forked and statistics are printed
17upon its completion, or statistics are printed periodically. 17upon its completion, or statistics are printed periodically.
18 18
19\fBturbostat \fP 19\fBturbostat \fP
20requires that the processor 20must be run as root, and
21minimally requires that the processor
21supports an "invariant" TSC, plus the APERF and MPERF MSRs. 22supports an "invariant" TSC, plus the APERF and MPERF MSRs.
22\fBturbostat \fP will report idle cpu power state residency 23Additional information is reported depending on hardware counter support.
23on processors that additionally support C-state residency counters.
24 24
25.SS Options 25.SS Options
26The \fB-p\fP option limits output to the 1st thread in 1st core of each package. 26The \fB-p\fP option limits output to the 1st thread in 1st core of each package.
@@ -57,7 +57,15 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
57\fBGHz\fP average clock rate while the CPU was in c0 state. 57\fBGHz\fP average clock rate while the CPU was in c0 state.
58\fBTSC\fP average GHz that the TSC ran during the entire interval. 58\fBTSC\fP average GHz that the TSC ran during the entire interval.
59\fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. 59\fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states.
60\fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
61\fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
60\fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. 62\fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states.
63\fBPkg_W\fP Watts consumed by the whole package.
64\fBCor_W\fP Watts consumed by the core part of the package.
65\fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors.
66\fBRAM_W\fP Watts consumed by the DRAM DIMMs -- available only on server processors.
67\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package.
68\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
61.fi 69.fi
62.PP 70.PP
63.SH EXAMPLE 71.SH EXAMPLE
@@ -66,50 +74,73 @@ Without any parameters, turbostat prints out counters ever 5 seconds.
66for turbostat to fork). 74for turbostat to fork).
67 75
68The first row of statistics is a summary for the entire system. 76The first row of statistics is a summary for the entire system.
69Note that the summary is a weighted average. 77For residency % columns, the summary is a weighted average.
78For Temperature columns, the summary is the column maximum.
79For Watts columns, the summary is a system total.
70Subsequent rows show per-CPU statistics. 80Subsequent rows show per-CPU statistics.
71 81
72.nf 82.nf
73[root@x980]# ./turbostat 83[root@sandy]# ./turbostat
74cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 84cor CPU %c0 GHz TSC %c1 %c3 %c6 %c7 CTMP PTMP %pc2 %pc3 %pc6 %pc7 Pkg_W Cor_W GFX_W
75 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 85 0.06 0.80 2.29 0.11 0.00 0.00 99.83 47 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14
76 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 86 0 0 0.07 0.80 2.29 0.07 0.00 0.00 99.86 40 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14
77 0 6 0.05 1.62 3.38 10.34 87 0 4 0.03 0.80 2.29 0.12
78 1 2 0.03 1.62 3.38 0.07 0.05 99.86 88 1 1 0.04 0.80 2.29 0.25 0.01 0.00 99.71 40
79 1 8 0.03 1.62 3.38 0.06 89 1 5 0.16 0.80 2.29 0.13
80 2 4 0.21 1.62 3.38 0.10 1.49 98.21 90 2 2 0.05 0.80 2.29 0.06 0.01 0.00 99.88 40
81 2 10 0.02 1.62 3.38 0.29 91 2 6 0.03 0.80 2.29 0.08
82 8 1 0.04 1.62 3.38 0.04 0.08 99.84 92 3 3 0.05 0.80 2.29 0.08 0.00 0.00 99.87 47
83 8 7 0.01 1.62 3.38 0.06 93 3 7 0.04 0.84 2.29 0.09
84 9 3 0.53 1.62 3.38 0.10 0.20 99.17
85 9 9 0.02 1.62 3.38 0.60
86 10 5 0.01 1.62 3.38 0.02 0.04 99.92
87 10 11 0.02 1.62 3.38 0.02
88.fi 94.fi
89.SH SUMMARY EXAMPLE 95.SH SUMMARY EXAMPLE
90The "-s" option prints the column headers just once, 96The "-s" option prints the column headers just once,
91and then the one line system summary for each sample interval. 97and then the one line system summary for each sample interval.
92 98
93.nf 99.nf
94[root@x980]# ./turbostat -s 100[root@wsm]# turbostat -S
95 %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 101 %c0 GHz TSC %c1 %c3 %c6 CTMP %pc3 %pc6
96 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 102 1.40 2.81 3.38 10.78 43.47 44.35 42 13.67 2.09
97 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 103 1.34 2.90 3.38 11.48 58.96 28.23 41 19.89 0.15
98 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 104 1.55 2.72 3.38 26.73 37.66 34.07 42 2.53 2.80
99 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 105 1.37 2.83 3.38 16.95 60.05 21.63 42 5.76 0.20
100.fi 106.fi
101.SH VERBOSE EXAMPLE 107.SH VERBOSE EXAMPLE
102The "-v" option adds verbosity to the output: 108The "-v" option adds verbosity to the output:
103 109
104.nf 110.nf
105GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) 111[root@ivy]# turbostat -v
10612 * 133 = 1600 MHz max efficiency 112turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org>
10725 * 133 = 3333 MHz TSC frequency 113CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9)
10826 * 133 = 3467 MHz max turbo 4 active cores 114CPUID(6): APERF, DTS, PTM, EPB
10926 * 133 = 3467 MHz max turbo 3 active cores 115RAPL: 851 sec. Joule Counter Range
11027 * 133 = 3600 MHz max turbo 2 active cores 116cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300
11127 * 133 = 3600 MHz max turbo 1 active cores 11716 * 100 = 1600 MHz max efficiency
112 11835 * 100 = 3500 MHz TSC frequency
119cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret)
120cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727
12137 * 100 = 3700 MHz max turbo 4 active cores
12238 * 100 = 3800 MHz max turbo 3 active cores
12339 * 100 = 3900 MHz max turbo 2 active cores
12439 * 100 = 3900 MHz max turbo 1 active cores
125cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
126cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.)
127cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.)
128cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked)
129cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled)
130cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled)
131cpu0: MSR_PP0_POLICY: 0
132cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
133cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
134cpu0: MSR_PP1_POLICY: 0
135cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
136cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
137cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C)
138cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C)
139cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1)
140cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1)
141cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1)
142cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1)
143 ...
113.fi 144.fi
114The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency 145The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
115available at the minimum package voltage. The \fBTSC frequency\fP is the nominal 146available at the minimum package voltage. The \fBTSC frequency\fP is the nominal
@@ -142,7 +173,7 @@ cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
142 10 5 1.42 3.43 3.38 2.14 30.99 65.44 173 10 5 1.42 3.43 3.38 2.14 30.99 65.44
143 10 11 0.16 2.88 3.38 3.40 174 10 11 0.16 2.88 3.38 3.40
144.fi 175.fi
145Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit 176Above the cycle soaker drives cpu7 up to its 3.6 GHz turbo limit
146while the other processors are generally in various states of idle. 177while the other processors are generally in various states of idle.
147 178
148Note that cpu1 and cpu7 are HT siblings within core8. 179Note that cpu1 and cpu7 are HT siblings within core8.
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index ea095abbe97e..ce6d46038f74 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -20,6 +20,7 @@
20 */ 20 */
21 21
22#define _GNU_SOURCE 22#define _GNU_SOURCE
23#include <asm/msr.h>
23#include <stdio.h> 24#include <stdio.h>
24#include <unistd.h> 25#include <unistd.h>
25#include <sys/types.h> 26#include <sys/types.h>
@@ -35,28 +36,18 @@
35#include <ctype.h> 36#include <ctype.h>
36#include <sched.h> 37#include <sched.h>
37 38
38#define MSR_NEHALEM_PLATFORM_INFO 0xCE
39#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD
40#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE
41#define MSR_APERF 0xE8
42#define MSR_MPERF 0xE7
43#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */
44#define MSR_PKG_C3_RESIDENCY 0x3F8
45#define MSR_PKG_C6_RESIDENCY 0x3F9
46#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */
47#define MSR_CORE_C3_RESIDENCY 0x3FC
48#define MSR_CORE_C6_RESIDENCY 0x3FD
49#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */
50
51char *proc_stat = "/proc/stat"; 39char *proc_stat = "/proc/stat";
52unsigned int interval_sec = 5; /* set with -i interval_sec */ 40unsigned int interval_sec = 5; /* set with -i interval_sec */
53unsigned int verbose; /* set with -v */ 41unsigned int verbose; /* set with -v */
42unsigned int rapl_verbose; /* set with -R */
43unsigned int thermal_verbose; /* set with -T */
54unsigned int summary_only; /* set with -s */ 44unsigned int summary_only; /* set with -s */
55unsigned int skip_c0; 45unsigned int skip_c0;
56unsigned int skip_c1; 46unsigned int skip_c1;
57unsigned int do_nhm_cstates; 47unsigned int do_nhm_cstates;
58unsigned int do_snb_cstates; 48unsigned int do_snb_cstates;
59unsigned int has_aperf; 49unsigned int has_aperf;
50unsigned int has_epb;
60unsigned int units = 1000000000; /* Ghz etc */ 51unsigned int units = 1000000000; /* Ghz etc */
61unsigned int genuine_intel; 52unsigned int genuine_intel;
62unsigned int has_invariant_tsc; 53unsigned int has_invariant_tsc;
@@ -74,6 +65,23 @@ unsigned int show_cpu;
74unsigned int show_pkg_only; 65unsigned int show_pkg_only;
75unsigned int show_core_only; 66unsigned int show_core_only;
76char *output_buffer, *outp; 67char *output_buffer, *outp;
68unsigned int do_rapl;
69unsigned int do_dts;
70unsigned int do_ptm;
71unsigned int tcc_activation_temp;
72unsigned int tcc_activation_temp_override;
73double rapl_power_units, rapl_energy_units, rapl_time_units;
74double rapl_joule_counter_range;
75
76#define RAPL_PKG (1 << 0)
77#define RAPL_CORES (1 << 1)
78#define RAPL_GFX (1 << 2)
79#define RAPL_DRAM (1 << 3)
80#define RAPL_PKG_PERF_STATUS (1 << 4)
81#define RAPL_DRAM_PERF_STATUS (1 << 5)
82#define TJMAX_DEFAULT 100
83
84#define MAX(a, b) ((a) > (b) ? (a) : (b))
77 85
78int aperf_mperf_unstable; 86int aperf_mperf_unstable;
79int backwards_count; 87int backwards_count;
@@ -101,6 +109,7 @@ struct core_data {
101 unsigned long long c3; 109 unsigned long long c3;
102 unsigned long long c6; 110 unsigned long long c6;
103 unsigned long long c7; 111 unsigned long long c7;
112 unsigned int core_temp_c;
104 unsigned int core_id; 113 unsigned int core_id;
105} *core_even, *core_odd; 114} *core_even, *core_odd;
106 115
@@ -110,6 +119,14 @@ struct pkg_data {
110 unsigned long long pc6; 119 unsigned long long pc6;
111 unsigned long long pc7; 120 unsigned long long pc7;
112 unsigned int package_id; 121 unsigned int package_id;
122 unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
123 unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
124 unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */
125 unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */
126 unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
127 unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
128 unsigned int pkg_temp_c;
129
113} *package_even, *package_odd; 130} *package_even, *package_odd;
114 131
115#define ODD_COUNTERS thread_odd, core_odd, package_odd 132#define ODD_COUNTERS thread_odd, core_odd, package_odd
@@ -247,6 +264,12 @@ void print_header(void)
247 outp += sprintf(outp, " %%c6"); 264 outp += sprintf(outp, " %%c6");
248 if (do_snb_cstates) 265 if (do_snb_cstates)
249 outp += sprintf(outp, " %%c7"); 266 outp += sprintf(outp, " %%c7");
267
268 if (do_dts)
269 outp += sprintf(outp, " CTMP");
270 if (do_ptm)
271 outp += sprintf(outp, " PTMP");
272
250 if (do_snb_cstates) 273 if (do_snb_cstates)
251 outp += sprintf(outp, " %%pc2"); 274 outp += sprintf(outp, " %%pc2");
252 if (do_nhm_cstates) 275 if (do_nhm_cstates)
@@ -256,6 +279,19 @@ void print_header(void)
256 if (do_snb_cstates) 279 if (do_snb_cstates)
257 outp += sprintf(outp, " %%pc7"); 280 outp += sprintf(outp, " %%pc7");
258 281
282 if (do_rapl & RAPL_PKG)
283 outp += sprintf(outp, " Pkg_W");
284 if (do_rapl & RAPL_CORES)
285 outp += sprintf(outp, " Cor_W");
286 if (do_rapl & RAPL_GFX)
287 outp += sprintf(outp, " GFX_W");
288 if (do_rapl & RAPL_DRAM)
289 outp += sprintf(outp, " RAM_W");
290 if (do_rapl & RAPL_PKG_PERF_STATUS)
291 outp += sprintf(outp, " PKG_%%");
292 if (do_rapl & RAPL_DRAM_PERF_STATUS)
293 outp += sprintf(outp, " RAM_%%");
294
259 outp += sprintf(outp, "\n"); 295 outp += sprintf(outp, "\n");
260} 296}
261 297
@@ -285,6 +321,7 @@ int dump_counters(struct thread_data *t, struct core_data *c,
285 fprintf(stderr, "c3: %016llX\n", c->c3); 321 fprintf(stderr, "c3: %016llX\n", c->c3);
286 fprintf(stderr, "c6: %016llX\n", c->c6); 322 fprintf(stderr, "c6: %016llX\n", c->c6);
287 fprintf(stderr, "c7: %016llX\n", c->c7); 323 fprintf(stderr, "c7: %016llX\n", c->c7);
324 fprintf(stderr, "DTS: %dC\n", c->core_temp_c);
288 } 325 }
289 326
290 if (p) { 327 if (p) {
@@ -293,6 +330,13 @@ int dump_counters(struct thread_data *t, struct core_data *c,
293 fprintf(stderr, "pc3: %016llX\n", p->pc3); 330 fprintf(stderr, "pc3: %016llX\n", p->pc3);
294 fprintf(stderr, "pc6: %016llX\n", p->pc6); 331 fprintf(stderr, "pc6: %016llX\n", p->pc6);
295 fprintf(stderr, "pc7: %016llX\n", p->pc7); 332 fprintf(stderr, "pc7: %016llX\n", p->pc7);
333 fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg);
334 fprintf(stderr, "Joules COR: %0X\n", p->energy_cores);
335 fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx);
336 fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram);
337 fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status);
338 fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status);
339 fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c);
296 } 340 }
297 return 0; 341 return 0;
298} 342}
@@ -302,14 +346,21 @@ int dump_counters(struct thread_data *t, struct core_data *c,
302 * package: "pk" 2 columns %2d 346 * package: "pk" 2 columns %2d
303 * core: "cor" 3 columns %3d 347 * core: "cor" 3 columns %3d
304 * CPU: "CPU" 3 columns %3d 348 * CPU: "CPU" 3 columns %3d
349 * Pkg_W: %6.2
350 * Cor_W: %6.2
351 * GFX_W: %5.2
352 * RAM_W: %5.2
305 * GHz: "GHz" 3 columns %3.2 353 * GHz: "GHz" 3 columns %3.2
306 * TSC: "TSC" 3 columns %3.2 354 * TSC: "TSC" 3 columns %3.2
307 * percentage " %pc3" %6.2 355 * percentage " %pc3" %6.2
356 * Perf Status percentage: %5.2
357 * "CTMP" 4 columns %4d
308 */ 358 */
309int format_counters(struct thread_data *t, struct core_data *c, 359int format_counters(struct thread_data *t, struct core_data *c,
310 struct pkg_data *p) 360 struct pkg_data *p)
311{ 361{
312 double interval_float; 362 double interval_float;
363 char *fmt5, *fmt6;
313 364
314 /* if showing only 1st thread in core and this isn't one, bail out */ 365 /* if showing only 1st thread in core and this isn't one, bail out */
315 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) 366 if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
@@ -349,7 +400,6 @@ int format_counters(struct thread_data *t, struct core_data *c,
349 if (show_cpu) 400 if (show_cpu)
350 outp += sprintf(outp, " %3d", t->cpu_id); 401 outp += sprintf(outp, " %3d", t->cpu_id);
351 } 402 }
352
353 /* %c0 */ 403 /* %c0 */
354 if (do_nhm_cstates) { 404 if (do_nhm_cstates) {
355 if (show_pkg || show_core || show_cpu) 405 if (show_pkg || show_core || show_cpu)
@@ -414,10 +464,16 @@ int format_counters(struct thread_data *t, struct core_data *c,
414 if (do_snb_cstates) 464 if (do_snb_cstates)
415 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); 465 outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc);
416 466
467 if (do_dts)
468 outp += sprintf(outp, " %4d", c->core_temp_c);
469
417 /* print per-package data only for 1st core in package */ 470 /* print per-package data only for 1st core in package */
418 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 471 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
419 goto done; 472 goto done;
420 473
474 if (do_ptm)
475 outp += sprintf(outp, " %4d", p->pkg_temp_c);
476
421 if (do_snb_cstates) 477 if (do_snb_cstates)
422 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); 478 outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc);
423 if (do_nhm_cstates) 479 if (do_nhm_cstates)
@@ -426,6 +482,32 @@ int format_counters(struct thread_data *t, struct core_data *c,
426 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); 482 outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc);
427 if (do_snb_cstates) 483 if (do_snb_cstates)
428 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); 484 outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc);
485
486 /*
487 * If measurement interval exceeds minimum RAPL Joule Counter range,
488 * indicate that results are suspect by printing "**" in fraction place.
489 */
490 if (interval_float < rapl_joule_counter_range) {
491 fmt5 = " %5.2f";
492 fmt6 = " %6.2f";
493 } else {
494 fmt5 = " %3.0f**";
495 fmt6 = " %4.0f**";
496 }
497
498 if (do_rapl & RAPL_PKG)
499 outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float);
500 if (do_rapl & RAPL_CORES)
501 outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float);
502 if (do_rapl & RAPL_GFX)
503 outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float);
504 if (do_rapl & RAPL_DRAM)
505 outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float);
506 if (do_rapl & RAPL_PKG_PERF_STATUS )
507 outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
508 if (do_rapl & RAPL_DRAM_PERF_STATUS )
509 outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
510
429done: 511done:
430 outp += sprintf(outp, "\n"); 512 outp += sprintf(outp, "\n");
431 513
@@ -435,6 +517,7 @@ done:
435void flush_stdout() 517void flush_stdout()
436{ 518{
437 fputs(output_buffer, stdout); 519 fputs(output_buffer, stdout);
520 fflush(stdout);
438 outp = output_buffer; 521 outp = output_buffer;
439} 522}
440void flush_stderr() 523void flush_stderr()
@@ -461,6 +544,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
461 for_all_cpus(format_counters, t, c, p); 544 for_all_cpus(format_counters, t, c, p);
462} 545}
463 546
547#define DELTA_WRAP32(new, old) \
548 if (new > old) { \
549 old = new - old; \
550 } else { \
551 old = 0x100000000 + new - old; \
552 }
553
464void 554void
465delta_package(struct pkg_data *new, struct pkg_data *old) 555delta_package(struct pkg_data *new, struct pkg_data *old)
466{ 556{
@@ -468,6 +558,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
468 old->pc3 = new->pc3 - old->pc3; 558 old->pc3 = new->pc3 - old->pc3;
469 old->pc6 = new->pc6 - old->pc6; 559 old->pc6 = new->pc6 - old->pc6;
470 old->pc7 = new->pc7 - old->pc7; 560 old->pc7 = new->pc7 - old->pc7;
561 old->pkg_temp_c = new->pkg_temp_c;
562
563 DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
564 DELTA_WRAP32(new->energy_cores, old->energy_cores);
565 DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
566 DELTA_WRAP32(new->energy_dram, old->energy_dram);
567 DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
568 DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
471} 569}
472 570
473void 571void
@@ -476,6 +574,7 @@ delta_core(struct core_data *new, struct core_data *old)
476 old->c3 = new->c3 - old->c3; 574 old->c3 = new->c3 - old->c3;
477 old->c6 = new->c6 - old->c6; 575 old->c6 = new->c6 - old->c6;
478 old->c7 = new->c7 - old->c7; 576 old->c7 = new->c7 - old->c7;
577 old->core_temp_c = new->core_temp_c;
479} 578}
480 579
481/* 580/*
@@ -582,11 +681,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
582 c->c3 = 0; 681 c->c3 = 0;
583 c->c6 = 0; 682 c->c6 = 0;
584 c->c7 = 0; 683 c->c7 = 0;
684 c->core_temp_c = 0;
585 685
586 p->pc2 = 0; 686 p->pc2 = 0;
587 p->pc3 = 0; 687 p->pc3 = 0;
588 p->pc6 = 0; 688 p->pc6 = 0;
589 p->pc7 = 0; 689 p->pc7 = 0;
690
691 p->energy_pkg = 0;
692 p->energy_dram = 0;
693 p->energy_cores = 0;
694 p->energy_gfx = 0;
695 p->rapl_pkg_perf_status = 0;
696 p->rapl_dram_perf_status = 0;
697 p->pkg_temp_c = 0;
590} 698}
591int sum_counters(struct thread_data *t, struct core_data *c, 699int sum_counters(struct thread_data *t, struct core_data *c,
592 struct pkg_data *p) 700 struct pkg_data *p)
@@ -607,6 +715,8 @@ int sum_counters(struct thread_data *t, struct core_data *c,
607 average.cores.c6 += c->c6; 715 average.cores.c6 += c->c6;
608 average.cores.c7 += c->c7; 716 average.cores.c7 += c->c7;
609 717
718 average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
719
610 /* sum per-pkg values only for 1st core in pkg */ 720 /* sum per-pkg values only for 1st core in pkg */
611 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 721 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
612 return 0; 722 return 0;
@@ -616,6 +726,15 @@ int sum_counters(struct thread_data *t, struct core_data *c,
616 average.packages.pc6 += p->pc6; 726 average.packages.pc6 += p->pc6;
617 average.packages.pc7 += p->pc7; 727 average.packages.pc7 += p->pc7;
618 728
729 average.packages.energy_pkg += p->energy_pkg;
730 average.packages.energy_dram += p->energy_dram;
731 average.packages.energy_cores += p->energy_cores;
732 average.packages.energy_gfx += p->energy_gfx;
733
734 average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
735
736 average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
737 average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
619 return 0; 738 return 0;
620} 739}
621/* 740/*
@@ -667,23 +786,26 @@ static unsigned long long rdtsc(void)
667int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) 786int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
668{ 787{
669 int cpu = t->cpu_id; 788 int cpu = t->cpu_id;
789 unsigned long long msr;
670 790
671 if (cpu_migrate(cpu)) 791 if (cpu_migrate(cpu)) {
792 fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
672 return -1; 793 return -1;
794 }
673 795
674 t->tsc = rdtsc(); /* we are running on local CPU of interest */ 796 t->tsc = rdtsc(); /* we are running on local CPU of interest */
675 797
676 if (has_aperf) { 798 if (has_aperf) {
677 if (get_msr(cpu, MSR_APERF, &t->aperf)) 799 if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
678 return -3; 800 return -3;
679 if (get_msr(cpu, MSR_MPERF, &t->mperf)) 801 if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
680 return -4; 802 return -4;
681 } 803 }
682 804
683 if (extra_delta_offset32) { 805 if (extra_delta_offset32) {
684 if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) 806 if (get_msr(cpu, extra_delta_offset32, &msr))
685 return -5; 807 return -5;
686 t->extra_delta32 &= 0xFFFFFFFF; 808 t->extra_delta32 = msr & 0xFFFFFFFF;
687 } 809 }
688 810
689 if (extra_delta_offset64) 811 if (extra_delta_offset64)
@@ -691,9 +813,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
691 return -5; 813 return -5;
692 814
693 if (extra_msr_offset32) { 815 if (extra_msr_offset32) {
694 if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) 816 if (get_msr(cpu, extra_msr_offset32, &msr))
695 return -5; 817 return -5;
696 t->extra_msr32 &= 0xFFFFFFFF; 818 t->extra_msr32 = msr & 0xFFFFFFFF;
697 } 819 }
698 820
699 if (extra_msr_offset64) 821 if (extra_msr_offset64)
@@ -715,6 +837,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
715 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) 837 if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
716 return -8; 838 return -8;
717 839
840 if (do_dts) {
841 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
842 return -9;
843 c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
844 }
845
846
718 /* collect package counters only for 1st core in package */ 847 /* collect package counters only for 1st core in package */
719 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) 848 if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
720 return 0; 849 return 0;
@@ -731,6 +860,41 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
731 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) 860 if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
732 return -12; 861 return -12;
733 } 862 }
863 if (do_rapl & RAPL_PKG) {
864 if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
865 return -13;
866 p->energy_pkg = msr & 0xFFFFFFFF;
867 }
868 if (do_rapl & RAPL_CORES) {
869 if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
870 return -14;
871 p->energy_cores = msr & 0xFFFFFFFF;
872 }
873 if (do_rapl & RAPL_DRAM) {
874 if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
875 return -15;
876 p->energy_dram = msr & 0xFFFFFFFF;
877 }
878 if (do_rapl & RAPL_GFX) {
879 if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
880 return -16;
881 p->energy_gfx = msr & 0xFFFFFFFF;
882 }
883 if (do_rapl & RAPL_PKG_PERF_STATUS) {
884 if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
885 return -16;
886 p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
887 }
888 if (do_rapl & RAPL_DRAM_PERF_STATUS) {
889 if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
890 return -16;
891 p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
892 }
893 if (do_ptm) {
894 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
895 return -17;
896 p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
897 }
734 return 0; 898 return 0;
735} 899}
736 900
@@ -742,10 +906,10 @@ void print_verbose_header(void)
742 if (!do_nehalem_platform_info) 906 if (!do_nehalem_platform_info)
743 return; 907 return;
744 908
745 get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); 909 get_msr(0, MSR_NHM_PLATFORM_INFO, &msr);
746 910
747 if (verbose > 1) 911 if (verbose)
748 fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); 912 fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr);
749 913
750 ratio = (msr >> 40) & 0xFF; 914 ratio = (msr >> 40) & 0xFF;
751 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", 915 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
@@ -760,8 +924,8 @@ void print_verbose_header(void)
760 924
761 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); 925 get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr);
762 926
763 if (verbose > 1) 927 if (verbose)
764 fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 928 fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
765 929
766 ratio = (msr >> 56) & 0xFF; 930 ratio = (msr >> 56) & 0xFF;
767 if (ratio) 931 if (ratio)
@@ -804,14 +968,56 @@ void print_verbose_header(void)
804 ratio, bclk, ratio * bclk); 968 ratio, bclk, ratio * bclk);
805 969
806print_nhm_turbo_ratio_limits: 970print_nhm_turbo_ratio_limits:
971 get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr);
972
973#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
974#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
975
976 fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr);
977
978 fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ",
979 (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
980 (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
981 (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
982 (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
983 (msr & (1 << 15)) ? "" : "UN",
984 (unsigned int)msr & 7);
985
986
987 switch(msr & 0x7) {
988 case 0:
989 fprintf(stderr, "pc0");
990 break;
991 case 1:
992 fprintf(stderr, do_snb_cstates ? "pc2" : "pc0");
993 break;
994 case 2:
995 fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3");
996 break;
997 case 3:
998 fprintf(stderr, "pc6");
999 break;
1000 case 4:
1001 fprintf(stderr, "pc7");
1002 break;
1003 case 5:
1004 fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid");
1005 break;
1006 case 7:
1007 fprintf(stderr, "unlimited");
1008 break;
1009 default:
1010 fprintf(stderr, "invalid");
1011 }
1012 fprintf(stderr, ")\n");
807 1013
808 if (!do_nehalem_turbo_ratio_limit) 1014 if (!do_nehalem_turbo_ratio_limit)
809 return; 1015 return;
810 1016
811 get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); 1017 get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
812 1018
813 if (verbose > 1) 1019 if (verbose)
814 fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); 1020 fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr);
815 1021
816 ratio = (msr >> 56) & 0xFF; 1022 ratio = (msr >> 56) & 0xFF;
817 if (ratio) 1023 if (ratio)
@@ -1100,15 +1306,22 @@ int mark_cpu_present(int cpu)
1100void turbostat_loop() 1306void turbostat_loop()
1101{ 1307{
1102 int retval; 1308 int retval;
1309 int restarted = 0;
1103 1310
1104restart: 1311restart:
1312 restarted++;
1313
1105 retval = for_all_cpus(get_counters, EVEN_COUNTERS); 1314 retval = for_all_cpus(get_counters, EVEN_COUNTERS);
1106 if (retval < -1) { 1315 if (retval < -1) {
1107 exit(retval); 1316 exit(retval);
1108 } else if (retval == -1) { 1317 } else if (retval == -1) {
1318 if (restarted > 1) {
1319 exit(retval);
1320 }
1109 re_initialize(); 1321 re_initialize();
1110 goto restart; 1322 goto restart;
1111 } 1323 }
1324 restarted = 0;
1112 gettimeofday(&tv_even, (struct timezone *)NULL); 1325 gettimeofday(&tv_even, (struct timezone *)NULL);
1113 1326
1114 while (1) { 1327 while (1) {
@@ -1207,6 +1420,299 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
1207 } 1420 }
1208} 1421}
1209 1422
1423/*
1424 * print_epb()
1425 * Decode the ENERGY_PERF_BIAS MSR
1426 */
1427int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1428{
1429 unsigned long long msr;
1430 char *epb_string;
1431 int cpu;
1432
1433 if (!has_epb)
1434 return 0;
1435
1436 cpu = t->cpu_id;
1437
1438 /* EPB is per-package */
1439 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1440 return 0;
1441
1442 if (cpu_migrate(cpu)) {
1443 fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1444 return -1;
1445 }
1446
1447 if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
1448 return 0;
1449
1450 switch (msr & 0x7) {
1451 case ENERGY_PERF_BIAS_PERFORMANCE:
1452 epb_string = "performance";
1453 break;
1454 case ENERGY_PERF_BIAS_NORMAL:
1455 epb_string = "balanced";
1456 break;
1457 case ENERGY_PERF_BIAS_POWERSAVE:
1458 epb_string = "powersave";
1459 break;
1460 default:
1461 epb_string = "custom";
1462 break;
1463 }
1464 fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
1465
1466 return 0;
1467}
1468
1469#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
1470#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
1471
1472/*
1473 * rapl_probe()
1474 *
1475 * sets do_rapl
1476 */
1477void rapl_probe(unsigned int family, unsigned int model)
1478{
1479 unsigned long long msr;
1480 double tdp;
1481
1482 if (!genuine_intel)
1483 return;
1484
1485 if (family != 6)
1486 return;
1487
1488 switch (model) {
1489 case 0x2A:
1490 case 0x3A:
1491 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX;
1492 break;
1493 case 0x2D:
1494 case 0x3E:
1495 do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS;
1496 break;
1497 default:
1498 return;
1499 }
1500
1501 /* units on package 0, verify later other packages match */
1502 if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr))
1503 return;
1504
1505 rapl_power_units = 1.0 / (1 << (msr & 0xF));
1506 rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1507 rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
1508
1509 /* get TDP to determine energy counter range */
1510 if (get_msr(0, MSR_PKG_POWER_INFO, &msr))
1511 return;
1512
1513 tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
1514
1515 rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
1516
1517 if (verbose)
1518 fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range);
1519
1520 return;
1521}
1522
1523int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1524{
1525 unsigned long long msr;
1526 unsigned int dts;
1527 int cpu;
1528
1529 if (!(do_dts || do_ptm))
1530 return 0;
1531
1532 cpu = t->cpu_id;
1533
1534 /* DTS is per-core, no need to print for each thread */
1535 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
1536 return 0;
1537
1538 if (cpu_migrate(cpu)) {
1539 fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1540 return -1;
1541 }
1542
1543 if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
1544 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
1545 return 0;
1546
1547 dts = (msr >> 16) & 0x7F;
1548 fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
1549 cpu, msr, tcc_activation_temp - dts);
1550
1551#ifdef THERM_DEBUG
1552 if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
1553 return 0;
1554
1555 dts = (msr >> 16) & 0x7F;
1556 dts2 = (msr >> 8) & 0x7F;
1557 fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1558 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1559#endif
1560 }
1561
1562
1563 if (do_dts) {
1564 unsigned int resolution;
1565
1566 if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
1567 return 0;
1568
1569 dts = (msr >> 16) & 0x7F;
1570 resolution = (msr >> 27) & 0xF;
1571 fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
1572 cpu, msr, tcc_activation_temp - dts, resolution);
1573
1574#ifdef THERM_DEBUG
1575 if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
1576 return 0;
1577
1578 dts = (msr >> 16) & 0x7F;
1579 dts2 = (msr >> 8) & 0x7F;
1580 fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
1581 cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
1582#endif
1583 }
1584
1585 return 0;
1586}
1587
1588void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
1589{
1590 fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
1591 cpu, label,
1592 ((msr >> 15) & 1) ? "EN" : "DIS",
1593 ((msr >> 0) & 0x7FFF) * rapl_power_units,
1594 (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
1595 (((msr >> 16) & 1) ? "EN" : "DIS"));
1596
1597 return;
1598}
1599
1600int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1601{
1602 unsigned long long msr;
1603 int cpu;
1604 double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units;
1605
1606 if (!do_rapl)
1607 return 0;
1608
1609 /* RAPL counters are per package, so print only for 1st thread/package */
1610 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1611 return 0;
1612
1613 cpu = t->cpu_id;
1614 if (cpu_migrate(cpu)) {
1615 fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1616 return -1;
1617 }
1618
1619 if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
1620 return -1;
1621
1622 local_rapl_power_units = 1.0 / (1 << (msr & 0xF));
1623 local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
1624 local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF));
1625
1626 if (local_rapl_power_units != rapl_power_units)
1627 fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu);
1628 if (local_rapl_energy_units != rapl_energy_units)
1629 fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu);
1630 if (local_rapl_time_units != rapl_time_units)
1631 fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu);
1632
1633 if (verbose) {
1634 fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
1635 "(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
1636 local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units);
1637 }
1638 if (do_rapl & RAPL_PKG) {
1639 if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
1640 return -5;
1641
1642
1643 fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1644 cpu, msr,
1645 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1646 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1647 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1648 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1649
1650 if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
1651 return -9;
1652
1653 fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
1654 cpu, msr, (msr >> 63) & 1 ? "": "UN");
1655
1656 print_power_limit_msr(cpu, msr, "PKG Limit #1");
1657 fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
1658 cpu,
1659 ((msr >> 47) & 1) ? "EN" : "DIS",
1660 ((msr >> 32) & 0x7FFF) * rapl_power_units,
1661 (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
1662 ((msr >> 48) & 1) ? "EN" : "DIS");
1663 }
1664
1665 if (do_rapl & RAPL_DRAM) {
1666 if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
1667 return -6;
1668
1669
1670 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
1671 cpu, msr,
1672 ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1673 ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1674 ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
1675 ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
1676
1677
1678 if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
1679 return -9;
1680 fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
1681 cpu, msr, (msr >> 31) & 1 ? "": "UN");
1682
1683 print_power_limit_msr(cpu, msr, "DRAM Limit");
1684 }
1685 if (do_rapl & RAPL_CORES) {
1686 if (verbose) {
1687 if (get_msr(cpu, MSR_PP0_POLICY, &msr))
1688 return -7;
1689
1690 fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
1691
1692 if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
1693 return -9;
1694 fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
1695 cpu, msr, (msr >> 31) & 1 ? "": "UN");
1696 print_power_limit_msr(cpu, msr, "Cores Limit");
1697 }
1698 }
1699 if (do_rapl & RAPL_GFX) {
1700 if (verbose) {
1701 if (get_msr(cpu, MSR_PP1_POLICY, &msr))
1702 return -8;
1703
1704 fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
1705
1706 if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
1707 return -9;
1708 fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
1709 cpu, msr, (msr >> 31) & 1 ? "": "UN");
1710 print_power_limit_msr(cpu, msr, "GFX Limit");
1711 }
1712 }
1713 return 0;
1714}
1715
1210 1716
1211int is_snb(unsigned int family, unsigned int model) 1717int is_snb(unsigned int family, unsigned int model)
1212{ 1718{
@@ -1231,6 +1737,72 @@ double discover_bclk(unsigned int family, unsigned int model)
1231 return 133.33; 1737 return 133.33;
1232} 1738}
1233 1739
1740/*
1741 * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
1742 * the Thermal Control Circuit (TCC) activates.
1743 * This is usually equal to tjMax.
1744 *
1745 * Older processors do not have this MSR, so there we guess,
1746 * but also allow cmdline over-ride with -T.
1747 *
1748 * Several MSR temperature values are in units of degrees-C
1749 * below this value, including the Digital Thermal Sensor (DTS),
1750 * Package Thermal Management Sensor (PTM), and thermal event thresholds.
1751 */
1752int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
1753{
1754 unsigned long long msr;
1755 unsigned int target_c_local;
1756 int cpu;
1757
1758 /* tcc_activation_temp is used only for dts or ptm */
1759 if (!(do_dts || do_ptm))
1760 return 0;
1761
1762 /* this is a per-package concept */
1763 if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
1764 return 0;
1765
1766 cpu = t->cpu_id;
1767 if (cpu_migrate(cpu)) {
1768 fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
1769 return -1;
1770 }
1771
1772 if (tcc_activation_temp_override != 0) {
1773 tcc_activation_temp = tcc_activation_temp_override;
1774 fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
1775 cpu, tcc_activation_temp);
1776 return 0;
1777 }
1778
1779 /* Temperature Target MSR is Nehalem and newer only */
1780 if (!do_nehalem_platform_info)
1781 goto guess;
1782
1783 if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr))
1784 goto guess;
1785
1786 target_c_local = (msr >> 16) & 0x7F;
1787
1788 if (verbose)
1789 fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
1790 cpu, msr, target_c_local);
1791
1792 if (target_c_local < 85 || target_c_local > 120)
1793 goto guess;
1794
1795 tcc_activation_temp = target_c_local;
1796
1797 return 0;
1798
1799guess:
1800 tcc_activation_temp = TJMAX_DEFAULT;
1801 fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
1802 cpu, tcc_activation_temp);
1803
1804 return 0;
1805}
1234void check_cpuid() 1806void check_cpuid()
1235{ 1807{
1236 unsigned int eax, ebx, ecx, edx, max_level; 1808 unsigned int eax, ebx, ecx, edx, max_level;
@@ -1244,7 +1816,7 @@ void check_cpuid()
1244 genuine_intel = 1; 1816 genuine_intel = 1;
1245 1817
1246 if (verbose) 1818 if (verbose)
1247 fprintf(stderr, "%.4s%.4s%.4s ", 1819 fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
1248 (char *)&ebx, (char *)&edx, (char *)&ecx); 1820 (char *)&ebx, (char *)&edx, (char *)&ecx);
1249 1821
1250 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); 1822 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
@@ -1295,10 +1867,19 @@ void check_cpuid()
1295 1867
1296 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); 1868 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
1297 has_aperf = ecx & (1 << 0); 1869 has_aperf = ecx & (1 << 0);
1298 if (!has_aperf) { 1870 do_dts = eax & (1 << 0);
1299 fprintf(stderr, "No APERF MSR\n"); 1871 do_ptm = eax & (1 << 6);
1300 exit(1); 1872 has_epb = ecx & (1 << 3);
1301 } 1873
1874 if (verbose)
1875 fprintf(stderr, "CPUID(6): %s%s%s%s\n",
1876 has_aperf ? "APERF" : "No APERF!",
1877 do_dts ? ", DTS" : "",
1878 do_ptm ? ", PTM": "",
1879 has_epb ? ", EPB": "");
1880
1881 if (!has_aperf)
1882 exit(-1);
1302 1883
1303 do_nehalem_platform_info = genuine_intel && has_invariant_tsc; 1884 do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
1304 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ 1885 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
@@ -1307,12 +1888,15 @@ void check_cpuid()
1307 1888
1308 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); 1889 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
1309 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); 1890 do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model);
1891 rapl_probe(family, model);
1892
1893 return;
1310} 1894}
1311 1895
1312 1896
1313void usage() 1897void usage()
1314{ 1898{
1315 fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", 1899 fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n",
1316 progname); 1900 progname);
1317 exit(1); 1901 exit(1);
1318} 1902}
@@ -1548,6 +2132,17 @@ void turbostat_init()
1548 2132
1549 if (verbose) 2133 if (verbose)
1550 print_verbose_header(); 2134 print_verbose_header();
2135
2136 if (verbose)
2137 for_all_cpus(print_epb, ODD_COUNTERS);
2138
2139 if (verbose)
2140 for_all_cpus(print_rapl, ODD_COUNTERS);
2141
2142 for_all_cpus(set_temperature_target, ODD_COUNTERS);
2143
2144 if (verbose)
2145 for_all_cpus(print_thermal, ODD_COUNTERS);
1551} 2146}
1552 2147
1553int fork_it(char **argv) 2148int fork_it(char **argv)
@@ -1604,7 +2199,7 @@ void cmdline(int argc, char **argv)
1604 2199
1605 progname = argv[0]; 2200 progname = argv[0];
1606 2201
1607 while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { 2202 while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) {
1608 switch (opt) { 2203 switch (opt) {
1609 case 'p': 2204 case 'p':
1610 show_core_only++; 2205 show_core_only++;
@@ -1636,6 +2231,12 @@ void cmdline(int argc, char **argv)
1636 case 'M': 2231 case 'M':
1637 sscanf(optarg, "%x", &extra_msr_offset64); 2232 sscanf(optarg, "%x", &extra_msr_offset64);
1638 break; 2233 break;
2234 case 'R':
2235 rapl_verbose++;
2236 break;
2237 case 'T':
2238 tcc_activation_temp_override = atoi(optarg);
2239 break;
1639 default: 2240 default:
1640 usage(); 2241 usage();
1641 } 2242 }
@@ -1646,8 +2247,8 @@ int main(int argc, char **argv)
1646{ 2247{
1647 cmdline(argc, argv); 2248 cmdline(argc, argv);
1648 2249
1649 if (verbose > 1) 2250 if (verbose)
1650 fprintf(stderr, "turbostat v2.1 October 6, 2012" 2251 fprintf(stderr, "turbostat v3.0 November 23, 2012"
1651 " - Len Brown <lenb@kernel.org>\n"); 2252 " - Len Brown <lenb@kernel.org>\n");
1652 2253
1653 turbostat_init(); 2254 turbostat_init();
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
index f458237fdd79..971c9ffdcb50 100644
--- a/tools/power/x86/x86_energy_perf_policy/Makefile
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -1,8 +1,10 @@
1DESTDIR ?=
2
1x86_energy_perf_policy : x86_energy_perf_policy.c 3x86_energy_perf_policy : x86_energy_perf_policy.c
2 4
3clean : 5clean :
4 rm -f x86_energy_perf_policy 6 rm -f x86_energy_perf_policy
5 7
6install : 8install :
7 install x86_energy_perf_policy /usr/bin/ 9 install x86_energy_perf_policy ${DESTDIR}/usr/bin/
8 install x86_energy_perf_policy.8 /usr/share/man/man8/ 10 install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 33c5c7ee148f..40b3e5482f8a 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -289,7 +289,7 @@ void for_every_cpu(void (func)(int))
289 "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", 289 "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
290 &cpu); 290 &cpu);
291 if (retval != 1) 291 if (retval != 1)
292 return; 292 break;
293 293
294 func(cpu); 294 func(cpu);
295 } 295 }