diff options
-rw-r--r-- | arch/x86/include/uapi/asm/msr-index.h | 37 | ||||
-rw-r--r-- | tools/power/x86/turbostat/Makefile | 21 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 103 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 677 | ||||
-rw-r--r-- | tools/power/x86/x86_energy_perf_policy/Makefile | 6 | ||||
-rw-r--r-- | tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 2 |
6 files changed, 765 insertions, 81 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 6e930b218724..433a59fb1a74 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
@@ -35,11 +35,14 @@ | |||
35 | #define MSR_IA32_PERFCTR0 0x000000c1 | 35 | #define MSR_IA32_PERFCTR0 0x000000c1 |
36 | #define MSR_IA32_PERFCTR1 0x000000c2 | 36 | #define MSR_IA32_PERFCTR1 0x000000c2 |
37 | #define MSR_FSB_FREQ 0x000000cd | 37 | #define MSR_FSB_FREQ 0x000000cd |
38 | #define MSR_NHM_PLATFORM_INFO 0x000000ce | ||
38 | 39 | ||
39 | #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 | 40 | #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 |
40 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) | 41 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) |
41 | #define NHM_C1_AUTO_DEMOTE (1UL << 26) | 42 | #define NHM_C1_AUTO_DEMOTE (1UL << 26) |
42 | #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) | 43 | #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) |
44 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
45 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
43 | 46 | ||
44 | #define MSR_MTRRcap 0x000000fe | 47 | #define MSR_MTRRcap 0x000000fe |
45 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | 48 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
@@ -55,6 +58,8 @@ | |||
55 | 58 | ||
56 | #define MSR_OFFCORE_RSP_0 0x000001a6 | 59 | #define MSR_OFFCORE_RSP_0 0x000001a6 |
57 | #define MSR_OFFCORE_RSP_1 0x000001a7 | 60 | #define MSR_OFFCORE_RSP_1 0x000001a7 |
61 | #define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad | ||
62 | #define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae | ||
58 | 63 | ||
59 | #define MSR_LBR_SELECT 0x000001c8 | 64 | #define MSR_LBR_SELECT 0x000001c8 |
60 | #define MSR_LBR_TOS 0x000001c9 | 65 | #define MSR_LBR_TOS 0x000001c9 |
@@ -103,6 +108,38 @@ | |||
103 | #define MSR_IA32_MC0_ADDR 0x00000402 | 108 | #define MSR_IA32_MC0_ADDR 0x00000402 |
104 | #define MSR_IA32_MC0_MISC 0x00000403 | 109 | #define MSR_IA32_MC0_MISC 0x00000403 |
105 | 110 | ||
111 | /* C-state Residency Counters */ | ||
112 | #define MSR_PKG_C3_RESIDENCY 0x000003f8 | ||
113 | #define MSR_PKG_C6_RESIDENCY 0x000003f9 | ||
114 | #define MSR_PKG_C7_RESIDENCY 0x000003fa | ||
115 | #define MSR_CORE_C3_RESIDENCY 0x000003fc | ||
116 | #define MSR_CORE_C6_RESIDENCY 0x000003fd | ||
117 | #define MSR_CORE_C7_RESIDENCY 0x000003fe | ||
118 | #define MSR_PKG_C2_RESIDENCY 0x0000060d | ||
119 | |||
120 | /* Run Time Average Power Limiting (RAPL) Interface */ | ||
121 | |||
122 | #define MSR_RAPL_POWER_UNIT 0x00000606 | ||
123 | |||
124 | #define MSR_PKG_POWER_LIMIT 0x00000610 | ||
125 | #define MSR_PKG_ENERGY_STATUS 0x00000611 | ||
126 | #define MSR_PKG_PERF_STATUS 0x00000613 | ||
127 | #define MSR_PKG_POWER_INFO 0x00000614 | ||
128 | |||
129 | #define MSR_DRAM_POWER_LIMIT 0x00000618 | ||
130 | #define MSR_DRAM_ENERGY_STATUS 0x00000619 | ||
131 | #define MSR_DRAM_PERF_STATUS 0x0000061b | ||
132 | #define MSR_DRAM_POWER_INFO 0x0000061c | ||
133 | |||
134 | #define MSR_PP0_POWER_LIMIT 0x00000638 | ||
135 | #define MSR_PP0_ENERGY_STATUS 0x00000639 | ||
136 | #define MSR_PP0_POLICY 0x0000063a | ||
137 | #define MSR_PP0_PERF_STATUS 0x0000063b | ||
138 | |||
139 | #define MSR_PP1_POWER_LIMIT 0x00000640 | ||
140 | #define MSR_PP1_ENERGY_STATUS 0x00000641 | ||
141 | #define MSR_PP1_POLICY 0x00000642 | ||
142 | |||
106 | #define MSR_AMD64_MC0_MASK 0xc0010044 | 143 | #define MSR_AMD64_MC0_MASK 0xc0010044 |
107 | 144 | ||
108 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) | 145 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) |
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index f85649554191..f09641da40d4 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
@@ -1,9 +1,22 @@ | |||
1 | CC = $(CROSS_COMPILE)gcc | ||
2 | BUILD_OUTPUT := $(PWD) | ||
3 | PREFIX := /usr | ||
4 | DESTDIR := | ||
5 | |||
1 | turbostat : turbostat.c | 6 | turbostat : turbostat.c |
2 | CFLAGS += -Wall | 7 | CFLAGS += -Wall |
8 | CFLAGS += -I../../../../arch/x86/include/uapi/ | ||
9 | |||
10 | %: %.c | ||
11 | @mkdir -p $(BUILD_OUTPUT) | ||
12 | $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ | ||
3 | 13 | ||
14 | .PHONY : clean | ||
4 | clean : | 15 | clean : |
5 | rm -f turbostat | 16 | @rm -f $(BUILD_OUTPUT)/turbostat |
6 | 17 | ||
7 | install : | 18 | install : turbostat |
8 | install turbostat /usr/bin/turbostat | 19 | install -d $(DESTDIR)$(PREFIX)/bin |
9 | install turbostat.8 /usr/share/man/man8 | 20 | install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat |
21 | install -d $(DESTDIR)$(PREFIX)/share/man/man8 | ||
22 | install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 | ||
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e4d0690cccf9..0d7dc2cfefb5 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
@@ -11,16 +11,16 @@ turbostat \- Report processor frequency and idle statistics | |||
11 | .RB [ Options ] | 11 | .RB [ Options ] |
12 | .RB [ "\-i interval_sec" ] | 12 | .RB [ "\-i interval_sec" ] |
13 | .SH DESCRIPTION | 13 | .SH DESCRIPTION |
14 | \fBturbostat \fP reports processor topology, frequency | 14 | \fBturbostat \fP reports processor topology, frequency, |
15 | and idle power state statistics on modern X86 processors. | 15 | idle power-state statistics, temperature and power on modern X86 processors. |
16 | Either \fBcommand\fP is forked and statistics are printed | 16 | Either \fBcommand\fP is forked and statistics are printed |
17 | upon its completion, or statistics are printed periodically. | 17 | upon its completion, or statistics are printed periodically. |
18 | 18 | ||
19 | \fBturbostat \fP | 19 | \fBturbostat \fP |
20 | requires that the processor | 20 | must be run as root, and |
21 | minimally requires that the processor | ||
21 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. | 22 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. |
22 | \fBturbostat \fP will report idle cpu power state residency | 23 | Additional information is reported depending on hardware counter support. |
23 | on processors that additionally support C-state residency counters. | ||
24 | 24 | ||
25 | .SS Options | 25 | .SS Options |
26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. | 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. |
@@ -57,7 +57,15 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T | |||
57 | \fBGHz\fP average clock rate while the CPU was in c0 state. | 57 | \fBGHz\fP average clock rate while the CPU was in c0 state. |
58 | \fBTSC\fP average GHz that the TSC ran during the entire interval. | 58 | \fBTSC\fP average GHz that the TSC ran during the entire interval. |
59 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. | 59 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. |
60 | \fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. | ||
61 | \fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor. | ||
60 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. | 62 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. |
63 | \fBPkg_W\fP Watts consumed by the whole package. | ||
64 | \fBCor_W\fP Watts consumed by the core part of the package. | ||
65 | \fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors. | ||
66 | \fBRAM_W\fP Watts consumed by the DRAM DIMMs -- available only on server processors. | ||
67 | \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. | ||
68 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. | ||
61 | .fi | 69 | .fi |
62 | .PP | 70 | .PP |
63 | .SH EXAMPLE | 71 | .SH EXAMPLE |
@@ -66,50 +74,73 @@ Without any parameters, turbostat prints out counters every 5 seconds. | |||
66 | for turbostat to fork). | 74 | for turbostat to fork). |
67 | 75 | ||
68 | The first row of statistics is a summary for the entire system. | 76 | The first row of statistics is a summary for the entire system. |
69 | Note that the summary is a weighted average. | 77 | For residency % columns, the summary is a weighted average. |
78 | For Temperature columns, the summary is the column maximum. | ||
79 | For Watts columns, the summary is a system total. | ||
70 | Subsequent rows show per-CPU statistics. | 80 | Subsequent rows show per-CPU statistics. |
71 | 81 | ||
72 | .nf | 82 | .nf |
73 | [root@x980]# ./turbostat | 83 | [root@sandy]# ./turbostat |
74 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 84 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %c7 CTMP PTMP %pc2 %pc3 %pc6 %pc7 Pkg_W Cor_W GFX_W |
75 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 | 85 | 0.06 0.80 2.29 0.11 0.00 0.00 99.83 47 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 |
76 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 | 86 | 0 0 0.07 0.80 2.29 0.07 0.00 0.00 99.86 40 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 |
77 | 0 6 0.05 1.62 3.38 10.34 | 87 | 0 4 0.03 0.80 2.29 0.12 |
78 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 | 88 | 1 1 0.04 0.80 2.29 0.25 0.01 0.00 99.71 40 |
79 | 1 8 0.03 1.62 3.38 0.06 | 89 | 1 5 0.16 0.80 2.29 0.13 |
80 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 | 90 | 2 2 0.05 0.80 2.29 0.06 0.01 0.00 99.88 40 |
81 | 2 10 0.02 1.62 3.38 0.29 | 91 | 2 6 0.03 0.80 2.29 0.08 |
82 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 | 92 | 3 3 0.05 0.80 2.29 0.08 0.00 0.00 99.87 47 |
83 | 8 7 0.01 1.62 3.38 0.06 | 93 | 3 7 0.04 0.84 2.29 0.09 |
84 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 | ||
85 | 9 9 0.02 1.62 3.38 0.60 | ||
86 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 | ||
87 | 10 11 0.02 1.62 3.38 0.02 | ||
88 | .fi | 94 | .fi |
89 | .SH SUMMARY EXAMPLE | 95 | .SH SUMMARY EXAMPLE |
90 | The "-s" option prints the column headers just once, | 96 | The "-s" option prints the column headers just once, |
91 | and then the one line system summary for each sample interval. | 97 | and then the one line system summary for each sample interval. |
92 | 98 | ||
93 | .nf | 99 | .nf |
94 | [root@x980]# ./turbostat -s | 100 | [root@wsm]# turbostat -S |
95 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 101 | %c0 GHz TSC %c1 %c3 %c6 CTMP %pc3 %pc6 |
96 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 | 102 | 1.40 2.81 3.38 10.78 43.47 44.35 42 13.67 2.09 |
97 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 | 103 | 1.34 2.90 3.38 11.48 58.96 28.23 41 19.89 0.15 |
98 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 | 104 | 1.55 2.72 3.38 26.73 37.66 34.07 42 2.53 2.80 |
99 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | 105 | 1.37 2.83 3.38 16.95 60.05 21.63 42 5.76 0.20 |
100 | .fi | 106 | .fi |
101 | .SH VERBOSE EXAMPLE | 107 | .SH VERBOSE EXAMPLE |
102 | The "-v" option adds verbosity to the output: | 108 | The "-v" option adds verbosity to the output: |
103 | 109 | ||
104 | .nf | 110 | .nf |
105 | GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) | 111 | [root@ivy]# turbostat -v |
106 | 12 * 133 = 1600 MHz max efficiency | 112 | turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> |
107 | 25 * 133 = 3333 MHz TSC frequency | 113 | CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) |
108 | 26 * 133 = 3467 MHz max turbo 4 active cores | 114 | CPUID(6): APERF, DTS, PTM, EPB |
109 | 26 * 133 = 3467 MHz max turbo 3 active cores | 115 | RAPL: 851 sec. Joule Counter Range |
110 | 27 * 133 = 3600 MHz max turbo 2 active cores | 116 | cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 |
111 | 27 * 133 = 3600 MHz max turbo 1 active cores | 117 | 16 * 100 = 1600 MHz max efficiency |
112 | 118 | 35 * 100 = 3500 MHz TSC frequency | |
119 | cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) | ||
120 | cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 | ||
121 | 37 * 100 = 3700 MHz max turbo 4 active cores | ||
122 | 38 * 100 = 3800 MHz max turbo 3 active cores | ||
123 | 39 * 100 = 3900 MHz max turbo 2 active cores | ||
124 | 39 * 100 = 3900 MHz max turbo 1 active cores | ||
125 | cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) | ||
126 | cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) | ||
127 | cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) | ||
128 | cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) | ||
129 | cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) | ||
130 | cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) | ||
131 | cpu0: MSR_PP0_POLICY: 0 | ||
132 | cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) | ||
133 | cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | ||
134 | cpu0: MSR_PP1_POLICY: 0 | ||
135 | cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) | ||
136 | cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | ||
137 | cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) | ||
138 | cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) | ||
139 | cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) | ||
140 | cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) | ||
141 | cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) | ||
142 | cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) | ||
143 | ... | ||
113 | .fi | 144 | .fi |
114 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency | 145 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency |
115 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal | 146 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal |
@@ -142,7 +173,7 @@ cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | |||
142 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 | 173 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
143 | 10 11 0.16 2.88 3.38 3.40 | 174 | 10 11 0.16 2.88 3.38 3.40 |
144 | .fi | 175 | .fi |
145 | Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit | 176 | Above the cycle soaker drives cpu7 up its 3.6 GHz turbo limit |
146 | while the other processors are generally in various states of idle. | 177 | while the other processors are generally in various states of idle. |
147 | 178 | ||
148 | Note that cpu1 and cpu7 are HT siblings within core8. | 179 | Note that cpu1 and cpu7 are HT siblings within core8. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ea095abbe97e..ce6d46038f74 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
@@ -20,6 +20,7 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #define _GNU_SOURCE | 22 | #define _GNU_SOURCE |
23 | #include <asm/msr.h> | ||
23 | #include <stdio.h> | 24 | #include <stdio.h> |
24 | #include <unistd.h> | 25 | #include <unistd.h> |
25 | #include <sys/types.h> | 26 | #include <sys/types.h> |
@@ -35,28 +36,18 @@ | |||
35 | #include <ctype.h> | 36 | #include <ctype.h> |
36 | #include <sched.h> | 37 | #include <sched.h> |
37 | 38 | ||
38 | #define MSR_NEHALEM_PLATFORM_INFO 0xCE | ||
39 | #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD | ||
40 | #define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE | ||
41 | #define MSR_APERF 0xE8 | ||
42 | #define MSR_MPERF 0xE7 | ||
43 | #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ | ||
44 | #define MSR_PKG_C3_RESIDENCY 0x3F8 | ||
45 | #define MSR_PKG_C6_RESIDENCY 0x3F9 | ||
46 | #define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ | ||
47 | #define MSR_CORE_C3_RESIDENCY 0x3FC | ||
48 | #define MSR_CORE_C6_RESIDENCY 0x3FD | ||
49 | #define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ | ||
50 | |||
51 | char *proc_stat = "/proc/stat"; | 39 | char *proc_stat = "/proc/stat"; |
52 | unsigned int interval_sec = 5; /* set with -i interval_sec */ | 40 | unsigned int interval_sec = 5; /* set with -i interval_sec */ |
53 | unsigned int verbose; /* set with -v */ | 41 | unsigned int verbose; /* set with -v */ |
42 | unsigned int rapl_verbose; /* set with -R */ | ||
43 | unsigned int thermal_verbose; /* set with -T */ | ||
54 | unsigned int summary_only; /* set with -s */ | 44 | unsigned int summary_only; /* set with -s */ |
55 | unsigned int skip_c0; | 45 | unsigned int skip_c0; |
56 | unsigned int skip_c1; | 46 | unsigned int skip_c1; |
57 | unsigned int do_nhm_cstates; | 47 | unsigned int do_nhm_cstates; |
58 | unsigned int do_snb_cstates; | 48 | unsigned int do_snb_cstates; |
59 | unsigned int has_aperf; | 49 | unsigned int has_aperf; |
50 | unsigned int has_epb; | ||
60 | unsigned int units = 1000000000; /* Ghz etc */ | 51 | unsigned int units = 1000000000; /* Ghz etc */ |
61 | unsigned int genuine_intel; | 52 | unsigned int genuine_intel; |
62 | unsigned int has_invariant_tsc; | 53 | unsigned int has_invariant_tsc; |
@@ -74,6 +65,23 @@ unsigned int show_cpu; | |||
74 | unsigned int show_pkg_only; | 65 | unsigned int show_pkg_only; |
75 | unsigned int show_core_only; | 66 | unsigned int show_core_only; |
76 | char *output_buffer, *outp; | 67 | char *output_buffer, *outp; |
68 | unsigned int do_rapl; | ||
69 | unsigned int do_dts; | ||
70 | unsigned int do_ptm; | ||
71 | unsigned int tcc_activation_temp; | ||
72 | unsigned int tcc_activation_temp_override; | ||
73 | double rapl_power_units, rapl_energy_units, rapl_time_units; | ||
74 | double rapl_joule_counter_range; | ||
75 | |||
76 | #define RAPL_PKG (1 << 0) | ||
77 | #define RAPL_CORES (1 << 1) | ||
78 | #define RAPL_GFX (1 << 2) | ||
79 | #define RAPL_DRAM (1 << 3) | ||
80 | #define RAPL_PKG_PERF_STATUS (1 << 4) | ||
81 | #define RAPL_DRAM_PERF_STATUS (1 << 5) | ||
82 | #define TJMAX_DEFAULT 100 | ||
83 | |||
84 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) | ||
77 | 85 | ||
78 | int aperf_mperf_unstable; | 86 | int aperf_mperf_unstable; |
79 | int backwards_count; | 87 | int backwards_count; |
@@ -101,6 +109,7 @@ struct core_data { | |||
101 | unsigned long long c3; | 109 | unsigned long long c3; |
102 | unsigned long long c6; | 110 | unsigned long long c6; |
103 | unsigned long long c7; | 111 | unsigned long long c7; |
112 | unsigned int core_temp_c; | ||
104 | unsigned int core_id; | 113 | unsigned int core_id; |
105 | } *core_even, *core_odd; | 114 | } *core_even, *core_odd; |
106 | 115 | ||
@@ -110,6 +119,14 @@ struct pkg_data { | |||
110 | unsigned long long pc6; | 119 | unsigned long long pc6; |
111 | unsigned long long pc7; | 120 | unsigned long long pc7; |
112 | unsigned int package_id; | 121 | unsigned int package_id; |
122 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ | ||
123 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ | ||
124 | unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ | ||
125 | unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ | ||
126 | unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ | ||
127 | unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ | ||
128 | unsigned int pkg_temp_c; | ||
129 | |||
113 | } *package_even, *package_odd; | 130 | } *package_even, *package_odd; |
114 | 131 | ||
115 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | 132 | #define ODD_COUNTERS thread_odd, core_odd, package_odd |
@@ -247,6 +264,12 @@ void print_header(void) | |||
247 | outp += sprintf(outp, " %%c6"); | 264 | outp += sprintf(outp, " %%c6"); |
248 | if (do_snb_cstates) | 265 | if (do_snb_cstates) |
249 | outp += sprintf(outp, " %%c7"); | 266 | outp += sprintf(outp, " %%c7"); |
267 | |||
268 | if (do_dts) | ||
269 | outp += sprintf(outp, " CTMP"); | ||
270 | if (do_ptm) | ||
271 | outp += sprintf(outp, " PTMP"); | ||
272 | |||
250 | if (do_snb_cstates) | 273 | if (do_snb_cstates) |
251 | outp += sprintf(outp, " %%pc2"); | 274 | outp += sprintf(outp, " %%pc2"); |
252 | if (do_nhm_cstates) | 275 | if (do_nhm_cstates) |
@@ -256,6 +279,19 @@ void print_header(void) | |||
256 | if (do_snb_cstates) | 279 | if (do_snb_cstates) |
257 | outp += sprintf(outp, " %%pc7"); | 280 | outp += sprintf(outp, " %%pc7"); |
258 | 281 | ||
282 | if (do_rapl & RAPL_PKG) | ||
283 | outp += sprintf(outp, " Pkg_W"); | ||
284 | if (do_rapl & RAPL_CORES) | ||
285 | outp += sprintf(outp, " Cor_W"); | ||
286 | if (do_rapl & RAPL_GFX) | ||
287 | outp += sprintf(outp, " GFX_W"); | ||
288 | if (do_rapl & RAPL_DRAM) | ||
289 | outp += sprintf(outp, " RAM_W"); | ||
290 | if (do_rapl & RAPL_PKG_PERF_STATUS) | ||
291 | outp += sprintf(outp, " PKG_%%"); | ||
292 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | ||
293 | outp += sprintf(outp, " RAM_%%"); | ||
294 | |||
259 | outp += sprintf(outp, "\n"); | 295 | outp += sprintf(outp, "\n"); |
260 | } | 296 | } |
261 | 297 | ||
@@ -285,6 +321,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
285 | fprintf(stderr, "c3: %016llX\n", c->c3); | 321 | fprintf(stderr, "c3: %016llX\n", c->c3); |
286 | fprintf(stderr, "c6: %016llX\n", c->c6); | 322 | fprintf(stderr, "c6: %016llX\n", c->c6); |
287 | fprintf(stderr, "c7: %016llX\n", c->c7); | 323 | fprintf(stderr, "c7: %016llX\n", c->c7); |
324 | fprintf(stderr, "DTS: %dC\n", c->core_temp_c); | ||
288 | } | 325 | } |
289 | 326 | ||
290 | if (p) { | 327 | if (p) { |
@@ -293,6 +330,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
293 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | 330 | fprintf(stderr, "pc3: %016llX\n", p->pc3); |
294 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | 331 | fprintf(stderr, "pc6: %016llX\n", p->pc6); |
295 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | 332 | fprintf(stderr, "pc7: %016llX\n", p->pc7); |
333 | fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg); | ||
334 | fprintf(stderr, "Joules COR: %0X\n", p->energy_cores); | ||
335 | fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx); | ||
336 | fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram); | ||
337 | fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status); | ||
338 | fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); | ||
339 | fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c); | ||
296 | } | 340 | } |
297 | return 0; | 341 | return 0; |
298 | } | 342 | } |
@@ -302,14 +346,21 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
302 | * package: "pk" 2 columns %2d | 346 | * package: "pk" 2 columns %2d |
303 | * core: "cor" 3 columns %3d | 347 | * core: "cor" 3 columns %3d |
304 | * CPU: "CPU" 3 columns %3d | 348 | * CPU: "CPU" 3 columns %3d |
349 | * Pkg_W: %6.2 | ||
350 | * Cor_W: %6.2 | ||
351 | * GFX_W: %5.2 | ||
352 | * RAM_W: %5.2 | ||
305 | * GHz: "GHz" 3 columns %3.2 | 353 | * GHz: "GHz" 3 columns %3.2 |
306 | * TSC: "TSC" 3 columns %3.2 | 354 | * TSC: "TSC" 3 columns %3.2 |
307 | * percentage " %pc3" %6.2 | 355 | * percentage " %pc3" %6.2 |
356 | * Perf Status percentage: %5.2 | ||
357 | * "CTMP" 4 columns %4d | ||
308 | */ | 358 | */ |
309 | int format_counters(struct thread_data *t, struct core_data *c, | 359 | int format_counters(struct thread_data *t, struct core_data *c, |
310 | struct pkg_data *p) | 360 | struct pkg_data *p) |
311 | { | 361 | { |
312 | double interval_float; | 362 | double interval_float; |
363 | char *fmt5, *fmt6; | ||
313 | 364 | ||
314 | /* if showing only 1st thread in core and this isn't one, bail out */ | 365 | /* if showing only 1st thread in core and this isn't one, bail out */ |
315 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 366 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
@@ -349,7 +400,6 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
349 | if (show_cpu) | 400 | if (show_cpu) |
350 | outp += sprintf(outp, " %3d", t->cpu_id); | 401 | outp += sprintf(outp, " %3d", t->cpu_id); |
351 | } | 402 | } |
352 | |||
353 | /* %c0 */ | 403 | /* %c0 */ |
354 | if (do_nhm_cstates) { | 404 | if (do_nhm_cstates) { |
355 | if (show_pkg || show_core || show_cpu) | 405 | if (show_pkg || show_core || show_cpu) |
@@ -414,10 +464,16 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
414 | if (do_snb_cstates) | 464 | if (do_snb_cstates) |
415 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); | 465 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
416 | 466 | ||
467 | if (do_dts) | ||
468 | outp += sprintf(outp, " %4d", c->core_temp_c); | ||
469 | |||
417 | /* print per-package data only for 1st core in package */ | 470 | /* print per-package data only for 1st core in package */ |
418 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 471 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
419 | goto done; | 472 | goto done; |
420 | 473 | ||
474 | if (do_ptm) | ||
475 | outp += sprintf(outp, " %4d", p->pkg_temp_c); | ||
476 | |||
421 | if (do_snb_cstates) | 477 | if (do_snb_cstates) |
422 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); | 478 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
423 | if (do_nhm_cstates) | 479 | if (do_nhm_cstates) |
@@ -426,6 +482,32 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
426 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); | 482 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
427 | if (do_snb_cstates) | 483 | if (do_snb_cstates) |
428 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); | 484 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
485 | |||
486 | /* | ||
487 | * If measurement interval exceeds minimum RAPL Joule Counter range, | ||
488 | * indicate that results are suspect by printing "**" in fraction place. | ||
489 | */ | ||
490 | if (interval_float < rapl_joule_counter_range) { | ||
491 | fmt5 = " %5.2f"; | ||
492 | fmt6 = " %6.2f"; | ||
493 | } else { | ||
494 | fmt5 = " %3.0f**"; | ||
495 | fmt6 = " %4.0f**"; | ||
496 | } | ||
497 | |||
498 | if (do_rapl & RAPL_PKG) | ||
499 | outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float); | ||
500 | if (do_rapl & RAPL_CORES) | ||
501 | outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float); | ||
502 | if (do_rapl & RAPL_GFX) | ||
503 | outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float); | ||
504 | if (do_rapl & RAPL_DRAM) | ||
505 | outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float); | ||
506 | if (do_rapl & RAPL_PKG_PERF_STATUS ) | ||
507 | outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | ||
508 | if (do_rapl & RAPL_DRAM_PERF_STATUS ) | ||
509 | outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | ||
510 | |||
429 | done: | 511 | done: |
430 | outp += sprintf(outp, "\n"); | 512 | outp += sprintf(outp, "\n"); |
431 | 513 | ||
@@ -435,6 +517,7 @@ done: | |||
435 | void flush_stdout() | 517 | void flush_stdout() |
436 | { | 518 | { |
437 | fputs(output_buffer, stdout); | 519 | fputs(output_buffer, stdout); |
520 | fflush(stdout); | ||
438 | outp = output_buffer; | 521 | outp = output_buffer; |
439 | } | 522 | } |
440 | void flush_stderr() | 523 | void flush_stderr() |
@@ -461,6 +544,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ | |||
461 | for_all_cpus(format_counters, t, c, p); | 544 | for_all_cpus(format_counters, t, c, p); |
462 | } | 545 | } |
463 | 546 | ||
547 | #define DELTA_WRAP32(new, old) \ | ||
548 | if (new > old) { \ | ||
549 | old = new - old; \ | ||
550 | } else { \ | ||
551 | old = 0x100000000 + new - old; \ | ||
552 | } | ||
553 | |||
464 | void | 554 | void |
465 | delta_package(struct pkg_data *new, struct pkg_data *old) | 555 | delta_package(struct pkg_data *new, struct pkg_data *old) |
466 | { | 556 | { |
@@ -468,6 +558,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old) | |||
468 | old->pc3 = new->pc3 - old->pc3; | 558 | old->pc3 = new->pc3 - old->pc3; |
469 | old->pc6 = new->pc6 - old->pc6; | 559 | old->pc6 = new->pc6 - old->pc6; |
470 | old->pc7 = new->pc7 - old->pc7; | 560 | old->pc7 = new->pc7 - old->pc7; |
561 | old->pkg_temp_c = new->pkg_temp_c; | ||
562 | |||
563 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); | ||
564 | DELTA_WRAP32(new->energy_cores, old->energy_cores); | ||
565 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); | ||
566 | DELTA_WRAP32(new->energy_dram, old->energy_dram); | ||
567 | DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); | ||
568 | DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); | ||
471 | } | 569 | } |
472 | 570 | ||
473 | void | 571 | void |
@@ -476,6 +574,7 @@ delta_core(struct core_data *new, struct core_data *old) | |||
476 | old->c3 = new->c3 - old->c3; | 574 | old->c3 = new->c3 - old->c3; |
477 | old->c6 = new->c6 - old->c6; | 575 | old->c6 = new->c6 - old->c6; |
478 | old->c7 = new->c7 - old->c7; | 576 | old->c7 = new->c7 - old->c7; |
577 | old->core_temp_c = new->core_temp_c; | ||
479 | } | 578 | } |
480 | 579 | ||
481 | /* | 580 | /* |
@@ -582,11 +681,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
582 | c->c3 = 0; | 681 | c->c3 = 0; |
583 | c->c6 = 0; | 682 | c->c6 = 0; |
584 | c->c7 = 0; | 683 | c->c7 = 0; |
684 | c->core_temp_c = 0; | ||
585 | 685 | ||
586 | p->pc2 = 0; | 686 | p->pc2 = 0; |
587 | p->pc3 = 0; | 687 | p->pc3 = 0; |
588 | p->pc6 = 0; | 688 | p->pc6 = 0; |
589 | p->pc7 = 0; | 689 | p->pc7 = 0; |
690 | |||
691 | p->energy_pkg = 0; | ||
692 | p->energy_dram = 0; | ||
693 | p->energy_cores = 0; | ||
694 | p->energy_gfx = 0; | ||
695 | p->rapl_pkg_perf_status = 0; | ||
696 | p->rapl_dram_perf_status = 0; | ||
697 | p->pkg_temp_c = 0; | ||
590 | } | 698 | } |
591 | int sum_counters(struct thread_data *t, struct core_data *c, | 699 | int sum_counters(struct thread_data *t, struct core_data *c, |
592 | struct pkg_data *p) | 700 | struct pkg_data *p) |
@@ -607,6 +715,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
607 | average.cores.c6 += c->c6; | 715 | average.cores.c6 += c->c6; |
608 | average.cores.c7 += c->c7; | 716 | average.cores.c7 += c->c7; |
609 | 717 | ||
718 | average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); | ||
719 | |||
610 | /* sum per-pkg values only for 1st core in pkg */ | 720 | /* sum per-pkg values only for 1st core in pkg */ |
611 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 721 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
612 | return 0; | 722 | return 0; |
@@ -616,6 +726,15 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
616 | average.packages.pc6 += p->pc6; | 726 | average.packages.pc6 += p->pc6; |
617 | average.packages.pc7 += p->pc7; | 727 | average.packages.pc7 += p->pc7; |
618 | 728 | ||
729 | average.packages.energy_pkg += p->energy_pkg; | ||
730 | average.packages.energy_dram += p->energy_dram; | ||
731 | average.packages.energy_cores += p->energy_cores; | ||
732 | average.packages.energy_gfx += p->energy_gfx; | ||
733 | |||
734 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); | ||
735 | |||
736 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; | ||
737 | average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; | ||
619 | return 0; | 738 | return 0; |
620 | } | 739 | } |
621 | /* | 740 | /* |
@@ -667,23 +786,26 @@ static unsigned long long rdtsc(void) | |||
667 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 786 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
668 | { | 787 | { |
669 | int cpu = t->cpu_id; | 788 | int cpu = t->cpu_id; |
789 | unsigned long long msr; | ||
670 | 790 | ||
671 | if (cpu_migrate(cpu)) | 791 | if (cpu_migrate(cpu)) { |
792 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
672 | return -1; | 793 | return -1; |
794 | } | ||
673 | 795 | ||
674 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ | 796 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
675 | 797 | ||
676 | if (has_aperf) { | 798 | if (has_aperf) { |
677 | if (get_msr(cpu, MSR_APERF, &t->aperf)) | 799 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) |
678 | return -3; | 800 | return -3; |
679 | if (get_msr(cpu, MSR_MPERF, &t->mperf)) | 801 | if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) |
680 | return -4; | 802 | return -4; |
681 | } | 803 | } |
682 | 804 | ||
683 | if (extra_delta_offset32) { | 805 | if (extra_delta_offset32) { |
684 | if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) | 806 | if (get_msr(cpu, extra_delta_offset32, &msr)) |
685 | return -5; | 807 | return -5; |
686 | t->extra_delta32 &= 0xFFFFFFFF; | 808 | t->extra_delta32 = msr & 0xFFFFFFFF; |
687 | } | 809 | } |
688 | 810 | ||
689 | if (extra_delta_offset64) | 811 | if (extra_delta_offset64) |
@@ -691,9 +813,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
691 | return -5; | 813 | return -5; |
692 | 814 | ||
693 | if (extra_msr_offset32) { | 815 | if (extra_msr_offset32) { |
694 | if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) | 816 | if (get_msr(cpu, extra_msr_offset32, &msr)) |
695 | return -5; | 817 | return -5; |
696 | t->extra_msr32 &= 0xFFFFFFFF; | 818 | t->extra_msr32 = msr & 0xFFFFFFFF; |
697 | } | 819 | } |
698 | 820 | ||
699 | if (extra_msr_offset64) | 821 | if (extra_msr_offset64) |
@@ -715,6 +837,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
715 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | 837 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) |
716 | return -8; | 838 | return -8; |
717 | 839 | ||
840 | if (do_dts) { | ||
841 | if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) | ||
842 | return -9; | ||
843 | c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | ||
844 | } | ||
845 | |||
846 | |||
718 | /* collect package counters only for 1st core in package */ | 847 | /* collect package counters only for 1st core in package */ |
719 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 848 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
720 | return 0; | 849 | return 0; |
@@ -731,6 +860,41 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
731 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | 860 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) |
732 | return -12; | 861 | return -12; |
733 | } | 862 | } |
863 | if (do_rapl & RAPL_PKG) { | ||
864 | if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) | ||
865 | return -13; | ||
866 | p->energy_pkg = msr & 0xFFFFFFFF; | ||
867 | } | ||
868 | if (do_rapl & RAPL_CORES) { | ||
869 | if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) | ||
870 | return -14; | ||
871 | p->energy_cores = msr & 0xFFFFFFFF; | ||
872 | } | ||
873 | if (do_rapl & RAPL_DRAM) { | ||
874 | if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) | ||
875 | return -15; | ||
876 | p->energy_dram = msr & 0xFFFFFFFF; | ||
877 | } | ||
878 | if (do_rapl & RAPL_GFX) { | ||
879 | if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) | ||
880 | return -16; | ||
881 | p->energy_gfx = msr & 0xFFFFFFFF; | ||
882 | } | ||
883 | if (do_rapl & RAPL_PKG_PERF_STATUS) { | ||
884 | if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) | ||
885 | return -16; | ||
886 | p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; | ||
887 | } | ||
888 | if (do_rapl & RAPL_DRAM_PERF_STATUS) { | ||
889 | if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) | ||
890 | return -16; | ||
891 | p->rapl_dram_perf_status = msr & 0xFFFFFFFF; | ||
892 | } | ||
893 | if (do_ptm) { | ||
894 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) | ||
895 | return -17; | ||
896 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | ||
897 | } | ||
734 | return 0; | 898 | return 0; |
735 | } | 899 | } |
736 | 900 | ||
@@ -742,10 +906,10 @@ void print_verbose_header(void) | |||
742 | if (!do_nehalem_platform_info) | 906 | if (!do_nehalem_platform_info) |
743 | return; | 907 | return; |
744 | 908 | ||
745 | get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); | 909 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); |
746 | 910 | ||
747 | if (verbose > 1) | 911 | if (verbose) |
748 | fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); | 912 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); |
749 | 913 | ||
750 | ratio = (msr >> 40) & 0xFF; | 914 | ratio = (msr >> 40) & 0xFF; |
751 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", | 915 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", |
@@ -760,8 +924,8 @@ void print_verbose_header(void) | |||
760 | 924 | ||
761 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); | 925 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); |
762 | 926 | ||
763 | if (verbose > 1) | 927 | if (verbose) |
764 | fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); | 928 | fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
765 | 929 | ||
766 | ratio = (msr >> 56) & 0xFF; | 930 | ratio = (msr >> 56) & 0xFF; |
767 | if (ratio) | 931 | if (ratio) |
@@ -804,14 +968,56 @@ void print_verbose_header(void) | |||
804 | ratio, bclk, ratio * bclk); | 968 | ratio, bclk, ratio * bclk); |
805 | 969 | ||
806 | print_nhm_turbo_ratio_limits: | 970 | print_nhm_turbo_ratio_limits: |
971 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | ||
972 | |||
973 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
974 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
975 | |||
976 | fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); | ||
977 | |||
978 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", | ||
979 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | ||
980 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | ||
981 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | ||
982 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | ||
983 | (msr & (1 << 15)) ? "" : "UN", | ||
984 | (unsigned int)msr & 7); | ||
985 | |||
986 | |||
987 | switch(msr & 0x7) { | ||
988 | case 0: | ||
989 | fprintf(stderr, "pc0"); | ||
990 | break; | ||
991 | case 1: | ||
992 | fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); | ||
993 | break; | ||
994 | case 2: | ||
995 | fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); | ||
996 | break; | ||
997 | case 3: | ||
998 | fprintf(stderr, "pc6"); | ||
999 | break; | ||
1000 | case 4: | ||
1001 | fprintf(stderr, "pc7"); | ||
1002 | break; | ||
1003 | case 5: | ||
1004 | fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); | ||
1005 | break; | ||
1006 | case 7: | ||
1007 | fprintf(stderr, "unlimited"); | ||
1008 | break; | ||
1009 | default: | ||
1010 | fprintf(stderr, "invalid"); | ||
1011 | } | ||
1012 | fprintf(stderr, ")\n"); | ||
807 | 1013 | ||
808 | if (!do_nehalem_turbo_ratio_limit) | 1014 | if (!do_nehalem_turbo_ratio_limit) |
809 | return; | 1015 | return; |
810 | 1016 | ||
811 | get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); | 1017 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
812 | 1018 | ||
813 | if (verbose > 1) | 1019 | if (verbose) |
814 | fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); | 1020 | fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
815 | 1021 | ||
816 | ratio = (msr >> 56) & 0xFF; | 1022 | ratio = (msr >> 56) & 0xFF; |
817 | if (ratio) | 1023 | if (ratio) |
@@ -1100,15 +1306,22 @@ int mark_cpu_present(int cpu) | |||
1100 | void turbostat_loop() | 1306 | void turbostat_loop() |
1101 | { | 1307 | { |
1102 | int retval; | 1308 | int retval; |
1309 | int restarted = 0; | ||
1103 | 1310 | ||
1104 | restart: | 1311 | restart: |
1312 | restarted++; | ||
1313 | |||
1105 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 1314 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
1106 | if (retval < -1) { | 1315 | if (retval < -1) { |
1107 | exit(retval); | 1316 | exit(retval); |
1108 | } else if (retval == -1) { | 1317 | } else if (retval == -1) { |
1318 | if (restarted > 1) { | ||
1319 | exit(retval); | ||
1320 | } | ||
1109 | re_initialize(); | 1321 | re_initialize(); |
1110 | goto restart; | 1322 | goto restart; |
1111 | } | 1323 | } |
1324 | restarted = 0; | ||
1112 | gettimeofday(&tv_even, (struct timezone *)NULL); | 1325 | gettimeofday(&tv_even, (struct timezone *)NULL); |
1113 | 1326 | ||
1114 | while (1) { | 1327 | while (1) { |
@@ -1207,6 +1420,299 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
1207 | } | 1420 | } |
1208 | } | 1421 | } |
1209 | 1422 | ||
1423 | /* | ||
1424 | * print_epb() | ||
1425 | * Decode the ENERGY_PERF_BIAS MSR | ||
1426 | */ | ||
1427 | int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1428 | { | ||
1429 | unsigned long long msr; | ||
1430 | char *epb_string; | ||
1431 | int cpu; | ||
1432 | |||
1433 | if (!has_epb) | ||
1434 | return 0; | ||
1435 | |||
1436 | cpu = t->cpu_id; | ||
1437 | |||
1438 | /* EPB is per-package */ | ||
1439 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1440 | return 0; | ||
1441 | |||
1442 | if (cpu_migrate(cpu)) { | ||
1443 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1444 | return -1; | ||
1445 | } | ||
1446 | |||
1447 | if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) | ||
1448 | return 0; | ||
1449 | |||
1450 | switch (msr & 0x7) { | ||
1451 | case ENERGY_PERF_BIAS_PERFORMANCE: | ||
1452 | epb_string = "performance"; | ||
1453 | break; | ||
1454 | case ENERGY_PERF_BIAS_NORMAL: | ||
1455 | epb_string = "balanced"; | ||
1456 | break; | ||
1457 | case ENERGY_PERF_BIAS_POWERSAVE: | ||
1458 | epb_string = "powersave"; | ||
1459 | break; | ||
1460 | default: | ||
1461 | epb_string = "custom"; | ||
1462 | break; | ||
1463 | } | ||
1464 | fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); | ||
1465 | |||
1466 | return 0; | ||
1467 | } | ||
1468 | |||
1469 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | ||
1470 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | ||
1471 | |||
1472 | /* | ||
1473 | * rapl_probe() | ||
1474 | * | ||
1475 | * sets do_rapl | ||
1476 | */ | ||
1477 | void rapl_probe(unsigned int family, unsigned int model) | ||
1478 | { | ||
1479 | unsigned long long msr; | ||
1480 | double tdp; | ||
1481 | |||
1482 | if (!genuine_intel) | ||
1483 | return; | ||
1484 | |||
1485 | if (family != 6) | ||
1486 | return; | ||
1487 | |||
1488 | switch (model) { | ||
1489 | case 0x2A: | ||
1490 | case 0x3A: | ||
1491 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; | ||
1492 | break; | ||
1493 | case 0x2D: | ||
1494 | case 0x3E: | ||
1495 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; | ||
1496 | break; | ||
1497 | default: | ||
1498 | return; | ||
1499 | } | ||
1500 | |||
1501 | /* units on package 0, verify later other packages match */ | ||
1502 | if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) | ||
1503 | return; | ||
1504 | |||
1505 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
1506 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
1507 | rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); | ||
1508 | |||
1509 | /* get TDP to determine energy counter range */ | ||
1510 | if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) | ||
1511 | return; | ||
1512 | |||
1513 | tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; | ||
1514 | |||
1515 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; | ||
1516 | |||
1517 | if (verbose) | ||
1518 | fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); | ||
1519 | |||
1520 | return; | ||
1521 | } | ||
1522 | |||
1523 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1524 | { | ||
1525 | unsigned long long msr; | ||
1526 | unsigned int dts; | ||
1527 | int cpu; | ||
1528 | |||
1529 | if (!(do_dts || do_ptm)) | ||
1530 | return 0; | ||
1531 | |||
1532 | cpu = t->cpu_id; | ||
1533 | |||
1534 | /* DTS is per-core, no need to print for each thread */ | ||
1535 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
1536 | return 0; | ||
1537 | |||
1538 | if (cpu_migrate(cpu)) { | ||
1539 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1540 | return -1; | ||
1541 | } | ||
1542 | |||
1543 | if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { | ||
1544 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) | ||
1545 | return 0; | ||
1546 | |||
1547 | dts = (msr >> 16) & 0x7F; | ||
1548 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", | ||
1549 | cpu, msr, tcc_activation_temp - dts); | ||
1550 | |||
1551 | #ifdef THERM_DEBUG | ||
1552 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) | ||
1553 | return 0; | ||
1554 | |||
1555 | dts = (msr >> 16) & 0x7F; | ||
1556 | dts2 = (msr >> 8) & 0x7F; | ||
1557 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | ||
1558 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | ||
1559 | #endif | ||
1560 | } | ||
1561 | |||
1562 | |||
1563 | if (do_dts) { | ||
1564 | unsigned int resolution; | ||
1565 | |||
1566 | if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) | ||
1567 | return 0; | ||
1568 | |||
1569 | dts = (msr >> 16) & 0x7F; | ||
1570 | resolution = (msr >> 27) & 0xF; | ||
1571 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", | ||
1572 | cpu, msr, tcc_activation_temp - dts, resolution); | ||
1573 | |||
1574 | #ifdef THERM_DEBUG | ||
1575 | if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) | ||
1576 | return 0; | ||
1577 | |||
1578 | dts = (msr >> 16) & 0x7F; | ||
1579 | dts2 = (msr >> 8) & 0x7F; | ||
1580 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | ||
1581 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | ||
1582 | #endif | ||
1583 | } | ||
1584 | |||
1585 | return 0; | ||
1586 | } | ||
1587 | |||
1588 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) | ||
1589 | { | ||
1590 | fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", | ||
1591 | cpu, label, | ||
1592 | ((msr >> 15) & 1) ? "EN" : "DIS", | ||
1593 | ((msr >> 0) & 0x7FFF) * rapl_power_units, | ||
1594 | (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, | ||
1595 | (((msr >> 16) & 1) ? "EN" : "DIS")); | ||
1596 | |||
1597 | return; | ||
1598 | } | ||
1599 | |||
1600 | int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1601 | { | ||
1602 | unsigned long long msr; | ||
1603 | int cpu; | ||
1604 | double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; | ||
1605 | |||
1606 | if (!do_rapl) | ||
1607 | return 0; | ||
1608 | |||
1609 | /* RAPL counters are per package, so print only for 1st thread/package */ | ||
1610 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1611 | return 0; | ||
1612 | |||
1613 | cpu = t->cpu_id; | ||
1614 | if (cpu_migrate(cpu)) { | ||
1615 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1616 | return -1; | ||
1617 | } | ||
1618 | |||
1619 | if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) | ||
1620 | return -1; | ||
1621 | |||
1622 | local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
1623 | local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
1624 | local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); | ||
1625 | |||
1626 | if (local_rapl_power_units != rapl_power_units) | ||
1627 | fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); | ||
1628 | if (local_rapl_energy_units != rapl_energy_units) | ||
1629 | fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); | ||
1630 | if (local_rapl_time_units != rapl_time_units) | ||
1631 | fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); | ||
1632 | |||
1633 | if (verbose) { | ||
1634 | fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " | ||
1635 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, | ||
1636 | local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); | ||
1637 | } | ||
1638 | if (do_rapl & RAPL_PKG) { | ||
1639 | if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) | ||
1640 | return -5; | ||
1641 | |||
1642 | |||
1643 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | ||
1644 | cpu, msr, | ||
1645 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1646 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1647 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1648 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | ||
1649 | |||
1650 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) | ||
1651 | return -9; | ||
1652 | |||
1653 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1654 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); | ||
1655 | |||
1656 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); | ||
1657 | fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", | ||
1658 | cpu, | ||
1659 | ((msr >> 47) & 1) ? "EN" : "DIS", | ||
1660 | ((msr >> 32) & 0x7FFF) * rapl_power_units, | ||
1661 | (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, | ||
1662 | ((msr >> 48) & 1) ? "EN" : "DIS"); | ||
1663 | } | ||
1664 | |||
1665 | if (do_rapl & RAPL_DRAM) { | ||
1666 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) | ||
1667 | return -6; | ||
1668 | |||
1669 | |||
1670 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | ||
1671 | cpu, msr, | ||
1672 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1673 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1674 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
1675 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | ||
1676 | |||
1677 | |||
1678 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) | ||
1679 | return -9; | ||
1680 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1681 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
1682 | |||
1683 | print_power_limit_msr(cpu, msr, "DRAM Limit"); | ||
1684 | } | ||
1685 | if (do_rapl & RAPL_CORES) { | ||
1686 | if (verbose) { | ||
1687 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) | ||
1688 | return -7; | ||
1689 | |||
1690 | fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); | ||
1691 | |||
1692 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) | ||
1693 | return -9; | ||
1694 | fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1695 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
1696 | print_power_limit_msr(cpu, msr, "Cores Limit"); | ||
1697 | } | ||
1698 | } | ||
1699 | if (do_rapl & RAPL_GFX) { | ||
1700 | if (verbose) { | ||
1701 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) | ||
1702 | return -8; | ||
1703 | |||
1704 | fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); | ||
1705 | |||
1706 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) | ||
1707 | return -9; | ||
1708 | fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
1709 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
1710 | print_power_limit_msr(cpu, msr, "GFX Limit"); | ||
1711 | } | ||
1712 | } | ||
1713 | return 0; | ||
1714 | } | ||
1715 | |||
1210 | 1716 | ||
1211 | int is_snb(unsigned int family, unsigned int model) | 1717 | int is_snb(unsigned int family, unsigned int model) |
1212 | { | 1718 | { |
@@ -1231,6 +1737,72 @@ double discover_bclk(unsigned int family, unsigned int model) | |||
1231 | return 133.33; | 1737 | return 133.33; |
1232 | } | 1738 | } |
1233 | 1739 | ||
1740 | /* | ||
1741 | * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where | ||
1742 | * the Thermal Control Circuit (TCC) activates. | ||
1743 | * This is usually equal to tjMax. | ||
1744 | * | ||
1745 | * Older processors do not have this MSR, so there we guess, | ||
1746 | * but also allow cmdline over-ride with -T. | ||
1747 | * | ||
1748 | * Several MSR temperature values are in units of degrees-C | ||
1749 | * below this value, including the Digital Thermal Sensor (DTS), | ||
1750 | * Package Thermal Management Sensor (PTM), and thermal event thresholds. | ||
1751 | */ | ||
1752 | int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
1753 | { | ||
1754 | unsigned long long msr; | ||
1755 | unsigned int target_c_local; | ||
1756 | int cpu; | ||
1757 | |||
1758 | /* tcc_activation_temp is used only for dts or ptm */ | ||
1759 | if (!(do_dts || do_ptm)) | ||
1760 | return 0; | ||
1761 | |||
1762 | /* this is a per-package concept */ | ||
1763 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
1764 | return 0; | ||
1765 | |||
1766 | cpu = t->cpu_id; | ||
1767 | if (cpu_migrate(cpu)) { | ||
1768 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
1769 | return -1; | ||
1770 | } | ||
1771 | |||
1772 | if (tcc_activation_temp_override != 0) { | ||
1773 | tcc_activation_temp = tcc_activation_temp_override; | ||
1774 | fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", | ||
1775 | cpu, tcc_activation_temp); | ||
1776 | return 0; | ||
1777 | } | ||
1778 | |||
1779 | /* Temperature Target MSR is Nehalem and newer only */ | ||
1780 | if (!do_nehalem_platform_info) | ||
1781 | goto guess; | ||
1782 | |||
1783 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) | ||
1784 | goto guess; | ||
1785 | |||
1786 | target_c_local = (msr >> 16) & 0x7F; | ||
1787 | |||
1788 | if (verbose) | ||
1789 | fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", | ||
1790 | cpu, msr, target_c_local); | ||
1791 | |||
1792 | if (target_c_local < 85 || target_c_local > 120) | ||
1793 | goto guess; | ||
1794 | |||
1795 | tcc_activation_temp = target_c_local; | ||
1796 | |||
1797 | return 0; | ||
1798 | |||
1799 | guess: | ||
1800 | tcc_activation_temp = TJMAX_DEFAULT; | ||
1801 | fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", | ||
1802 | cpu, tcc_activation_temp); | ||
1803 | |||
1804 | return 0; | ||
1805 | } | ||
1234 | void check_cpuid() | 1806 | void check_cpuid() |
1235 | { | 1807 | { |
1236 | unsigned int eax, ebx, ecx, edx, max_level; | 1808 | unsigned int eax, ebx, ecx, edx, max_level; |
@@ -1244,7 +1816,7 @@ void check_cpuid() | |||
1244 | genuine_intel = 1; | 1816 | genuine_intel = 1; |
1245 | 1817 | ||
1246 | if (verbose) | 1818 | if (verbose) |
1247 | fprintf(stderr, "%.4s%.4s%.4s ", | 1819 | fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", |
1248 | (char *)&ebx, (char *)&edx, (char *)&ecx); | 1820 | (char *)&ebx, (char *)&edx, (char *)&ecx); |
1249 | 1821 | ||
1250 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); | 1822 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); |
@@ -1295,10 +1867,19 @@ void check_cpuid() | |||
1295 | 1867 | ||
1296 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); | 1868 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); |
1297 | has_aperf = ecx & (1 << 0); | 1869 | has_aperf = ecx & (1 << 0); |
1298 | if (!has_aperf) { | 1870 | do_dts = eax & (1 << 0); |
1299 | fprintf(stderr, "No APERF MSR\n"); | 1871 | do_ptm = eax & (1 << 6); |
1300 | exit(1); | 1872 | has_epb = ecx & (1 << 3); |
1301 | } | 1873 | |
1874 | if (verbose) | ||
1875 | fprintf(stderr, "CPUID(6): %s%s%s%s\n", | ||
1876 | has_aperf ? "APERF" : "No APERF!", | ||
1877 | do_dts ? ", DTS" : "", | ||
1878 | do_ptm ? ", PTM": "", | ||
1879 | has_epb ? ", EPB": ""); | ||
1880 | |||
1881 | if (!has_aperf) | ||
1882 | exit(-1); | ||
1302 | 1883 | ||
1303 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; | 1884 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; |
1304 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ | 1885 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ |
@@ -1307,12 +1888,15 @@ void check_cpuid() | |||
1307 | 1888 | ||
1308 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); | 1889 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); |
1309 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); | 1890 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); |
1891 | rapl_probe(family, model); | ||
1892 | |||
1893 | return; | ||
1310 | } | 1894 | } |
1311 | 1895 | ||
1312 | 1896 | ||
1313 | void usage() | 1897 | void usage() |
1314 | { | 1898 | { |
1315 | fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", | 1899 | fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", |
1316 | progname); | 1900 | progname); |
1317 | exit(1); | 1901 | exit(1); |
1318 | } | 1902 | } |
@@ -1548,6 +2132,17 @@ void turbostat_init() | |||
1548 | 2132 | ||
1549 | if (verbose) | 2133 | if (verbose) |
1550 | print_verbose_header(); | 2134 | print_verbose_header(); |
2135 | |||
2136 | if (verbose) | ||
2137 | for_all_cpus(print_epb, ODD_COUNTERS); | ||
2138 | |||
2139 | if (verbose) | ||
2140 | for_all_cpus(print_rapl, ODD_COUNTERS); | ||
2141 | |||
2142 | for_all_cpus(set_temperature_target, ODD_COUNTERS); | ||
2143 | |||
2144 | if (verbose) | ||
2145 | for_all_cpus(print_thermal, ODD_COUNTERS); | ||
1551 | } | 2146 | } |
1552 | 2147 | ||
1553 | int fork_it(char **argv) | 2148 | int fork_it(char **argv) |
@@ -1604,7 +2199,7 @@ void cmdline(int argc, char **argv) | |||
1604 | 2199 | ||
1605 | progname = argv[0]; | 2200 | progname = argv[0]; |
1606 | 2201 | ||
1607 | while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { | 2202 | while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) { |
1608 | switch (opt) { | 2203 | switch (opt) { |
1609 | case 'p': | 2204 | case 'p': |
1610 | show_core_only++; | 2205 | show_core_only++; |
@@ -1636,6 +2231,12 @@ void cmdline(int argc, char **argv) | |||
1636 | case 'M': | 2231 | case 'M': |
1637 | sscanf(optarg, "%x", &extra_msr_offset64); | 2232 | sscanf(optarg, "%x", &extra_msr_offset64); |
1638 | break; | 2233 | break; |
2234 | case 'R': | ||
2235 | rapl_verbose++; | ||
2236 | break; | ||
2237 | case 'T': | ||
2238 | tcc_activation_temp_override = atoi(optarg); | ||
2239 | break; | ||
1639 | default: | 2240 | default: |
1640 | usage(); | 2241 | usage(); |
1641 | } | 2242 | } |
@@ -1646,8 +2247,8 @@ int main(int argc, char **argv) | |||
1646 | { | 2247 | { |
1647 | cmdline(argc, argv); | 2248 | cmdline(argc, argv); |
1648 | 2249 | ||
1649 | if (verbose > 1) | 2250 | if (verbose) |
1650 | fprintf(stderr, "turbostat v2.1 October 6, 2012" | 2251 | fprintf(stderr, "turbostat v3.0 November 23, 2012" |
1651 | " - Len Brown <lenb@kernel.org>\n"); | 2252 | " - Len Brown <lenb@kernel.org>\n"); |
1652 | 2253 | ||
1653 | turbostat_init(); | 2254 | turbostat_init(); |
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index f458237fdd79..971c9ffdcb50 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile | |||
@@ -1,8 +1,10 @@ | |||
1 | DESTDIR ?= | ||
2 | |||
1 | x86_energy_perf_policy : x86_energy_perf_policy.c | 3 | x86_energy_perf_policy : x86_energy_perf_policy.c |
2 | 4 | ||
3 | clean : | 5 | clean : |
4 | rm -f x86_energy_perf_policy | 6 | rm -f x86_energy_perf_policy |
5 | 7 | ||
6 | install : | 8 | install : |
7 | install x86_energy_perf_policy /usr/bin/ | 9 | install x86_energy_perf_policy ${DESTDIR}/usr/bin/ |
8 | install x86_energy_perf_policy.8 /usr/share/man/man8/ | 10 | install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/ |
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 33c5c7ee148f..40b3e5482f8a 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | |||
@@ -289,7 +289,7 @@ void for_every_cpu(void (func)(int)) | |||
289 | "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", | 289 | "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", |
290 | &cpu); | 290 | &cpu); |
291 | if (retval != 1) | 291 | if (retval != 1) |
292 | return; | 292 | break; |
293 | 293 | ||
294 | func(cpu); | 294 | func(cpu); |
295 | } | 295 | } |