diff options
| -rw-r--r-- | arch/x86/include/uapi/asm/msr-index.h | 37 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/Makefile | 21 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 103 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 677 | ||||
| -rw-r--r-- | tools/power/x86/x86_energy_perf_policy/Makefile | 6 | ||||
| -rw-r--r-- | tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 2 |
6 files changed, 765 insertions, 81 deletions
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 6e930b218724..433a59fb1a74 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
| @@ -35,11 +35,14 @@ | |||
| 35 | #define MSR_IA32_PERFCTR0 0x000000c1 | 35 | #define MSR_IA32_PERFCTR0 0x000000c1 |
| 36 | #define MSR_IA32_PERFCTR1 0x000000c2 | 36 | #define MSR_IA32_PERFCTR1 0x000000c2 |
| 37 | #define MSR_FSB_FREQ 0x000000cd | 37 | #define MSR_FSB_FREQ 0x000000cd |
| 38 | #define MSR_NHM_PLATFORM_INFO 0x000000ce | ||
| 38 | 39 | ||
| 39 | #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 | 40 | #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 |
| 40 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) | 41 | #define NHM_C3_AUTO_DEMOTE (1UL << 25) |
| 41 | #define NHM_C1_AUTO_DEMOTE (1UL << 26) | 42 | #define NHM_C1_AUTO_DEMOTE (1UL << 26) |
| 42 | #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) | 43 | #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) |
| 44 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
| 45 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
| 43 | 46 | ||
| 44 | #define MSR_MTRRcap 0x000000fe | 47 | #define MSR_MTRRcap 0x000000fe |
| 45 | #define MSR_IA32_BBL_CR_CTL 0x00000119 | 48 | #define MSR_IA32_BBL_CR_CTL 0x00000119 |
| @@ -55,6 +58,8 @@ | |||
| 55 | 58 | ||
| 56 | #define MSR_OFFCORE_RSP_0 0x000001a6 | 59 | #define MSR_OFFCORE_RSP_0 0x000001a6 |
| 57 | #define MSR_OFFCORE_RSP_1 0x000001a7 | 60 | #define MSR_OFFCORE_RSP_1 0x000001a7 |
| 61 | #define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad | ||
| 62 | #define MSR_IVT_TURBO_RATIO_LIMIT 0x000001ae | ||
| 58 | 63 | ||
| 59 | #define MSR_LBR_SELECT 0x000001c8 | 64 | #define MSR_LBR_SELECT 0x000001c8 |
| 60 | #define MSR_LBR_TOS 0x000001c9 | 65 | #define MSR_LBR_TOS 0x000001c9 |
| @@ -103,6 +108,38 @@ | |||
| 103 | #define MSR_IA32_MC0_ADDR 0x00000402 | 108 | #define MSR_IA32_MC0_ADDR 0x00000402 |
| 104 | #define MSR_IA32_MC0_MISC 0x00000403 | 109 | #define MSR_IA32_MC0_MISC 0x00000403 |
| 105 | 110 | ||
| 111 | /* C-state Residency Counters */ | ||
| 112 | #define MSR_PKG_C3_RESIDENCY 0x000003f8 | ||
| 113 | #define MSR_PKG_C6_RESIDENCY 0x000003f9 | ||
| 114 | #define MSR_PKG_C7_RESIDENCY 0x000003fa | ||
| 115 | #define MSR_CORE_C3_RESIDENCY 0x000003fc | ||
| 116 | #define MSR_CORE_C6_RESIDENCY 0x000003fd | ||
| 117 | #define MSR_CORE_C7_RESIDENCY 0x000003fe | ||
| 118 | #define MSR_PKG_C2_RESIDENCY 0x0000060d | ||
| 119 | |||
| 120 | /* Run Time Average Power Limiting (RAPL) Interface */ | ||
| 121 | |||
| 122 | #define MSR_RAPL_POWER_UNIT 0x00000606 | ||
| 123 | |||
| 124 | #define MSR_PKG_POWER_LIMIT 0x00000610 | ||
| 125 | #define MSR_PKG_ENERGY_STATUS 0x00000611 | ||
| 126 | #define MSR_PKG_PERF_STATUS 0x00000613 | ||
| 127 | #define MSR_PKG_POWER_INFO 0x00000614 | ||
| 128 | |||
| 129 | #define MSR_DRAM_POWER_LIMIT 0x00000618 | ||
| 130 | #define MSR_DRAM_ENERGY_STATUS 0x00000619 | ||
| 131 | #define MSR_DRAM_PERF_STATUS 0x0000061b | ||
| 132 | #define MSR_DRAM_POWER_INFO 0x0000061c | ||
| 133 | |||
| 134 | #define MSR_PP0_POWER_LIMIT 0x00000638 | ||
| 135 | #define MSR_PP0_ENERGY_STATUS 0x00000639 | ||
| 136 | #define MSR_PP0_POLICY 0x0000063a | ||
| 137 | #define MSR_PP0_PERF_STATUS 0x0000063b | ||
| 138 | |||
| 139 | #define MSR_PP1_POWER_LIMIT 0x00000640 | ||
| 140 | #define MSR_PP1_ENERGY_STATUS 0x00000641 | ||
| 141 | #define MSR_PP1_POLICY 0x00000642 | ||
| 142 | |||
| 106 | #define MSR_AMD64_MC0_MASK 0xc0010044 | 143 | #define MSR_AMD64_MC0_MASK 0xc0010044 |
| 107 | 144 | ||
| 108 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) | 145 | #define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) |
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index f85649554191..f09641da40d4 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile | |||
| @@ -1,9 +1,22 @@ | |||
| 1 | CC = $(CROSS_COMPILE)gcc | ||
| 2 | BUILD_OUTPUT := $(PWD) | ||
| 3 | PREFIX := /usr | ||
| 4 | DESTDIR := | ||
| 5 | |||
| 1 | turbostat : turbostat.c | 6 | turbostat : turbostat.c |
| 2 | CFLAGS += -Wall | 7 | CFLAGS += -Wall |
| 8 | CFLAGS += -I../../../../arch/x86/include/uapi/ | ||
| 9 | |||
| 10 | %: %.c | ||
| 11 | @mkdir -p $(BUILD_OUTPUT) | ||
| 12 | $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ | ||
| 3 | 13 | ||
| 14 | .PHONY : clean | ||
| 4 | clean : | 15 | clean : |
| 5 | rm -f turbostat | 16 | @rm -f $(BUILD_OUTPUT)/turbostat |
| 6 | 17 | ||
| 7 | install : | 18 | install : turbostat |
| 8 | install turbostat /usr/bin/turbostat | 19 | install -d $(DESTDIR)$(PREFIX)/bin |
| 9 | install turbostat.8 /usr/share/man/man8 | 20 | install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat |
| 21 | install -d $(DESTDIR)$(PREFIX)/share/man/man8 | ||
| 22 | install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 | ||
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index e4d0690cccf9..0d7dc2cfefb5 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 | |||
| @@ -11,16 +11,16 @@ turbostat \- Report processor frequency and idle statistics | |||
| 11 | .RB [ Options ] | 11 | .RB [ Options ] |
| 12 | .RB [ "\-i interval_sec" ] | 12 | .RB [ "\-i interval_sec" ] |
| 13 | .SH DESCRIPTION | 13 | .SH DESCRIPTION |
| 14 | \fBturbostat \fP reports processor topology, frequency | 14 | \fBturbostat \fP reports processor topology, frequency, |
| 15 | and idle power state statistics on modern X86 processors. | 15 | idle power-state statistics, temperature and power on modern X86 processors. |
| 16 | Either \fBcommand\fP is forked and statistics are printed | 16 | Either \fBcommand\fP is forked and statistics are printed |
| 17 | upon its completion, or statistics are printed periodically. | 17 | upon its completion, or statistics are printed periodically. |
| 18 | 18 | ||
| 19 | \fBturbostat \fP | 19 | \fBturbostat \fP |
| 20 | requires that the processor | 20 | must be run as root, and |
| 21 | minimally requires that the processor | ||
| 21 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. | 22 | supports an "invariant" TSC, plus the APERF and MPERF MSRs. |
| 22 | \fBturbostat \fP will report idle cpu power state residency | 23 | Additional information is reported depending on hardware counter support. |
| 23 | on processors that additionally support C-state residency counters. | ||
| 24 | 24 | ||
| 25 | .SS Options | 25 | .SS Options |
| 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. | 26 | The \fB-p\fP option limits output to the 1st thread in 1st core of each package. |
| @@ -57,7 +57,15 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T | |||
| 57 | \fBGHz\fP average clock rate while the CPU was in c0 state. | 57 | \fBGHz\fP average clock rate while the CPU was in c0 state. |
| 58 | \fBTSC\fP average GHz that the TSC ran during the entire interval. | 58 | \fBTSC\fP average GHz that the TSC ran during the entire interval. |
| 59 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. | 59 | \fB%c1, %c3, %c6, %c7\fP show the percentage residency in hardware core idle states. |
| 60 | \fBCTMP\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. | ||
| 61 | \fBPTMP\fP Degrees Celsius reported by the per-package Package Thermal Monitor. | ||
| 60 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. | 62 | \fB%pc2, %pc3, %pc6, %pc7\fP percentage residency in hardware package idle states. |
| 63 | \fBPkg_W\fP Watts consumed by the whole package. | ||
| 64 | \fBCor_W\fP Watts consumed by the core part of the package. | ||
| 65 | \fBGFX_W\fP Watts consumed by the Graphics part of the package -- available only on client processors. | ||
| 66 | \fBRAM_W\fP Watts consumed by the DRAM DIMMs -- available only on server processors. | ||
| 67 | \fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. | ||
| 68 | \fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. | ||
| 61 | .fi | 69 | .fi |
| 62 | .PP | 70 | .PP |
| 63 | .SH EXAMPLE | 71 | .SH EXAMPLE |
| @@ -66,50 +74,73 @@ Without any parameters, turbostat prints out counters every 5 seconds. | |||
| 66 | for turbostat to fork). | 74 | for turbostat to fork). |
| 67 | 75 | ||
| 68 | The first row of statistics is a summary for the entire system. | 76 | The first row of statistics is a summary for the entire system. |
| 69 | Note that the summary is a weighted average. | 77 | For residency % columns, the summary is a weighted average. |
| 78 | For Temperature columns, the summary is the column maximum. | ||
| 79 | For Watts columns, the summary is a system total. | ||
| 70 | Subsequent rows show per-CPU statistics. | 80 | Subsequent rows show per-CPU statistics. |
| 71 | 81 | ||
| 72 | .nf | 82 | .nf |
| 73 | [root@x980]# ./turbostat | 83 | [root@sandy]# ./turbostat |
| 74 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 84 | cor CPU %c0 GHz TSC %c1 %c3 %c6 %c7 CTMP PTMP %pc2 %pc3 %pc6 %pc7 Pkg_W Cor_W GFX_W |
| 75 | 0.09 1.62 3.38 1.83 0.32 97.76 1.26 83.61 | 85 | 0.06 0.80 2.29 0.11 0.00 0.00 99.83 47 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 |
| 76 | 0 0 0.15 1.62 3.38 10.23 0.05 89.56 1.26 83.61 | 86 | 0 0 0.07 0.80 2.29 0.07 0.00 0.00 99.86 40 40 0.26 0.01 0.44 98.78 3.49 0.12 0.14 |
| 77 | 0 6 0.05 1.62 3.38 10.34 | 87 | 0 4 0.03 0.80 2.29 0.12 |
| 78 | 1 2 0.03 1.62 3.38 0.07 0.05 99.86 | 88 | 1 1 0.04 0.80 2.29 0.25 0.01 0.00 99.71 40 |
| 79 | 1 8 0.03 1.62 3.38 0.06 | 89 | 1 5 0.16 0.80 2.29 0.13 |
| 80 | 2 4 0.21 1.62 3.38 0.10 1.49 98.21 | 90 | 2 2 0.05 0.80 2.29 0.06 0.01 0.00 99.88 40 |
| 81 | 2 10 0.02 1.62 3.38 0.29 | 91 | 2 6 0.03 0.80 2.29 0.08 |
| 82 | 8 1 0.04 1.62 3.38 0.04 0.08 99.84 | 92 | 3 3 0.05 0.80 2.29 0.08 0.00 0.00 99.87 47 |
| 83 | 8 7 0.01 1.62 3.38 0.06 | 93 | 3 7 0.04 0.84 2.29 0.09 |
| 84 | 9 3 0.53 1.62 3.38 0.10 0.20 99.17 | ||
| 85 | 9 9 0.02 1.62 3.38 0.60 | ||
| 86 | 10 5 0.01 1.62 3.38 0.02 0.04 99.92 | ||
| 87 | 10 11 0.02 1.62 3.38 0.02 | ||
| 88 | .fi | 94 | .fi |
| 89 | .SH SUMMARY EXAMPLE | 95 | .SH SUMMARY EXAMPLE |
| 90 | The "-s" option prints the column headers just once, | 96 | The "-s" option prints the column headers just once, |
| 91 | and then the one line system summary for each sample interval. | 97 | and then the one line system summary for each sample interval. |
| 92 | 98 | ||
| 93 | .nf | 99 | .nf |
| 94 | [root@x980]# ./turbostat -s | 100 | [root@wsm]# turbostat -S |
| 95 | %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | 101 | %c0 GHz TSC %c1 %c3 %c6 CTMP %pc3 %pc6 |
| 96 | 0.23 1.67 3.38 2.00 0.30 97.47 1.07 82.12 | 102 | 1.40 2.81 3.38 10.78 43.47 44.35 42 13.67 2.09 |
| 97 | 0.10 1.62 3.38 1.87 2.25 95.77 12.02 72.60 | 103 | 1.34 2.90 3.38 11.48 58.96 28.23 41 19.89 0.15 |
| 98 | 0.20 1.64 3.38 1.98 0.11 97.72 0.30 83.36 | 104 | 1.55 2.72 3.38 26.73 37.66 34.07 42 2.53 2.80 |
| 99 | 0.11 1.70 3.38 1.86 1.81 96.22 9.71 74.90 | 105 | 1.37 2.83 3.38 16.95 60.05 21.63 42 5.76 0.20 |
| 100 | .fi | 106 | .fi |
| 101 | .SH VERBOSE EXAMPLE | 107 | .SH VERBOSE EXAMPLE |
| 102 | The "-v" option adds verbosity to the output: | 108 | The "-v" option adds verbosity to the output: |
| 103 | 109 | ||
| 104 | .nf | 110 | .nf |
| 105 | GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2) | 111 | [root@ivy]# turbostat -v |
| 106 | 12 * 133 = 1600 MHz max efficiency | 112 | turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> |
| 107 | 25 * 133 = 3333 MHz TSC frequency | 113 | CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) |
| 108 | 26 * 133 = 3467 MHz max turbo 4 active cores | 114 | CPUID(6): APERF, DTS, PTM, EPB |
| 109 | 26 * 133 = 3467 MHz max turbo 3 active cores | 115 | RAPL: 851 sec. Joule Counter Range |
| 110 | 27 * 133 = 3600 MHz max turbo 2 active cores | 116 | cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 |
| 111 | 27 * 133 = 3600 MHz max turbo 1 active cores | 117 | 16 * 100 = 1600 MHz max efficiency |
| 112 | 118 | 35 * 100 = 3500 MHz TSC frequency | |
| 119 | cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) | ||
| 120 | cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 | ||
| 121 | 37 * 100 = 3700 MHz max turbo 4 active cores | ||
| 122 | 38 * 100 = 3800 MHz max turbo 3 active cores | ||
| 123 | 39 * 100 = 3900 MHz max turbo 2 active cores | ||
| 124 | 39 * 100 = 3900 MHz max turbo 1 active cores | ||
| 125 | cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) | ||
| 126 | cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) | ||
| 127 | cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) | ||
| 128 | cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) | ||
| 129 | cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) | ||
| 130 | cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) | ||
| 131 | cpu0: MSR_PP0_POLICY: 0 | ||
| 132 | cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) | ||
| 133 | cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | ||
| 134 | cpu0: MSR_PP1_POLICY: 0 | ||
| 135 | cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) | ||
| 136 | cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) | ||
| 137 | cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) | ||
| 138 | cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) | ||
| 139 | cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) | ||
| 140 | cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) | ||
| 141 | cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) | ||
| 142 | cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) | ||
| 143 | ... | ||
| 113 | .fi | 144 | .fi |
| 114 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency | 145 | The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency |
| 115 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal | 146 | available at the minimum package voltage. The \fBTSC frequency\fP is the nominal |
| @@ -142,7 +173,7 @@ cor CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6 | |||
| 142 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 | 173 | 10 5 1.42 3.43 3.38 2.14 30.99 65.44 |
| 143 | 10 11 0.16 2.88 3.38 3.40 | 174 | 10 11 0.16 2.88 3.38 3.40 |
| 144 | .fi | 175 | .fi |
| 145 | Above the cycle soaker drives cpu7 up its 3.6 Ghz turbo limit | 176 | Above the cycle soaker drives cpu7 up its 3.6 GHz turbo limit |
| 146 | while the other processors are generally in various states of idle. | 177 | while the other processors are generally in various states of idle. |
| 147 | 178 | ||
| 148 | Note that cpu1 and cpu7 are HT siblings within core8. | 179 | Note that cpu1 and cpu7 are HT siblings within core8. |
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ea095abbe97e..ce6d46038f74 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| 22 | #define _GNU_SOURCE | 22 | #define _GNU_SOURCE |
| 23 | #include <asm/msr.h> | ||
| 23 | #include <stdio.h> | 24 | #include <stdio.h> |
| 24 | #include <unistd.h> | 25 | #include <unistd.h> |
| 25 | #include <sys/types.h> | 26 | #include <sys/types.h> |
| @@ -35,28 +36,18 @@ | |||
| 35 | #include <ctype.h> | 36 | #include <ctype.h> |
| 36 | #include <sched.h> | 37 | #include <sched.h> |
| 37 | 38 | ||
| 38 | #define MSR_NEHALEM_PLATFORM_INFO 0xCE | ||
| 39 | #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD | ||
| 40 | #define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE | ||
| 41 | #define MSR_APERF 0xE8 | ||
| 42 | #define MSR_MPERF 0xE7 | ||
| 43 | #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ | ||
| 44 | #define MSR_PKG_C3_RESIDENCY 0x3F8 | ||
| 45 | #define MSR_PKG_C6_RESIDENCY 0x3F9 | ||
| 46 | #define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */ | ||
| 47 | #define MSR_CORE_C3_RESIDENCY 0x3FC | ||
| 48 | #define MSR_CORE_C6_RESIDENCY 0x3FD | ||
| 49 | #define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */ | ||
| 50 | |||
| 51 | char *proc_stat = "/proc/stat"; | 39 | char *proc_stat = "/proc/stat"; |
| 52 | unsigned int interval_sec = 5; /* set with -i interval_sec */ | 40 | unsigned int interval_sec = 5; /* set with -i interval_sec */ |
| 53 | unsigned int verbose; /* set with -v */ | 41 | unsigned int verbose; /* set with -v */ |
| 42 | unsigned int rapl_verbose; /* set with -R */ | ||
| 43 | unsigned int thermal_verbose; /* set with -T */ | ||
| 54 | unsigned int summary_only; /* set with -s */ | 44 | unsigned int summary_only; /* set with -s */ |
| 55 | unsigned int skip_c0; | 45 | unsigned int skip_c0; |
| 56 | unsigned int skip_c1; | 46 | unsigned int skip_c1; |
| 57 | unsigned int do_nhm_cstates; | 47 | unsigned int do_nhm_cstates; |
| 58 | unsigned int do_snb_cstates; | 48 | unsigned int do_snb_cstates; |
| 59 | unsigned int has_aperf; | 49 | unsigned int has_aperf; |
| 50 | unsigned int has_epb; | ||
| 60 | unsigned int units = 1000000000; /* Ghz etc */ | 51 | unsigned int units = 1000000000; /* Ghz etc */ |
| 61 | unsigned int genuine_intel; | 52 | unsigned int genuine_intel; |
| 62 | unsigned int has_invariant_tsc; | 53 | unsigned int has_invariant_tsc; |
| @@ -74,6 +65,23 @@ unsigned int show_cpu; | |||
| 74 | unsigned int show_pkg_only; | 65 | unsigned int show_pkg_only; |
| 75 | unsigned int show_core_only; | 66 | unsigned int show_core_only; |
| 76 | char *output_buffer, *outp; | 67 | char *output_buffer, *outp; |
| 68 | unsigned int do_rapl; | ||
| 69 | unsigned int do_dts; | ||
| 70 | unsigned int do_ptm; | ||
| 71 | unsigned int tcc_activation_temp; | ||
| 72 | unsigned int tcc_activation_temp_override; | ||
| 73 | double rapl_power_units, rapl_energy_units, rapl_time_units; | ||
| 74 | double rapl_joule_counter_range; | ||
| 75 | |||
| 76 | #define RAPL_PKG (1 << 0) | ||
| 77 | #define RAPL_CORES (1 << 1) | ||
| 78 | #define RAPL_GFX (1 << 2) | ||
| 79 | #define RAPL_DRAM (1 << 3) | ||
| 80 | #define RAPL_PKG_PERF_STATUS (1 << 4) | ||
| 81 | #define RAPL_DRAM_PERF_STATUS (1 << 5) | ||
| 82 | #define TJMAX_DEFAULT 100 | ||
| 83 | |||
| 84 | #define MAX(a, b) ((a) > (b) ? (a) : (b)) | ||
| 77 | 85 | ||
| 78 | int aperf_mperf_unstable; | 86 | int aperf_mperf_unstable; |
| 79 | int backwards_count; | 87 | int backwards_count; |
| @@ -101,6 +109,7 @@ struct core_data { | |||
| 101 | unsigned long long c3; | 109 | unsigned long long c3; |
| 102 | unsigned long long c6; | 110 | unsigned long long c6; |
| 103 | unsigned long long c7; | 111 | unsigned long long c7; |
| 112 | unsigned int core_temp_c; | ||
| 104 | unsigned int core_id; | 113 | unsigned int core_id; |
| 105 | } *core_even, *core_odd; | 114 | } *core_even, *core_odd; |
| 106 | 115 | ||
| @@ -110,6 +119,14 @@ struct pkg_data { | |||
| 110 | unsigned long long pc6; | 119 | unsigned long long pc6; |
| 111 | unsigned long long pc7; | 120 | unsigned long long pc7; |
| 112 | unsigned int package_id; | 121 | unsigned int package_id; |
| 122 | unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ | ||
| 123 | unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ | ||
| 124 | unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ | ||
| 125 | unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ | ||
| 126 | unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ | ||
| 127 | unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ | ||
| 128 | unsigned int pkg_temp_c; | ||
| 129 | |||
| 113 | } *package_even, *package_odd; | 130 | } *package_even, *package_odd; |
| 114 | 131 | ||
| 115 | #define ODD_COUNTERS thread_odd, core_odd, package_odd | 132 | #define ODD_COUNTERS thread_odd, core_odd, package_odd |
| @@ -247,6 +264,12 @@ void print_header(void) | |||
| 247 | outp += sprintf(outp, " %%c6"); | 264 | outp += sprintf(outp, " %%c6"); |
| 248 | if (do_snb_cstates) | 265 | if (do_snb_cstates) |
| 249 | outp += sprintf(outp, " %%c7"); | 266 | outp += sprintf(outp, " %%c7"); |
| 267 | |||
| 268 | if (do_dts) | ||
| 269 | outp += sprintf(outp, " CTMP"); | ||
| 270 | if (do_ptm) | ||
| 271 | outp += sprintf(outp, " PTMP"); | ||
| 272 | |||
| 250 | if (do_snb_cstates) | 273 | if (do_snb_cstates) |
| 251 | outp += sprintf(outp, " %%pc2"); | 274 | outp += sprintf(outp, " %%pc2"); |
| 252 | if (do_nhm_cstates) | 275 | if (do_nhm_cstates) |
| @@ -256,6 +279,19 @@ void print_header(void) | |||
| 256 | if (do_snb_cstates) | 279 | if (do_snb_cstates) |
| 257 | outp += sprintf(outp, " %%pc7"); | 280 | outp += sprintf(outp, " %%pc7"); |
| 258 | 281 | ||
| 282 | if (do_rapl & RAPL_PKG) | ||
| 283 | outp += sprintf(outp, " Pkg_W"); | ||
| 284 | if (do_rapl & RAPL_CORES) | ||
| 285 | outp += sprintf(outp, " Cor_W"); | ||
| 286 | if (do_rapl & RAPL_GFX) | ||
| 287 | outp += sprintf(outp, " GFX_W"); | ||
| 288 | if (do_rapl & RAPL_DRAM) | ||
| 289 | outp += sprintf(outp, " RAM_W"); | ||
| 290 | if (do_rapl & RAPL_PKG_PERF_STATUS) | ||
| 291 | outp += sprintf(outp, " PKG_%%"); | ||
| 292 | if (do_rapl & RAPL_DRAM_PERF_STATUS) | ||
| 293 | outp += sprintf(outp, " RAM_%%"); | ||
| 294 | |||
| 259 | outp += sprintf(outp, "\n"); | 295 | outp += sprintf(outp, "\n"); |
| 260 | } | 296 | } |
| 261 | 297 | ||
| @@ -285,6 +321,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
| 285 | fprintf(stderr, "c3: %016llX\n", c->c3); | 321 | fprintf(stderr, "c3: %016llX\n", c->c3); |
| 286 | fprintf(stderr, "c6: %016llX\n", c->c6); | 322 | fprintf(stderr, "c6: %016llX\n", c->c6); |
| 287 | fprintf(stderr, "c7: %016llX\n", c->c7); | 323 | fprintf(stderr, "c7: %016llX\n", c->c7); |
| 324 | fprintf(stderr, "DTS: %dC\n", c->core_temp_c); | ||
| 288 | } | 325 | } |
| 289 | 326 | ||
| 290 | if (p) { | 327 | if (p) { |
| @@ -293,6 +330,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
| 293 | fprintf(stderr, "pc3: %016llX\n", p->pc3); | 330 | fprintf(stderr, "pc3: %016llX\n", p->pc3); |
| 294 | fprintf(stderr, "pc6: %016llX\n", p->pc6); | 331 | fprintf(stderr, "pc6: %016llX\n", p->pc6); |
| 295 | fprintf(stderr, "pc7: %016llX\n", p->pc7); | 332 | fprintf(stderr, "pc7: %016llX\n", p->pc7); |
| 333 | fprintf(stderr, "Joules PKG: %0X\n", p->energy_pkg); | ||
| 334 | fprintf(stderr, "Joules COR: %0X\n", p->energy_cores); | ||
| 335 | fprintf(stderr, "Joules GFX: %0X\n", p->energy_gfx); | ||
| 336 | fprintf(stderr, "Joules RAM: %0X\n", p->energy_dram); | ||
| 337 | fprintf(stderr, "Throttle PKG: %0X\n", p->rapl_pkg_perf_status); | ||
| 338 | fprintf(stderr, "Throttle RAM: %0X\n", p->rapl_dram_perf_status); | ||
| 339 | fprintf(stderr, "PTM: %dC\n", p->pkg_temp_c); | ||
| 296 | } | 340 | } |
| 297 | return 0; | 341 | return 0; |
| 298 | } | 342 | } |
| @@ -302,14 +346,21 @@ int dump_counters(struct thread_data *t, struct core_data *c, | |||
| 302 | * package: "pk" 2 columns %2d | 346 | * package: "pk" 2 columns %2d |
| 303 | * core: "cor" 3 columns %3d | 347 | * core: "cor" 3 columns %3d |
| 304 | * CPU: "CPU" 3 columns %3d | 348 | * CPU: "CPU" 3 columns %3d |
| 349 | * Pkg_W: %6.2 | ||
| 350 | * Cor_W: %6.2 | ||
| 351 | * GFX_W: %5.2 | ||
| 352 | * RAM_W: %5.2 | ||
| 305 | * GHz: "GHz" 3 columns %3.2 | 353 | * GHz: "GHz" 3 columns %3.2 |
| 306 | * TSC: "TSC" 3 columns %3.2 | 354 | * TSC: "TSC" 3 columns %3.2 |
| 307 | * percentage " %pc3" %6.2 | 355 | * percentage " %pc3" %6.2 |
| 356 | * Perf Status percentage: %5.2 | ||
| 357 | * "CTMP" 4 columns %4d | ||
| 308 | */ | 358 | */ |
| 309 | int format_counters(struct thread_data *t, struct core_data *c, | 359 | int format_counters(struct thread_data *t, struct core_data *c, |
| 310 | struct pkg_data *p) | 360 | struct pkg_data *p) |
| 311 | { | 361 | { |
| 312 | double interval_float; | 362 | double interval_float; |
| 363 | char *fmt5, *fmt6; | ||
| 313 | 364 | ||
| 314 | /* if showing only 1st thread in core and this isn't one, bail out */ | 365 | /* if showing only 1st thread in core and this isn't one, bail out */ |
| 315 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | 366 | if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) |
| @@ -349,7 +400,6 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 349 | if (show_cpu) | 400 | if (show_cpu) |
| 350 | outp += sprintf(outp, " %3d", t->cpu_id); | 401 | outp += sprintf(outp, " %3d", t->cpu_id); |
| 351 | } | 402 | } |
| 352 | |||
| 353 | /* %c0 */ | 403 | /* %c0 */ |
| 354 | if (do_nhm_cstates) { | 404 | if (do_nhm_cstates) { |
| 355 | if (show_pkg || show_core || show_cpu) | 405 | if (show_pkg || show_core || show_cpu) |
| @@ -414,10 +464,16 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 414 | if (do_snb_cstates) | 464 | if (do_snb_cstates) |
| 415 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); | 465 | outp += sprintf(outp, " %6.2f", 100.0 * c->c7/t->tsc); |
| 416 | 466 | ||
| 467 | if (do_dts) | ||
| 468 | outp += sprintf(outp, " %4d", c->core_temp_c); | ||
| 469 | |||
| 417 | /* print per-package data only for 1st core in package */ | 470 | /* print per-package data only for 1st core in package */ |
| 418 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 471 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
| 419 | goto done; | 472 | goto done; |
| 420 | 473 | ||
| 474 | if (do_ptm) | ||
| 475 | outp += sprintf(outp, " %4d", p->pkg_temp_c); | ||
| 476 | |||
| 421 | if (do_snb_cstates) | 477 | if (do_snb_cstates) |
| 422 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); | 478 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc2/t->tsc); |
| 423 | if (do_nhm_cstates) | 479 | if (do_nhm_cstates) |
| @@ -426,6 +482,32 @@ int format_counters(struct thread_data *t, struct core_data *c, | |||
| 426 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); | 482 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc6/t->tsc); |
| 427 | if (do_snb_cstates) | 483 | if (do_snb_cstates) |
| 428 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); | 484 | outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); |
| 485 | |||
| 486 | /* | ||
| 487 | * If measurement interval exceeds minimum RAPL Joule Counter range, | ||
| 488 | * indicate that results are suspect by printing "**" in fraction place. | ||
| 489 | */ | ||
| 490 | if (interval_float < rapl_joule_counter_range) { | ||
| 491 | fmt5 = " %5.2f"; | ||
| 492 | fmt6 = " %6.2f"; | ||
| 493 | } else { | ||
| 494 | fmt5 = " %3.0f**"; | ||
| 495 | fmt6 = " %4.0f**"; | ||
| 496 | } | ||
| 497 | |||
| 498 | if (do_rapl & RAPL_PKG) | ||
| 499 | outp += sprintf(outp, fmt6, p->energy_pkg * rapl_energy_units / interval_float); | ||
| 500 | if (do_rapl & RAPL_CORES) | ||
| 501 | outp += sprintf(outp, fmt6, p->energy_cores * rapl_energy_units / interval_float); | ||
| 502 | if (do_rapl & RAPL_GFX) | ||
| 503 | outp += sprintf(outp, fmt5, p->energy_gfx * rapl_energy_units / interval_float); | ||
| 504 | if (do_rapl & RAPL_DRAM) | ||
| 505 | outp += sprintf(outp, fmt5, p->energy_dram * rapl_energy_units / interval_float); | ||
| 506 | if (do_rapl & RAPL_PKG_PERF_STATUS ) | ||
| 507 | outp += sprintf(outp, fmt5, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); | ||
| 508 | if (do_rapl & RAPL_DRAM_PERF_STATUS ) | ||
| 509 | outp += sprintf(outp, fmt5, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); | ||
| 510 | |||
| 429 | done: | 511 | done: |
| 430 | outp += sprintf(outp, "\n"); | 512 | outp += sprintf(outp, "\n"); |
| 431 | 513 | ||
| @@ -435,6 +517,7 @@ done: | |||
| 435 | void flush_stdout() | 517 | void flush_stdout() |
| 436 | { | 518 | { |
| 437 | fputs(output_buffer, stdout); | 519 | fputs(output_buffer, stdout); |
| 520 | fflush(stdout); | ||
| 438 | outp = output_buffer; | 521 | outp = output_buffer; |
| 439 | } | 522 | } |
| 440 | void flush_stderr() | 523 | void flush_stderr() |
| @@ -461,6 +544,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ | |||
| 461 | for_all_cpus(format_counters, t, c, p); | 544 | for_all_cpus(format_counters, t, c, p); |
| 462 | } | 545 | } |
| 463 | 546 | ||
| 547 | #define DELTA_WRAP32(new, old) \ | ||
| 548 | if (new > old) { \ | ||
| 549 | old = new - old; \ | ||
| 550 | } else { \ | ||
| 551 | old = 0x100000000 + new - old; \ | ||
| 552 | } | ||
| 553 | |||
| 464 | void | 554 | void |
| 465 | delta_package(struct pkg_data *new, struct pkg_data *old) | 555 | delta_package(struct pkg_data *new, struct pkg_data *old) |
| 466 | { | 556 | { |
| @@ -468,6 +558,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old) | |||
| 468 | old->pc3 = new->pc3 - old->pc3; | 558 | old->pc3 = new->pc3 - old->pc3; |
| 469 | old->pc6 = new->pc6 - old->pc6; | 559 | old->pc6 = new->pc6 - old->pc6; |
| 470 | old->pc7 = new->pc7 - old->pc7; | 560 | old->pc7 = new->pc7 - old->pc7; |
| 561 | old->pkg_temp_c = new->pkg_temp_c; | ||
| 562 | |||
| 563 | DELTA_WRAP32(new->energy_pkg, old->energy_pkg); | ||
| 564 | DELTA_WRAP32(new->energy_cores, old->energy_cores); | ||
| 565 | DELTA_WRAP32(new->energy_gfx, old->energy_gfx); | ||
| 566 | DELTA_WRAP32(new->energy_dram, old->energy_dram); | ||
| 567 | DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); | ||
| 568 | DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); | ||
| 471 | } | 569 | } |
| 472 | 570 | ||
| 473 | void | 571 | void |
| @@ -476,6 +574,7 @@ delta_core(struct core_data *new, struct core_data *old) | |||
| 476 | old->c3 = new->c3 - old->c3; | 574 | old->c3 = new->c3 - old->c3; |
| 477 | old->c6 = new->c6 - old->c6; | 575 | old->c6 = new->c6 - old->c6; |
| 478 | old->c7 = new->c7 - old->c7; | 576 | old->c7 = new->c7 - old->c7; |
| 577 | old->core_temp_c = new->core_temp_c; | ||
| 479 | } | 578 | } |
| 480 | 579 | ||
| 481 | /* | 580 | /* |
| @@ -582,11 +681,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data | |||
| 582 | c->c3 = 0; | 681 | c->c3 = 0; |
| 583 | c->c6 = 0; | 682 | c->c6 = 0; |
| 584 | c->c7 = 0; | 683 | c->c7 = 0; |
| 684 | c->core_temp_c = 0; | ||
| 585 | 685 | ||
| 586 | p->pc2 = 0; | 686 | p->pc2 = 0; |
| 587 | p->pc3 = 0; | 687 | p->pc3 = 0; |
| 588 | p->pc6 = 0; | 688 | p->pc6 = 0; |
| 589 | p->pc7 = 0; | 689 | p->pc7 = 0; |
| 690 | |||
| 691 | p->energy_pkg = 0; | ||
| 692 | p->energy_dram = 0; | ||
| 693 | p->energy_cores = 0; | ||
| 694 | p->energy_gfx = 0; | ||
| 695 | p->rapl_pkg_perf_status = 0; | ||
| 696 | p->rapl_dram_perf_status = 0; | ||
| 697 | p->pkg_temp_c = 0; | ||
| 590 | } | 698 | } |
| 591 | int sum_counters(struct thread_data *t, struct core_data *c, | 699 | int sum_counters(struct thread_data *t, struct core_data *c, |
| 592 | struct pkg_data *p) | 700 | struct pkg_data *p) |
| @@ -607,6 +715,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
| 607 | average.cores.c6 += c->c6; | 715 | average.cores.c6 += c->c6; |
| 608 | average.cores.c7 += c->c7; | 716 | average.cores.c7 += c->c7; |
| 609 | 717 | ||
| 718 | average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); | ||
| 719 | |||
| 610 | /* sum per-pkg values only for 1st core in pkg */ | 720 | /* sum per-pkg values only for 1st core in pkg */ |
| 611 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 721 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
| 612 | return 0; | 722 | return 0; |
| @@ -616,6 +726,15 @@ int sum_counters(struct thread_data *t, struct core_data *c, | |||
| 616 | average.packages.pc6 += p->pc6; | 726 | average.packages.pc6 += p->pc6; |
| 617 | average.packages.pc7 += p->pc7; | 727 | average.packages.pc7 += p->pc7; |
| 618 | 728 | ||
| 729 | average.packages.energy_pkg += p->energy_pkg; | ||
| 730 | average.packages.energy_dram += p->energy_dram; | ||
| 731 | average.packages.energy_cores += p->energy_cores; | ||
| 732 | average.packages.energy_gfx += p->energy_gfx; | ||
| 733 | |||
| 734 | average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); | ||
| 735 | |||
| 736 | average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; | ||
| 737 | average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; | ||
| 619 | return 0; | 738 | return 0; |
| 620 | } | 739 | } |
| 621 | /* | 740 | /* |
| @@ -667,23 +786,26 @@ static unsigned long long rdtsc(void) | |||
| 667 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | 786 | int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) |
| 668 | { | 787 | { |
| 669 | int cpu = t->cpu_id; | 788 | int cpu = t->cpu_id; |
| 789 | unsigned long long msr; | ||
| 670 | 790 | ||
| 671 | if (cpu_migrate(cpu)) | 791 | if (cpu_migrate(cpu)) { |
| 792 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
| 672 | return -1; | 793 | return -1; |
| 794 | } | ||
| 673 | 795 | ||
| 674 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ | 796 | t->tsc = rdtsc(); /* we are running on local CPU of interest */ |
| 675 | 797 | ||
| 676 | if (has_aperf) { | 798 | if (has_aperf) { |
| 677 | if (get_msr(cpu, MSR_APERF, &t->aperf)) | 799 | if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) |
| 678 | return -3; | 800 | return -3; |
| 679 | if (get_msr(cpu, MSR_MPERF, &t->mperf)) | 801 | if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) |
| 680 | return -4; | 802 | return -4; |
| 681 | } | 803 | } |
| 682 | 804 | ||
| 683 | if (extra_delta_offset32) { | 805 | if (extra_delta_offset32) { |
| 684 | if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) | 806 | if (get_msr(cpu, extra_delta_offset32, &msr)) |
| 685 | return -5; | 807 | return -5; |
| 686 | t->extra_delta32 &= 0xFFFFFFFF; | 808 | t->extra_delta32 = msr & 0xFFFFFFFF; |
| 687 | } | 809 | } |
| 688 | 810 | ||
| 689 | if (extra_delta_offset64) | 811 | if (extra_delta_offset64) |
| @@ -691,9 +813,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 691 | return -5; | 813 | return -5; |
| 692 | 814 | ||
| 693 | if (extra_msr_offset32) { | 815 | if (extra_msr_offset32) { |
| 694 | if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) | 816 | if (get_msr(cpu, extra_msr_offset32, &msr)) |
| 695 | return -5; | 817 | return -5; |
| 696 | t->extra_msr32 &= 0xFFFFFFFF; | 818 | t->extra_msr32 = msr & 0xFFFFFFFF; |
| 697 | } | 819 | } |
| 698 | 820 | ||
| 699 | if (extra_msr_offset64) | 821 | if (extra_msr_offset64) |
| @@ -715,6 +837,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 715 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) | 837 | if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) |
| 716 | return -8; | 838 | return -8; |
| 717 | 839 | ||
| 840 | if (do_dts) { | ||
| 841 | if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) | ||
| 842 | return -9; | ||
| 843 | c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | ||
| 844 | } | ||
| 845 | |||
| 846 | |||
| 718 | /* collect package counters only for 1st core in package */ | 847 | /* collect package counters only for 1st core in package */ |
| 719 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | 848 | if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) |
| 720 | return 0; | 849 | return 0; |
| @@ -731,6 +860,41 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) | |||
| 731 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) | 860 | if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) |
| 732 | return -12; | 861 | return -12; |
| 733 | } | 862 | } |
| 863 | if (do_rapl & RAPL_PKG) { | ||
| 864 | if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) | ||
| 865 | return -13; | ||
| 866 | p->energy_pkg = msr & 0xFFFFFFFF; | ||
| 867 | } | ||
| 868 | if (do_rapl & RAPL_CORES) { | ||
| 869 | if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) | ||
| 870 | return -14; | ||
| 871 | p->energy_cores = msr & 0xFFFFFFFF; | ||
| 872 | } | ||
| 873 | if (do_rapl & RAPL_DRAM) { | ||
| 874 | if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) | ||
| 875 | return -15; | ||
| 876 | p->energy_dram = msr & 0xFFFFFFFF; | ||
| 877 | } | ||
| 878 | if (do_rapl & RAPL_GFX) { | ||
| 879 | if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) | ||
| 880 | return -16; | ||
| 881 | p->energy_gfx = msr & 0xFFFFFFFF; | ||
| 882 | } | ||
| 883 | if (do_rapl & RAPL_PKG_PERF_STATUS) { | ||
| 884 | if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) | ||
| 885 | return -16; | ||
| 886 | p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; | ||
| 887 | } | ||
| 888 | if (do_rapl & RAPL_DRAM_PERF_STATUS) { | ||
| 889 | if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) | ||
| 890 | return -16; | ||
| 891 | p->rapl_dram_perf_status = msr & 0xFFFFFFFF; | ||
| 892 | } | ||
| 893 | if (do_ptm) { | ||
| 894 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) | ||
| 895 | return -17; | ||
| 896 | p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); | ||
| 897 | } | ||
| 734 | return 0; | 898 | return 0; |
| 735 | } | 899 | } |
| 736 | 900 | ||
| @@ -742,10 +906,10 @@ void print_verbose_header(void) | |||
| 742 | if (!do_nehalem_platform_info) | 906 | if (!do_nehalem_platform_info) |
| 743 | return; | 907 | return; |
| 744 | 908 | ||
| 745 | get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); | 909 | get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); |
| 746 | 910 | ||
| 747 | if (verbose > 1) | 911 | if (verbose) |
| 748 | fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); | 912 | fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); |
| 749 | 913 | ||
| 750 | ratio = (msr >> 40) & 0xFF; | 914 | ratio = (msr >> 40) & 0xFF; |
| 751 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", | 915 | fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", |
| @@ -760,8 +924,8 @@ void print_verbose_header(void) | |||
| 760 | 924 | ||
| 761 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); | 925 | get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); |
| 762 | 926 | ||
| 763 | if (verbose > 1) | 927 | if (verbose) |
| 764 | fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); | 928 | fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
| 765 | 929 | ||
| 766 | ratio = (msr >> 56) & 0xFF; | 930 | ratio = (msr >> 56) & 0xFF; |
| 767 | if (ratio) | 931 | if (ratio) |
| @@ -804,14 +968,56 @@ void print_verbose_header(void) | |||
| 804 | ratio, bclk, ratio * bclk); | 968 | ratio, bclk, ratio * bclk); |
| 805 | 969 | ||
| 806 | print_nhm_turbo_ratio_limits: | 970 | print_nhm_turbo_ratio_limits: |
| 971 | get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); | ||
| 972 | |||
| 973 | #define SNB_C1_AUTO_UNDEMOTE (1UL << 27) | ||
| 974 | #define SNB_C3_AUTO_UNDEMOTE (1UL << 28) | ||
| 975 | |||
| 976 | fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); | ||
| 977 | |||
| 978 | fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", | ||
| 979 | (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", | ||
| 980 | (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", | ||
| 981 | (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", | ||
| 982 | (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", | ||
| 983 | (msr & (1 << 15)) ? "" : "UN", | ||
| 984 | (unsigned int)msr & 7); | ||
| 985 | |||
| 986 | |||
| 987 | switch(msr & 0x7) { | ||
| 988 | case 0: | ||
| 989 | fprintf(stderr, "pc0"); | ||
| 990 | break; | ||
| 991 | case 1: | ||
| 992 | fprintf(stderr, do_snb_cstates ? "pc2" : "pc0"); | ||
| 993 | break; | ||
| 994 | case 2: | ||
| 995 | fprintf(stderr, do_snb_cstates ? "pc6-noret" : "pc3"); | ||
| 996 | break; | ||
| 997 | case 3: | ||
| 998 | fprintf(stderr, "pc6"); | ||
| 999 | break; | ||
| 1000 | case 4: | ||
| 1001 | fprintf(stderr, "pc7"); | ||
| 1002 | break; | ||
| 1003 | case 5: | ||
| 1004 | fprintf(stderr, do_snb_cstates ? "pc7s" : "invalid"); | ||
| 1005 | break; | ||
| 1006 | case 7: | ||
| 1007 | fprintf(stderr, "unlimited"); | ||
| 1008 | break; | ||
| 1009 | default: | ||
| 1010 | fprintf(stderr, "invalid"); | ||
| 1011 | } | ||
| 1012 | fprintf(stderr, ")\n"); | ||
| 807 | 1013 | ||
| 808 | if (!do_nehalem_turbo_ratio_limit) | 1014 | if (!do_nehalem_turbo_ratio_limit) |
| 809 | return; | 1015 | return; |
| 810 | 1016 | ||
| 811 | get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); | 1017 | get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); |
| 812 | 1018 | ||
| 813 | if (verbose > 1) | 1019 | if (verbose) |
| 814 | fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); | 1020 | fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); |
| 815 | 1021 | ||
| 816 | ratio = (msr >> 56) & 0xFF; | 1022 | ratio = (msr >> 56) & 0xFF; |
| 817 | if (ratio) | 1023 | if (ratio) |
| @@ -1100,15 +1306,22 @@ int mark_cpu_present(int cpu) | |||
| 1100 | void turbostat_loop() | 1306 | void turbostat_loop() |
| 1101 | { | 1307 | { |
| 1102 | int retval; | 1308 | int retval; |
| 1309 | int restarted = 0; | ||
| 1103 | 1310 | ||
| 1104 | restart: | 1311 | restart: |
| 1312 | restarted++; | ||
| 1313 | |||
| 1105 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); | 1314 | retval = for_all_cpus(get_counters, EVEN_COUNTERS); |
| 1106 | if (retval < -1) { | 1315 | if (retval < -1) { |
| 1107 | exit(retval); | 1316 | exit(retval); |
| 1108 | } else if (retval == -1) { | 1317 | } else if (retval == -1) { |
| 1318 | if (restarted > 1) { | ||
| 1319 | exit(retval); | ||
| 1320 | } | ||
| 1109 | re_initialize(); | 1321 | re_initialize(); |
| 1110 | goto restart; | 1322 | goto restart; |
| 1111 | } | 1323 | } |
| 1324 | restarted = 0; | ||
| 1112 | gettimeofday(&tv_even, (struct timezone *)NULL); | 1325 | gettimeofday(&tv_even, (struct timezone *)NULL); |
| 1113 | 1326 | ||
| 1114 | while (1) { | 1327 | while (1) { |
| @@ -1207,6 +1420,299 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) | |||
| 1207 | } | 1420 | } |
| 1208 | } | 1421 | } |
| 1209 | 1422 | ||
| 1423 | /* | ||
| 1424 | * print_epb() | ||
| 1425 | * Decode the ENERGY_PERF_BIAS MSR | ||
| 1426 | */ | ||
| 1427 | int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 1428 | { | ||
| 1429 | unsigned long long msr; | ||
| 1430 | char *epb_string; | ||
| 1431 | int cpu; | ||
| 1432 | |||
| 1433 | if (!has_epb) | ||
| 1434 | return 0; | ||
| 1435 | |||
| 1436 | cpu = t->cpu_id; | ||
| 1437 | |||
| 1438 | /* EPB is per-package */ | ||
| 1439 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 1440 | return 0; | ||
| 1441 | |||
| 1442 | if (cpu_migrate(cpu)) { | ||
| 1443 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
| 1444 | return -1; | ||
| 1445 | } | ||
| 1446 | |||
| 1447 | if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) | ||
| 1448 | return 0; | ||
| 1449 | |||
| 1450 | switch (msr & 0x7) { | ||
| 1451 | case ENERGY_PERF_BIAS_PERFORMANCE: | ||
| 1452 | epb_string = "performance"; | ||
| 1453 | break; | ||
| 1454 | case ENERGY_PERF_BIAS_NORMAL: | ||
| 1455 | epb_string = "balanced"; | ||
| 1456 | break; | ||
| 1457 | case ENERGY_PERF_BIAS_POWERSAVE: | ||
| 1458 | epb_string = "powersave"; | ||
| 1459 | break; | ||
| 1460 | default: | ||
| 1461 | epb_string = "custom"; | ||
| 1462 | break; | ||
| 1463 | } | ||
| 1464 | fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); | ||
| 1465 | |||
| 1466 | return 0; | ||
| 1467 | } | ||
| 1468 | |||
| 1469 | #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ | ||
| 1470 | #define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ | ||
| 1471 | |||
| 1472 | /* | ||
| 1473 | * rapl_probe() | ||
| 1474 | * | ||
| 1475 | * sets do_rapl | ||
| 1476 | */ | ||
| 1477 | void rapl_probe(unsigned int family, unsigned int model) | ||
| 1478 | { | ||
| 1479 | unsigned long long msr; | ||
| 1480 | double tdp; | ||
| 1481 | |||
| 1482 | if (!genuine_intel) | ||
| 1483 | return; | ||
| 1484 | |||
| 1485 | if (family != 6) | ||
| 1486 | return; | ||
| 1487 | |||
| 1488 | switch (model) { | ||
| 1489 | case 0x2A: | ||
| 1490 | case 0x3A: | ||
| 1491 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_GFX; | ||
| 1492 | break; | ||
| 1493 | case 0x2D: | ||
| 1494 | case 0x3E: | ||
| 1495 | do_rapl = RAPL_PKG | RAPL_CORES | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS; | ||
| 1496 | break; | ||
| 1497 | default: | ||
| 1498 | return; | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | /* units on package 0, verify later other packages match */ | ||
| 1502 | if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) | ||
| 1503 | return; | ||
| 1504 | |||
| 1505 | rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
| 1506 | rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
| 1507 | rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); | ||
| 1508 | |||
| 1509 | /* get TDP to determine energy counter range */ | ||
| 1510 | if (get_msr(0, MSR_PKG_POWER_INFO, &msr)) | ||
| 1511 | return; | ||
| 1512 | |||
| 1513 | tdp = ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; | ||
| 1514 | |||
| 1515 | rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; | ||
| 1516 | |||
| 1517 | if (verbose) | ||
| 1518 | fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range\n", rapl_joule_counter_range); | ||
| 1519 | |||
| 1520 | return; | ||
| 1521 | } | ||
| 1522 | |||
| 1523 | int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 1524 | { | ||
| 1525 | unsigned long long msr; | ||
| 1526 | unsigned int dts; | ||
| 1527 | int cpu; | ||
| 1528 | |||
| 1529 | if (!(do_dts || do_ptm)) | ||
| 1530 | return 0; | ||
| 1531 | |||
| 1532 | cpu = t->cpu_id; | ||
| 1533 | |||
| 1534 | /* DTS is per-core, no need to print for each thread */ | ||
| 1535 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) | ||
| 1536 | return 0; | ||
| 1537 | |||
| 1538 | if (cpu_migrate(cpu)) { | ||
| 1539 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
| 1540 | return -1; | ||
| 1541 | } | ||
| 1542 | |||
| 1543 | if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { | ||
| 1544 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) | ||
| 1545 | return 0; | ||
| 1546 | |||
| 1547 | dts = (msr >> 16) & 0x7F; | ||
| 1548 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", | ||
| 1549 | cpu, msr, tcc_activation_temp - dts); | ||
| 1550 | |||
| 1551 | #ifdef THERM_DEBUG | ||
| 1552 | if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) | ||
| 1553 | return 0; | ||
| 1554 | |||
| 1555 | dts = (msr >> 16) & 0x7F; | ||
| 1556 | dts2 = (msr >> 8) & 0x7F; | ||
| 1557 | fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | ||
| 1558 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | ||
| 1559 | #endif | ||
| 1560 | } | ||
| 1561 | |||
| 1562 | |||
| 1563 | if (do_dts) { | ||
| 1564 | unsigned int resolution; | ||
| 1565 | |||
| 1566 | if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) | ||
| 1567 | return 0; | ||
| 1568 | |||
| 1569 | dts = (msr >> 16) & 0x7F; | ||
| 1570 | resolution = (msr >> 27) & 0xF; | ||
| 1571 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", | ||
| 1572 | cpu, msr, tcc_activation_temp - dts, resolution); | ||
| 1573 | |||
| 1574 | #ifdef THERM_DEBUG | ||
| 1575 | if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) | ||
| 1576 | return 0; | ||
| 1577 | |||
| 1578 | dts = (msr >> 16) & 0x7F; | ||
| 1579 | dts2 = (msr >> 8) & 0x7F; | ||
| 1580 | fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", | ||
| 1581 | cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); | ||
| 1582 | #endif | ||
| 1583 | } | ||
| 1584 | |||
| 1585 | return 0; | ||
| 1586 | } | ||
| 1587 | |||
| 1588 | void print_power_limit_msr(int cpu, unsigned long long msr, char *label) | ||
| 1589 | { | ||
| 1590 | fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", | ||
| 1591 | cpu, label, | ||
| 1592 | ((msr >> 15) & 1) ? "EN" : "DIS", | ||
| 1593 | ((msr >> 0) & 0x7FFF) * rapl_power_units, | ||
| 1594 | (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, | ||
| 1595 | (((msr >> 16) & 1) ? "EN" : "DIS")); | ||
| 1596 | |||
| 1597 | return; | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 1601 | { | ||
| 1602 | unsigned long long msr; | ||
| 1603 | int cpu; | ||
| 1604 | double local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units; | ||
| 1605 | |||
| 1606 | if (!do_rapl) | ||
| 1607 | return 0; | ||
| 1608 | |||
| 1609 | /* RAPL counters are per package, so print only for 1st thread/package */ | ||
| 1610 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 1611 | return 0; | ||
| 1612 | |||
| 1613 | cpu = t->cpu_id; | ||
| 1614 | if (cpu_migrate(cpu)) { | ||
| 1615 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
| 1616 | return -1; | ||
| 1617 | } | ||
| 1618 | |||
| 1619 | if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) | ||
| 1620 | return -1; | ||
| 1621 | |||
| 1622 | local_rapl_power_units = 1.0 / (1 << (msr & 0xF)); | ||
| 1623 | local_rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); | ||
| 1624 | local_rapl_time_units = 1.0 / (1 << (msr >> 16 & 0xF)); | ||
| 1625 | |||
| 1626 | if (local_rapl_power_units != rapl_power_units) | ||
| 1627 | fprintf(stderr, "cpu%d, ERROR: Power units mis-match\n", cpu); | ||
| 1628 | if (local_rapl_energy_units != rapl_energy_units) | ||
| 1629 | fprintf(stderr, "cpu%d, ERROR: Energy units mis-match\n", cpu); | ||
| 1630 | if (local_rapl_time_units != rapl_time_units) | ||
| 1631 | fprintf(stderr, "cpu%d, ERROR: Time units mis-match\n", cpu); | ||
| 1632 | |||
| 1633 | if (verbose) { | ||
| 1634 | fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " | ||
| 1635 | "(%f Watts, %f Joules, %f sec.)\n", cpu, msr, | ||
| 1636 | local_rapl_power_units, local_rapl_energy_units, local_rapl_time_units); | ||
| 1637 | } | ||
| 1638 | if (do_rapl & RAPL_PKG) { | ||
| 1639 | if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) | ||
| 1640 | return -5; | ||
| 1641 | |||
| 1642 | |||
| 1643 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | ||
| 1644 | cpu, msr, | ||
| 1645 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
| 1646 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
| 1647 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
| 1648 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | ||
| 1649 | |||
| 1650 | if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) | ||
| 1651 | return -9; | ||
| 1652 | |||
| 1653 | fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
| 1654 | cpu, msr, (msr >> 63) & 1 ? "": "UN"); | ||
| 1655 | |||
| 1656 | print_power_limit_msr(cpu, msr, "PKG Limit #1"); | ||
| 1657 | fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", | ||
| 1658 | cpu, | ||
| 1659 | ((msr >> 47) & 1) ? "EN" : "DIS", | ||
| 1660 | ((msr >> 32) & 0x7FFF) * rapl_power_units, | ||
| 1661 | (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, | ||
| 1662 | ((msr >> 48) & 1) ? "EN" : "DIS"); | ||
| 1663 | } | ||
| 1664 | |||
| 1665 | if (do_rapl & RAPL_DRAM) { | ||
| 1666 | if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) | ||
| 1667 | return -6; | ||
| 1668 | |||
| 1669 | |||
| 1670 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", | ||
| 1671 | cpu, msr, | ||
| 1672 | ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
| 1673 | ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
| 1674 | ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, | ||
| 1675 | ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); | ||
| 1676 | |||
| 1677 | |||
| 1678 | if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) | ||
| 1679 | return -9; | ||
| 1680 | fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
| 1681 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
| 1682 | |||
| 1683 | print_power_limit_msr(cpu, msr, "DRAM Limit"); | ||
| 1684 | } | ||
| 1685 | if (do_rapl & RAPL_CORES) { | ||
| 1686 | if (verbose) { | ||
| 1687 | if (get_msr(cpu, MSR_PP0_POLICY, &msr)) | ||
| 1688 | return -7; | ||
| 1689 | |||
| 1690 | fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); | ||
| 1691 | |||
| 1692 | if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) | ||
| 1693 | return -9; | ||
| 1694 | fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
| 1695 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
| 1696 | print_power_limit_msr(cpu, msr, "Cores Limit"); | ||
| 1697 | } | ||
| 1698 | } | ||
| 1699 | if (do_rapl & RAPL_GFX) { | ||
| 1700 | if (verbose) { | ||
| 1701 | if (get_msr(cpu, MSR_PP1_POLICY, &msr)) | ||
| 1702 | return -8; | ||
| 1703 | |||
| 1704 | fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); | ||
| 1705 | |||
| 1706 | if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) | ||
| 1707 | return -9; | ||
| 1708 | fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", | ||
| 1709 | cpu, msr, (msr >> 31) & 1 ? "": "UN"); | ||
| 1710 | print_power_limit_msr(cpu, msr, "GFX Limit"); | ||
| 1711 | } | ||
| 1712 | } | ||
| 1713 | return 0; | ||
| 1714 | } | ||
| 1715 | |||
| 1210 | 1716 | ||
| 1211 | int is_snb(unsigned int family, unsigned int model) | 1717 | int is_snb(unsigned int family, unsigned int model) |
| 1212 | { | 1718 | { |
| @@ -1231,6 +1737,72 @@ double discover_bclk(unsigned int family, unsigned int model) | |||
| 1231 | return 133.33; | 1737 | return 133.33; |
| 1232 | } | 1738 | } |
| 1233 | 1739 | ||
| 1740 | /* | ||
| 1741 | * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where | ||
| 1742 | * the Thermal Control Circuit (TCC) activates. | ||
| 1743 | * This is usually equal to tjMax. | ||
| 1744 | * | ||
| 1745 | * Older processors do not have this MSR, so there we guess, | ||
| 1746 | * but also allow cmdline over-ride with -T. | ||
| 1747 | * | ||
| 1748 | * Several MSR temperature values are in units of degrees-C | ||
| 1749 | * below this value, including the Digital Thermal Sensor (DTS), | ||
| 1750 | * Package Thermal Management Sensor (PTM), and thermal event thresholds. | ||
| 1751 | */ | ||
| 1752 | int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) | ||
| 1753 | { | ||
| 1754 | unsigned long long msr; | ||
| 1755 | unsigned int target_c_local; | ||
| 1756 | int cpu; | ||
| 1757 | |||
| 1758 | /* tcc_activation_temp is used only for dts or ptm */ | ||
| 1759 | if (!(do_dts || do_ptm)) | ||
| 1760 | return 0; | ||
| 1761 | |||
| 1762 | /* this is a per-package concept */ | ||
| 1763 | if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) | ||
| 1764 | return 0; | ||
| 1765 | |||
| 1766 | cpu = t->cpu_id; | ||
| 1767 | if (cpu_migrate(cpu)) { | ||
| 1768 | fprintf(stderr, "Could not migrate to CPU %d\n", cpu); | ||
| 1769 | return -1; | ||
| 1770 | } | ||
| 1771 | |||
| 1772 | if (tcc_activation_temp_override != 0) { | ||
| 1773 | tcc_activation_temp = tcc_activation_temp_override; | ||
| 1774 | fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", | ||
| 1775 | cpu, tcc_activation_temp); | ||
| 1776 | return 0; | ||
| 1777 | } | ||
| 1778 | |||
| 1779 | /* Temperature Target MSR is Nehalem and newer only */ | ||
| 1780 | if (!do_nehalem_platform_info) | ||
| 1781 | goto guess; | ||
| 1782 | |||
| 1783 | if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) | ||
| 1784 | goto guess; | ||
| 1785 | |||
| 1786 | target_c_local = (msr >> 16) & 0x7F; | ||
| 1787 | |||
| 1788 | if (verbose) | ||
| 1789 | fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", | ||
| 1790 | cpu, msr, target_c_local); | ||
| 1791 | |||
| 1792 | if (target_c_local < 85 || target_c_local > 120) | ||
| 1793 | goto guess; | ||
| 1794 | |||
| 1795 | tcc_activation_temp = target_c_local; | ||
| 1796 | |||
| 1797 | return 0; | ||
| 1798 | |||
| 1799 | guess: | ||
| 1800 | tcc_activation_temp = TJMAX_DEFAULT; | ||
| 1801 | fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", | ||
| 1802 | cpu, tcc_activation_temp); | ||
| 1803 | |||
| 1804 | return 0; | ||
| 1805 | } | ||
| 1234 | void check_cpuid() | 1806 | void check_cpuid() |
| 1235 | { | 1807 | { |
| 1236 | unsigned int eax, ebx, ecx, edx, max_level; | 1808 | unsigned int eax, ebx, ecx, edx, max_level; |
| @@ -1244,7 +1816,7 @@ void check_cpuid() | |||
| 1244 | genuine_intel = 1; | 1816 | genuine_intel = 1; |
| 1245 | 1817 | ||
| 1246 | if (verbose) | 1818 | if (verbose) |
| 1247 | fprintf(stderr, "%.4s%.4s%.4s ", | 1819 | fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", |
| 1248 | (char *)&ebx, (char *)&edx, (char *)&ecx); | 1820 | (char *)&ebx, (char *)&edx, (char *)&ecx); |
| 1249 | 1821 | ||
| 1250 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); | 1822 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); |
| @@ -1295,10 +1867,19 @@ void check_cpuid() | |||
| 1295 | 1867 | ||
| 1296 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); | 1868 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6)); |
| 1297 | has_aperf = ecx & (1 << 0); | 1869 | has_aperf = ecx & (1 << 0); |
| 1298 | if (!has_aperf) { | 1870 | do_dts = eax & (1 << 0); |
| 1299 | fprintf(stderr, "No APERF MSR\n"); | 1871 | do_ptm = eax & (1 << 6); |
| 1300 | exit(1); | 1872 | has_epb = ecx & (1 << 3); |
| 1301 | } | 1873 | |
| 1874 | if (verbose) | ||
| 1875 | fprintf(stderr, "CPUID(6): %s%s%s%s\n", | ||
| 1876 | has_aperf ? "APERF" : "No APERF!", | ||
| 1877 | do_dts ? ", DTS" : "", | ||
| 1878 | do_ptm ? ", PTM": "", | ||
| 1879 | has_epb ? ", EPB": ""); | ||
| 1880 | |||
| 1881 | if (!has_aperf) | ||
| 1882 | exit(-1); | ||
| 1302 | 1883 | ||
| 1303 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; | 1884 | do_nehalem_platform_info = genuine_intel && has_invariant_tsc; |
| 1304 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ | 1885 | do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */ |
| @@ -1307,12 +1888,15 @@ void check_cpuid() | |||
| 1307 | 1888 | ||
| 1308 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); | 1889 | do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); |
| 1309 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); | 1890 | do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); |
| 1891 | rapl_probe(family, model); | ||
| 1892 | |||
| 1893 | return; | ||
| 1310 | } | 1894 | } |
| 1311 | 1895 | ||
| 1312 | 1896 | ||
| 1313 | void usage() | 1897 | void usage() |
| 1314 | { | 1898 | { |
| 1315 | fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", | 1899 | fprintf(stderr, "%s: [-v][-R][-T][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", |
| 1316 | progname); | 1900 | progname); |
| 1317 | exit(1); | 1901 | exit(1); |
| 1318 | } | 1902 | } |
| @@ -1548,6 +2132,17 @@ void turbostat_init() | |||
| 1548 | 2132 | ||
| 1549 | if (verbose) | 2133 | if (verbose) |
| 1550 | print_verbose_header(); | 2134 | print_verbose_header(); |
| 2135 | |||
| 2136 | if (verbose) | ||
| 2137 | for_all_cpus(print_epb, ODD_COUNTERS); | ||
| 2138 | |||
| 2139 | if (verbose) | ||
| 2140 | for_all_cpus(print_rapl, ODD_COUNTERS); | ||
| 2141 | |||
| 2142 | for_all_cpus(set_temperature_target, ODD_COUNTERS); | ||
| 2143 | |||
| 2144 | if (verbose) | ||
| 2145 | for_all_cpus(print_thermal, ODD_COUNTERS); | ||
| 1551 | } | 2146 | } |
| 1552 | 2147 | ||
| 1553 | int fork_it(char **argv) | 2148 | int fork_it(char **argv) |
| @@ -1604,7 +2199,7 @@ void cmdline(int argc, char **argv) | |||
| 1604 | 2199 | ||
| 1605 | progname = argv[0]; | 2200 | progname = argv[0]; |
| 1606 | 2201 | ||
| 1607 | while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:")) != -1) { | 2202 | while ((opt = getopt(argc, argv, "+pPSvi:sc:sC:m:M:RT:")) != -1) { |
| 1608 | switch (opt) { | 2203 | switch (opt) { |
| 1609 | case 'p': | 2204 | case 'p': |
| 1610 | show_core_only++; | 2205 | show_core_only++; |
| @@ -1636,6 +2231,12 @@ void cmdline(int argc, char **argv) | |||
| 1636 | case 'M': | 2231 | case 'M': |
| 1637 | sscanf(optarg, "%x", &extra_msr_offset64); | 2232 | sscanf(optarg, "%x", &extra_msr_offset64); |
| 1638 | break; | 2233 | break; |
| 2234 | case 'R': | ||
| 2235 | rapl_verbose++; | ||
| 2236 | break; | ||
| 2237 | case 'T': | ||
| 2238 | tcc_activation_temp_override = atoi(optarg); | ||
| 2239 | break; | ||
| 1639 | default: | 2240 | default: |
| 1640 | usage(); | 2241 | usage(); |
| 1641 | } | 2242 | } |
| @@ -1646,8 +2247,8 @@ int main(int argc, char **argv) | |||
| 1646 | { | 2247 | { |
| 1647 | cmdline(argc, argv); | 2248 | cmdline(argc, argv); |
| 1648 | 2249 | ||
| 1649 | if (verbose > 1) | 2250 | if (verbose) |
| 1650 | fprintf(stderr, "turbostat v2.1 October 6, 2012" | 2251 | fprintf(stderr, "turbostat v3.0 November 23, 2012" |
| 1651 | " - Len Brown <lenb@kernel.org>\n"); | 2252 | " - Len Brown <lenb@kernel.org>\n"); |
| 1652 | 2253 | ||
| 1653 | turbostat_init(); | 2254 | turbostat_init(); |
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile index f458237fdd79..971c9ffdcb50 100644 --- a/tools/power/x86/x86_energy_perf_policy/Makefile +++ b/tools/power/x86/x86_energy_perf_policy/Makefile | |||
| @@ -1,8 +1,10 @@ | |||
| 1 | DESTDIR ?= | ||
| 2 | |||
| 1 | x86_energy_perf_policy : x86_energy_perf_policy.c | 3 | x86_energy_perf_policy : x86_energy_perf_policy.c |
| 2 | 4 | ||
| 3 | clean : | 5 | clean : |
| 4 | rm -f x86_energy_perf_policy | 6 | rm -f x86_energy_perf_policy |
| 5 | 7 | ||
| 6 | install : | 8 | install : |
| 7 | install x86_energy_perf_policy /usr/bin/ | 9 | install x86_energy_perf_policy ${DESTDIR}/usr/bin/ |
| 8 | install x86_energy_perf_policy.8 /usr/share/man/man8/ | 10 | install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/ |
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 33c5c7ee148f..40b3e5482f8a 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | |||
| @@ -289,7 +289,7 @@ void for_every_cpu(void (func)(int)) | |||
| 289 | "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", | 289 | "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", |
| 290 | &cpu); | 290 | &cpu); |
| 291 | if (retval != 1) | 291 | if (retval != 1) |
| 292 | return; | 292 | break; |
| 293 | 293 | ||
| 294 | func(cpu); | 294 | func(cpu); |
| 295 | } | 295 | } |
