aboutsummaryrefslogtreecommitdiffstats
path: root/tools/power/x86
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
commitc71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
treeecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /tools/power/x86
parentea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'tools/power/x86')
-rw-r--r--tools/power/x86/turbostat/Makefile8
-rw-r--r--tools/power/x86/turbostat/turbostat.8172
-rw-r--r--tools/power/x86/turbostat/turbostat.c1044
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile8
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8104
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c325
6 files changed, 1661 insertions, 0 deletions
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
new file mode 100644
index 000000000000..fd8e1f1297aa
--- /dev/null
+++ b/tools/power/x86/turbostat/Makefile
@@ -0,0 +1,8 @@
1turbostat : turbostat.c
2
3clean :
4 rm -f turbostat
5
6install :
7 install turbostat /usr/bin/turbostat
8 install turbostat.8 /usr/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
new file mode 100644
index 000000000000..ff75125deed0
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -0,0 +1,172 @@
1.TH TURBOSTAT 8
2.SH NAME
3turbostat \- Report processor frequency and idle statistics
4.SH SYNOPSIS
5.ft B
6.B turbostat
7.RB [ "\-v" ]
8.RB [ "\-M MSR#" ]
9.RB command
10.br
11.B turbostat
12.RB [ "\-v" ]
13.RB [ "\-M MSR#" ]
14.RB [ "\-i interval_sec" ]
15.SH DESCRIPTION
16\fBturbostat \fP reports processor topology, frequency
17and idle power state statistics on modern X86 processors.
18Either \fBcommand\fP is forked and statistics are printed
19upon its completion, or statistics are printed periodically.
20
21\fBturbostat \fP
22requires that the processor
23supports an "invariant" TSC, plus the APERF and MPERF MSRs.
24\fBturbostat \fP will report idle cpu power state residency
25on processors that additionally support C-state residency counters.
26
27.SS Options
28The \fB-v\fP option increases verbosity.
29.PP
30The \fB-M MSR#\fP option dumps the specified MSR,
31in addition to the usual frequency and idle statistics.
32.PP
33The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds.
34The default is 5 seconds.
35.PP
36The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit,
37displays the statistics gathered since it was forked.
38.PP
39.SH FIELD DESCRIPTIONS
40.nf
41\fBpkg\fP processor package number.
42\fBcore\fP processor core number.
43\fBCPU\fP Linux CPU (logical processor) number.
44\fB%c0\fP percent of the interval that the CPU retired instructions.
45\fBGHz\fP average clock rate while the CPU was in c0 state.
46\fBTSC\fP average GHz that the TSC ran during the entire interval.
47\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states.
48\fB%pc3, %pc6\fP percentage residency in hardware package idle states.
49.fi
50.PP
51.SH EXAMPLE
52Without any parameters, turbostat prints out counters ever 5 seconds.
53(override interval with "-i sec" option, or specify a command
54for turbostat to fork).
55
56The first row of statistics reflect the average for the entire system.
57Subsequent rows show per-CPU statistics.
58
59.nf
60[root@x980]# ./turbostat
61core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
62 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07
63 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07
64 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07
65 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07
66 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07
67 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07
68 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07
69 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07
70 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07
71 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07
72 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07
73 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07
74 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07
75.fi
76.SH VERBOSE EXAMPLE
77The "-v" option adds verbosity to the output:
78
79.nf
80GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2)
8112 * 133 = 1600 MHz max efficiency
8225 * 133 = 3333 MHz TSC frequency
8326 * 133 = 3467 MHz max turbo 4 active cores
8426 * 133 = 3467 MHz max turbo 3 active cores
8527 * 133 = 3600 MHz max turbo 2 active cores
8627 * 133 = 3600 MHz max turbo 1 active cores
87
88.fi
89The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
90available at the minimum package voltage. The \fBTSC frequency\fP is the nominal
91maximum frequency of the processor if turbo-mode were not available. This frequency
92should be sustainable on all CPUs indefinitely, given nominal power and cooling.
93The remaining rows show what maximum turbo frequency is possible
94depending on the number of idle cores. Note that this information is
95not available on all processors.
96.SH FORK EXAMPLE
97If turbostat is invoked with a command, it will fork that command
98and output the statistics gathered when the command exits.
99eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
100until ^C while the other CPUs are mostly idle:
101
102.nf
103[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
104
105^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
106 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00
107 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00
108 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00
109 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00
110 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00
111 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00
112 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00
113 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00
114 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00
115 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00
116 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00
117 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00
118 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00
1196.950866 sec
120
121.fi
122Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit
123while the other processors are generally in various states of idle.
124
125Note that cpu3 is an HT sibling sharing core9
126with cpu9, and thus it is unable to get to an idle state
127deeper than c1 while cpu9 is busy.
128
129Note that turbostat reports average GHz of 3.61, while
130the arithmetic average of the GHz column above is 3.24.
131This is a weighted average, where the weight is %c0. ie. it is the total number of
132un-halted cycles elapsed per time divided by the number of CPUs.
133.SH NOTES
134
135.B "turbostat "
136must be run as root.
137
138.B "turbostat "
139reads hardware counters, but doesn't write them.
140So it will not interfere with the OS or other programs, including
141multiple invocations of itself.
142
143\fBturbostat \fP
144may work poorly on Linux-2.6.20 through 2.6.29,
145as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF
146in those kernels.
147
148The APERF, MPERF MSRs are defined to count non-halted cycles.
149Although it is not guaranteed by the architecture, turbostat assumes
150that they count at TSC rate, which is true on all processors tested to date.
151
152.SH REFERENCES
153"Intel® Turbo Boost Technology
154in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
155http://download.intel.com/design/processor/applnots/320354.pdf
156
157"Intel® 64 and IA-32 Architectures Software Developer's Manual
158Volume 3B: System Programming Guide"
159http://www.intel.com/products/processor/manuals/
160
161.SH FILES
162.ta
163.nf
164/dev/cpu/*/msr
165.fi
166
167.SH "SEE ALSO"
168msr(4), vmstat(8)
169.PP
170.SH AUTHORS
171.nf
172Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
new file mode 100644
index 000000000000..6d8ef4a3a9b5
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -0,0 +1,1044 @@
1/*
2 * turbostat -- show CPU frequency and C-state residency
3 * on modern Intel turbo-capable processors.
4 *
5 * Copyright (c) 2010, Intel Corporation.
6 * Len Brown <len.brown@intel.com>
7 *
8 * This program is free software; you can redistribute it and/or modify it
9 * under the terms and conditions of the GNU General Public License,
10 * version 2, as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
20 */
21
22#include <stdio.h>
23#include <unistd.h>
24#include <sys/types.h>
25#include <sys/wait.h>
26#include <sys/stat.h>
27#include <sys/resource.h>
28#include <fcntl.h>
29#include <signal.h>
30#include <sys/time.h>
31#include <stdlib.h>
32#include <dirent.h>
33#include <string.h>
34#include <ctype.h>
35
36#define MSR_TSC 0x10
37#define MSR_NEHALEM_PLATFORM_INFO 0xCE
38#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD
39#define MSR_APERF 0xE8
40#define MSR_MPERF 0xE7
41#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */
42#define MSR_PKG_C3_RESIDENCY 0x3F8
43#define MSR_PKG_C6_RESIDENCY 0x3F9
44#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */
45#define MSR_CORE_C3_RESIDENCY 0x3FC
46#define MSR_CORE_C6_RESIDENCY 0x3FD
47#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */
48
49char *proc_stat = "/proc/stat";
50unsigned int interval_sec = 5; /* set with -i interval_sec */
51unsigned int verbose; /* set with -v */
52unsigned int skip_c0;
53unsigned int skip_c1;
54unsigned int do_nhm_cstates;
55unsigned int do_snb_cstates;
56unsigned int has_aperf;
57unsigned int units = 1000000000; /* Ghz etc */
58unsigned int genuine_intel;
59unsigned int has_invariant_tsc;
60unsigned int do_nehalem_platform_info;
61unsigned int do_nehalem_turbo_ratio_limit;
62unsigned int extra_msr_offset;
63double bclk;
64unsigned int show_pkg;
65unsigned int show_core;
66unsigned int show_cpu;
67
68int aperf_mperf_unstable;
69int backwards_count;
70char *progname;
71int need_reinitialize;
72
73int num_cpus;
74
75struct counters {
76 unsigned long long tsc; /* per thread */
77 unsigned long long aperf; /* per thread */
78 unsigned long long mperf; /* per thread */
79 unsigned long long c1; /* per thread (calculated) */
80 unsigned long long c3; /* per core */
81 unsigned long long c6; /* per core */
82 unsigned long long c7; /* per core */
83 unsigned long long pc2; /* per package */
84 unsigned long long pc3; /* per package */
85 unsigned long long pc6; /* per package */
86 unsigned long long pc7; /* per package */
87 unsigned long long extra_msr; /* per thread */
88 int pkg;
89 int core;
90 int cpu;
91 struct counters *next;
92};
93
94struct counters *cnt_even;
95struct counters *cnt_odd;
96struct counters *cnt_delta;
97struct counters *cnt_average;
98struct timeval tv_even;
99struct timeval tv_odd;
100struct timeval tv_delta;
101
102unsigned long long get_msr(int cpu, off_t offset)
103{
104 ssize_t retval;
105 unsigned long long msr;
106 char pathname[32];
107 int fd;
108
109 sprintf(pathname, "/dev/cpu/%d/msr", cpu);
110 fd = open(pathname, O_RDONLY);
111 if (fd < 0) {
112 perror(pathname);
113 need_reinitialize = 1;
114 return 0;
115 }
116
117 retval = pread(fd, &msr, sizeof msr, offset);
118 if (retval != sizeof msr) {
119 fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n",
120 cpu, offset, retval);
121 exit(-2);
122 }
123
124 close(fd);
125 return msr;
126}
127
128void print_header(void)
129{
130 if (show_pkg)
131 fprintf(stderr, "pkg ");
132 if (show_core)
133 fprintf(stderr, "core");
134 if (show_cpu)
135 fprintf(stderr, " CPU");
136 if (do_nhm_cstates)
137 fprintf(stderr, " %%c0 ");
138 if (has_aperf)
139 fprintf(stderr, " GHz");
140 fprintf(stderr, " TSC");
141 if (do_nhm_cstates)
142 fprintf(stderr, " %%c1 ");
143 if (do_nhm_cstates)
144 fprintf(stderr, " %%c3 ");
145 if (do_nhm_cstates)
146 fprintf(stderr, " %%c6 ");
147 if (do_snb_cstates)
148 fprintf(stderr, " %%c7 ");
149 if (do_snb_cstates)
150 fprintf(stderr, " %%pc2 ");
151 if (do_nhm_cstates)
152 fprintf(stderr, " %%pc3 ");
153 if (do_nhm_cstates)
154 fprintf(stderr, " %%pc6 ");
155 if (do_snb_cstates)
156 fprintf(stderr, " %%pc7 ");
157 if (extra_msr_offset)
158 fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
159
160 putc('\n', stderr);
161}
162
163void dump_cnt(struct counters *cnt)
164{
165 fprintf(stderr, "package: %d ", cnt->pkg);
166 fprintf(stderr, "core:: %d ", cnt->core);
167 fprintf(stderr, "CPU: %d ", cnt->cpu);
168 fprintf(stderr, "TSC: %016llX\n", cnt->tsc);
169 fprintf(stderr, "c3: %016llX\n", cnt->c3);
170 fprintf(stderr, "c6: %016llX\n", cnt->c6);
171 fprintf(stderr, "c7: %016llX\n", cnt->c7);
172 fprintf(stderr, "aperf: %016llX\n", cnt->aperf);
173 fprintf(stderr, "pc2: %016llX\n", cnt->pc2);
174 fprintf(stderr, "pc3: %016llX\n", cnt->pc3);
175 fprintf(stderr, "pc6: %016llX\n", cnt->pc6);
176 fprintf(stderr, "pc7: %016llX\n", cnt->pc7);
177 fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, cnt->extra_msr);
178}
179
180void dump_list(struct counters *cnt)
181{
182 printf("dump_list 0x%p\n", cnt);
183
184 for (; cnt; cnt = cnt->next)
185 dump_cnt(cnt);
186}
187
188void print_cnt(struct counters *p)
189{
190 double interval_float;
191
192 interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
193
194 /* topology columns, print blanks on 1st (average) line */
195 if (p == cnt_average) {
196 if (show_pkg)
197 fprintf(stderr, " ");
198 if (show_core)
199 fprintf(stderr, " ");
200 if (show_cpu)
201 fprintf(stderr, " ");
202 } else {
203 if (show_pkg)
204 fprintf(stderr, "%4d", p->pkg);
205 if (show_core)
206 fprintf(stderr, "%4d", p->core);
207 if (show_cpu)
208 fprintf(stderr, "%4d", p->cpu);
209 }
210
211 /* %c0 */
212 if (do_nhm_cstates) {
213 if (!skip_c0)
214 fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc);
215 else
216 fprintf(stderr, " ****");
217 }
218
219 /* GHz */
220 if (has_aperf) {
221 if (!aperf_mperf_unstable) {
222 fprintf(stderr, "%5.2f",
223 1.0 * p->tsc / units * p->aperf /
224 p->mperf / interval_float);
225 } else {
226 if (p->aperf > p->tsc || p->mperf > p->tsc) {
227 fprintf(stderr, " ****");
228 } else {
229 fprintf(stderr, "%4.1f*",
230 1.0 * p->tsc /
231 units * p->aperf /
232 p->mperf / interval_float);
233 }
234 }
235 }
236
237 /* TSC */
238 fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float);
239
240 if (do_nhm_cstates) {
241 if (!skip_c1)
242 fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
243 else
244 fprintf(stderr, " ****");
245 }
246 if (do_nhm_cstates)
247 fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
248 if (do_nhm_cstates)
249 fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
250 if (do_snb_cstates)
251 fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
252 if (do_snb_cstates)
253 fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
254 if (do_nhm_cstates)
255 fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
256 if (do_nhm_cstates)
257 fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
258 if (do_snb_cstates)
259 fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
260 if (extra_msr_offset)
261 fprintf(stderr, " 0x%016llx", p->extra_msr);
262 putc('\n', stderr);
263}
264
265void print_counters(struct counters *counters)
266{
267 struct counters *cnt;
268
269 print_header();
270
271 if (num_cpus > 1)
272 print_cnt(cnt_average);
273
274 for (cnt = counters; cnt != NULL; cnt = cnt->next)
275 print_cnt(cnt);
276
277}
278
279#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after))
280
281int compute_delta(struct counters *after,
282 struct counters *before, struct counters *delta)
283{
284 int errors = 0;
285 int perf_err = 0;
286
287 skip_c0 = skip_c1 = 0;
288
289 for ( ; after && before && delta;
290 after = after->next, before = before->next, delta = delta->next) {
291 if (before->cpu != after->cpu) {
292 printf("cpu configuration changed: %d != %d\n",
293 before->cpu, after->cpu);
294 return -1;
295 }
296
297 if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) {
298 fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n",
299 before->cpu, before->tsc, after->tsc);
300 errors++;
301 }
302 /* check for TSC < 1 Mcycles over interval */
303 if (delta->tsc < (1000 * 1000)) {
304 fprintf(stderr, "Insanely slow TSC rate,"
305 " TSC stops in idle?\n");
306 fprintf(stderr, "You can disable all c-states"
307 " by booting with \"idle=poll\"\n");
308 fprintf(stderr, "or just the deep ones with"
309 " \"processor.max_cstate=1\"\n");
310 exit(-3);
311 }
312 if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
313 fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
314 before->cpu, before->c3, after->c3);
315 errors++;
316 }
317 if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
318 fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
319 before->cpu, before->c6, after->c6);
320 errors++;
321 }
322 if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
323 fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
324 before->cpu, before->c7, after->c7);
325 errors++;
326 }
327 if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
328 fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
329 before->cpu, before->pc2, after->pc2);
330 errors++;
331 }
332 if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
333 fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
334 before->cpu, before->pc3, after->pc3);
335 errors++;
336 }
337 if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
338 fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
339 before->cpu, before->pc6, after->pc6);
340 errors++;
341 }
342 if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
343 fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
344 before->cpu, before->pc7, after->pc7);
345 errors++;
346 }
347
348 perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf);
349 if (perf_err) {
350 fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n",
351 before->cpu, before->aperf, after->aperf);
352 }
353 perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf);
354 if (perf_err) {
355 fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
356 before->cpu, before->mperf, after->mperf);
357 }
358 if (perf_err) {
359 if (!aperf_mperf_unstable) {
360 fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
361 fprintf(stderr, "* Frequency results do not cover entire interval *\n");
362 fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
363
364 aperf_mperf_unstable = 1;
365 }
366 /*
367 * mperf delta is likely a huge "positive" number
368 * can not use it for calculating c0 time
369 */
370 skip_c0 = 1;
371 skip_c1 = 1;
372 }
373
374 /*
375 * As mperf and tsc collection are not atomic,
376 * it is possible for mperf's non-halted cycles
377 * to exceed TSC's all cycles: show c1 = 0% in that case.
378 */
379 if (delta->mperf > delta->tsc)
380 delta->c1 = 0;
381 else /* normal case, derive c1 */
382 delta->c1 = delta->tsc - delta->mperf
383 - delta->c3 - delta->c6 - delta->c7;
384
385 if (delta->mperf == 0)
386 delta->mperf = 1; /* divide by 0 protection */
387
388 /*
389 * for "extra msr", just copy the latest w/o subtracting
390 */
391 delta->extra_msr = after->extra_msr;
392 if (errors) {
393 fprintf(stderr, "ERROR cpu%d before:\n", before->cpu);
394 dump_cnt(before);
395 fprintf(stderr, "ERROR cpu%d after:\n", before->cpu);
396 dump_cnt(after);
397 errors = 0;
398 }
399 }
400 return 0;
401}
402
403void compute_average(struct counters *delta, struct counters *avg)
404{
405 struct counters *sum;
406
407 sum = calloc(1, sizeof(struct counters));
408 if (sum == NULL) {
409 perror("calloc sum");
410 exit(1);
411 }
412
413 for (; delta; delta = delta->next) {
414 sum->tsc += delta->tsc;
415 sum->c1 += delta->c1;
416 sum->c3 += delta->c3;
417 sum->c6 += delta->c6;
418 sum->c7 += delta->c7;
419 sum->aperf += delta->aperf;
420 sum->mperf += delta->mperf;
421 sum->pc2 += delta->pc2;
422 sum->pc3 += delta->pc3;
423 sum->pc6 += delta->pc6;
424 sum->pc7 += delta->pc7;
425 }
426 avg->tsc = sum->tsc/num_cpus;
427 avg->c1 = sum->c1/num_cpus;
428 avg->c3 = sum->c3/num_cpus;
429 avg->c6 = sum->c6/num_cpus;
430 avg->c7 = sum->c7/num_cpus;
431 avg->aperf = sum->aperf/num_cpus;
432 avg->mperf = sum->mperf/num_cpus;
433 avg->pc2 = sum->pc2/num_cpus;
434 avg->pc3 = sum->pc3/num_cpus;
435 avg->pc6 = sum->pc6/num_cpus;
436 avg->pc7 = sum->pc7/num_cpus;
437
438 free(sum);
439}
440
441void get_counters(struct counters *cnt)
442{
443 for ( ; cnt; cnt = cnt->next) {
444 cnt->tsc = get_msr(cnt->cpu, MSR_TSC);
445 if (do_nhm_cstates)
446 cnt->c3 = get_msr(cnt->cpu, MSR_CORE_C3_RESIDENCY);
447 if (do_nhm_cstates)
448 cnt->c6 = get_msr(cnt->cpu, MSR_CORE_C6_RESIDENCY);
449 if (do_snb_cstates)
450 cnt->c7 = get_msr(cnt->cpu, MSR_CORE_C7_RESIDENCY);
451 if (has_aperf)
452 cnt->aperf = get_msr(cnt->cpu, MSR_APERF);
453 if (has_aperf)
454 cnt->mperf = get_msr(cnt->cpu, MSR_MPERF);
455 if (do_snb_cstates)
456 cnt->pc2 = get_msr(cnt->cpu, MSR_PKG_C2_RESIDENCY);
457 if (do_nhm_cstates)
458 cnt->pc3 = get_msr(cnt->cpu, MSR_PKG_C3_RESIDENCY);
459 if (do_nhm_cstates)
460 cnt->pc6 = get_msr(cnt->cpu, MSR_PKG_C6_RESIDENCY);
461 if (do_snb_cstates)
462 cnt->pc7 = get_msr(cnt->cpu, MSR_PKG_C7_RESIDENCY);
463 if (extra_msr_offset)
464 cnt->extra_msr = get_msr(cnt->cpu, extra_msr_offset);
465 }
466}
467
468void print_nehalem_info(void)
469{
470 unsigned long long msr;
471 unsigned int ratio;
472
473 if (!do_nehalem_platform_info)
474 return;
475
476 msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO);
477
478 ratio = (msr >> 40) & 0xFF;
479 fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
480 ratio, bclk, ratio * bclk);
481
482 ratio = (msr >> 8) & 0xFF;
483 fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
484 ratio, bclk, ratio * bclk);
485
486 if (verbose > 1)
487 fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
488
489 if (!do_nehalem_turbo_ratio_limit)
490 return;
491
492 msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT);
493
494 ratio = (msr >> 24) & 0xFF;
495 if (ratio)
496 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
497 ratio, bclk, ratio * bclk);
498
499 ratio = (msr >> 16) & 0xFF;
500 if (ratio)
501 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
502 ratio, bclk, ratio * bclk);
503
504 ratio = (msr >> 8) & 0xFF;
505 if (ratio)
506 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
507 ratio, bclk, ratio * bclk);
508
509 ratio = (msr >> 0) & 0xFF;
510 if (ratio)
511 fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
512 ratio, bclk, ratio * bclk);
513
514}
515
516void free_counter_list(struct counters *list)
517{
518 struct counters *p;
519
520 for (p = list; p; ) {
521 struct counters *free_me;
522
523 free_me = p;
524 p = p->next;
525 free(free_me);
526 }
527}
528
529void free_all_counters(void)
530{
531 free_counter_list(cnt_even);
532 cnt_even = NULL;
533
534 free_counter_list(cnt_odd);
535 cnt_odd = NULL;
536
537 free_counter_list(cnt_delta);
538 cnt_delta = NULL;
539
540 free_counter_list(cnt_average);
541 cnt_average = NULL;
542}
543
544void insert_counters(struct counters **list,
545 struct counters *new)
546{
547 struct counters *prev;
548
549 /*
550 * list was empty
551 */
552 if (*list == NULL) {
553 new->next = *list;
554 *list = new;
555 return;
556 }
557
558 show_cpu = 1; /* there is more than one CPU */
559
560 /*
561 * insert on front of list.
562 * It is sorted by ascending package#, core#, cpu#
563 */
564 if (((*list)->pkg > new->pkg) ||
565 (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) ||
566 (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) {
567 new->next = *list;
568 *list = new;
569 return;
570 }
571
572 prev = *list;
573
574 while (prev->next && (prev->next->pkg < new->pkg)) {
575 prev = prev->next;
576 show_pkg = 1; /* there is more than 1 package */
577 }
578
579 while (prev->next && (prev->next->pkg == new->pkg)
580 && (prev->next->core < new->core)) {
581 prev = prev->next;
582 show_core = 1; /* there is more than 1 core */
583 }
584
585 while (prev->next && (prev->next->pkg == new->pkg)
586 && (prev->next->core == new->core)
587 && (prev->next->cpu < new->cpu)) {
588 prev = prev->next;
589 }
590
591 /*
592 * insert after "prev"
593 */
594 new->next = prev->next;
595 prev->next = new;
596}
597
598void alloc_new_counters(int pkg, int core, int cpu)
599{
600 struct counters *new;
601
602 if (verbose > 1)
603 printf("pkg%d core%d, cpu%d\n", pkg, core, cpu);
604
605 new = (struct counters *)calloc(1, sizeof(struct counters));
606 if (new == NULL) {
607 perror("calloc");
608 exit(1);
609 }
610 new->pkg = pkg;
611 new->core = core;
612 new->cpu = cpu;
613 insert_counters(&cnt_odd, new);
614
615 new = (struct counters *)calloc(1,
616 sizeof(struct counters));
617 if (new == NULL) {
618 perror("calloc");
619 exit(1);
620 }
621 new->pkg = pkg;
622 new->core = core;
623 new->cpu = cpu;
624 insert_counters(&cnt_even, new);
625
626 new = (struct counters *)calloc(1, sizeof(struct counters));
627 if (new == NULL) {
628 perror("calloc");
629 exit(1);
630 }
631 new->pkg = pkg;
632 new->core = core;
633 new->cpu = cpu;
634 insert_counters(&cnt_delta, new);
635
636 new = (struct counters *)calloc(1, sizeof(struct counters));
637 if (new == NULL) {
638 perror("calloc");
639 exit(1);
640 }
641 new->pkg = pkg;
642 new->core = core;
643 new->cpu = cpu;
644 cnt_average = new;
645}
646
647int get_physical_package_id(int cpu)
648{
649 char path[64];
650 FILE *filep;
651 int pkg;
652
653 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
654 filep = fopen(path, "r");
655 if (filep == NULL) {
656 perror(path);
657 exit(1);
658 }
659 fscanf(filep, "%d", &pkg);
660 fclose(filep);
661 return pkg;
662}
663
664int get_core_id(int cpu)
665{
666 char path[64];
667 FILE *filep;
668 int core;
669
670 sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
671 filep = fopen(path, "r");
672 if (filep == NULL) {
673 perror(path);
674 exit(1);
675 }
676 fscanf(filep, "%d", &core);
677 fclose(filep);
678 return core;
679}
680
681/*
682 * run func(index, cpu) on every cpu in /proc/stat
683 */
684
685int for_all_cpus(void (func)(int, int, int))
686{
687 FILE *fp;
688 int cpu_count;
689 int retval;
690
691 fp = fopen(proc_stat, "r");
692 if (fp == NULL) {
693 perror(proc_stat);
694 exit(1);
695 }
696
697 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
698 if (retval != 0) {
699 perror("/proc/stat format");
700 exit(1);
701 }
702
703 for (cpu_count = 0; ; cpu_count++) {
704 int cpu;
705
706 retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
707 if (retval != 1)
708 break;
709
710 func(get_physical_package_id(cpu), get_core_id(cpu), cpu);
711 }
712 fclose(fp);
713 return cpu_count;
714}
715
716void re_initialize(void)
717{
718 printf("turbostat: topology changed, re-initializing.\n");
719 free_all_counters();
720 num_cpus = for_all_cpus(alloc_new_counters);
721 need_reinitialize = 0;
722 printf("num_cpus is now %d\n", num_cpus);
723}
724
725void dummy(int pkg, int core, int cpu) { return; }
726/*
727 * check to see if a cpu came on-line
728 */
729void verify_num_cpus(void)
730{
731 int new_num_cpus;
732
733 new_num_cpus = for_all_cpus(dummy);
734
735 if (new_num_cpus != num_cpus) {
736 if (verbose)
737 printf("num_cpus was %d, is now %d\n",
738 num_cpus, new_num_cpus);
739 need_reinitialize = 1;
740 }
741}
742
743void turbostat_loop()
744{
745restart:
746 get_counters(cnt_even);
747 gettimeofday(&tv_even, (struct timezone *)NULL);
748
749 while (1) {
750 verify_num_cpus();
751 if (need_reinitialize) {
752 re_initialize();
753 goto restart;
754 }
755 sleep(interval_sec);
756 get_counters(cnt_odd);
757 gettimeofday(&tv_odd, (struct timezone *)NULL);
758
759 compute_delta(cnt_odd, cnt_even, cnt_delta);
760 timersub(&tv_odd, &tv_even, &tv_delta);
761 compute_average(cnt_delta, cnt_average);
762 print_counters(cnt_delta);
763 if (need_reinitialize) {
764 re_initialize();
765 goto restart;
766 }
767 sleep(interval_sec);
768 get_counters(cnt_even);
769 gettimeofday(&tv_even, (struct timezone *)NULL);
770 compute_delta(cnt_even, cnt_odd, cnt_delta);
771 timersub(&tv_even, &tv_odd, &tv_delta);
772 compute_average(cnt_delta, cnt_average);
773 print_counters(cnt_delta);
774 }
775}
776
777void check_dev_msr()
778{
779 struct stat sb;
780
781 if (stat("/dev/cpu/0/msr", &sb)) {
782 fprintf(stderr, "no /dev/cpu/0/msr\n");
783 fprintf(stderr, "Try \"# modprobe msr\"\n");
784 exit(-5);
785 }
786}
787
788void check_super_user()
789{
790 if (getuid() != 0) {
791 fprintf(stderr, "must be root\n");
792 exit(-6);
793 }
794}
795
796int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
797{
798 if (!genuine_intel)
799 return 0;
800
801 if (family != 6)
802 return 0;
803
804 switch (model) {
805 case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
806 case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
807 case 0x1F: /* Core i7 and i5 Processor - Nehalem */
808 case 0x25: /* Westmere Client - Clarkdale, Arrandale */
809 case 0x2C: /* Westmere EP - Gulftown */
810 case 0x2A: /* SNB */
811 case 0x2D: /* SNB Xeon */
812 return 1;
813 case 0x2E: /* Nehalem-EX Xeon - Beckton */
814 case 0x2F: /* Westmere-EX Xeon - Eagleton */
815 default:
816 return 0;
817 }
818}
819
820int is_snb(unsigned int family, unsigned int model)
821{
822 if (!genuine_intel)
823 return 0;
824
825 switch (model) {
826 case 0x2A:
827 case 0x2D:
828 return 1;
829 }
830 return 0;
831}
832
833double discover_bclk(unsigned int family, unsigned int model)
834{
835 if (is_snb(family, model))
836 return 100.00;
837 else
838 return 133.33;
839}
840
841void check_cpuid()
842{
843 unsigned int eax, ebx, ecx, edx, max_level;
844 unsigned int fms, family, model, stepping;
845
846 eax = ebx = ecx = edx = 0;
847
848 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
849
850 if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
851 genuine_intel = 1;
852
853 if (verbose)
854 fprintf(stderr, "%.4s%.4s%.4s ",
855 (char *)&ebx, (char *)&edx, (char *)&ecx);
856
857 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
858 family = (fms >> 8) & 0xf;
859 model = (fms >> 4) & 0xf;
860 stepping = fms & 0xf;
861 if (family == 6 || family == 0xf)
862 model += ((fms >> 16) & 0xf) << 4;
863
864 if (verbose)
865 fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
866 max_level, family, model, stepping, family, model, stepping);
867
868 if (!(edx & (1 << 5))) {
869 fprintf(stderr, "CPUID: no MSR\n");
870 exit(1);
871 }
872
873 /*
874 * check max extended function levels of CPUID.
875 * This is needed to check for invariant TSC.
876 * This check is valid for both Intel and AMD.
877 */
878 ebx = ecx = edx = 0;
879 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
880
881 if (max_level < 0x80000007) {
882 fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
883 exit(1);
884 }
885
886 /*
887 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
888 * this check is valid for both Intel and AMD
889 */
890 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
891 has_invariant_tsc = edx & (1 << 8);
892
893 if (!has_invariant_tsc) {
894 fprintf(stderr, "No invariant TSC\n");
895 exit(1);
896 }
897
898 /*
899 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
900 * this check is valid for both Intel and AMD
901 */
902
903 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
904 has_aperf = ecx & (1 << 0);
905 if (!has_aperf) {
906 fprintf(stderr, "No APERF MSR\n");
907 exit(1);
908 }
909
910 do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
911 do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
912 do_snb_cstates = is_snb(family, model);
913 bclk = discover_bclk(family, model);
914
915 do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
916}
917
918
919void usage()
920{
921 fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
922 progname);
923 exit(1);
924}
925
926
927/*
928 * in /dev/cpu/ return success for names that are numbers
929 * ie. filter out ".", "..", "microcode".
930 */
931int dir_filter(const struct dirent *dirp)
932{
933 if (isdigit(dirp->d_name[0]))
934 return 1;
935 else
936 return 0;
937}
938
939int open_dev_cpu_msr(int dummy1)
940{
941 return 0;
942}
943
944void turbostat_init()
945{
946 check_cpuid();
947
948 check_dev_msr();
949 check_super_user();
950
951 num_cpus = for_all_cpus(alloc_new_counters);
952
953 if (verbose)
954 print_nehalem_info();
955}
956
957int fork_it(char **argv)
958{
959 int retval;
960 pid_t child_pid;
961 get_counters(cnt_even);
962 gettimeofday(&tv_even, (struct timezone *)NULL);
963
964 child_pid = fork();
965 if (!child_pid) {
966 /* child */
967 execvp(argv[0], argv);
968 } else {
969 int status;
970
971 /* parent */
972 if (child_pid == -1) {
973 perror("fork");
974 exit(1);
975 }
976
977 signal(SIGINT, SIG_IGN);
978 signal(SIGQUIT, SIG_IGN);
979 if (waitpid(child_pid, &status, 0) == -1) {
980 perror("wait");
981 exit(1);
982 }
983 }
984 get_counters(cnt_odd);
985 gettimeofday(&tv_odd, (struct timezone *)NULL);
986 retval = compute_delta(cnt_odd, cnt_even, cnt_delta);
987
988 timersub(&tv_odd, &tv_even, &tv_delta);
989 compute_average(cnt_delta, cnt_average);
990 if (!retval)
991 print_counters(cnt_delta);
992
993 fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
994
995 return 0;
996}
997
998void cmdline(int argc, char **argv)
999{
1000 int opt;
1001
1002 progname = argv[0];
1003
1004 while ((opt = getopt(argc, argv, "+vi:M:")) != -1) {
1005 switch (opt) {
1006 case 'v':
1007 verbose++;
1008 break;
1009 case 'i':
1010 interval_sec = atoi(optarg);
1011 break;
1012 case 'M':
1013 sscanf(optarg, "%x", &extra_msr_offset);
1014 if (verbose > 1)
1015 fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
1016 break;
1017 default:
1018 usage();
1019 }
1020 }
1021}
1022
1023int main(int argc, char **argv)
1024{
1025 cmdline(argc, argv);
1026
1027 if (verbose > 1)
1028 fprintf(stderr, "turbostat Dec 6, 2010"
1029 " - Len Brown <lenb@kernel.org>\n");
1030 if (verbose > 1)
1031 fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
1032
1033 turbostat_init();
1034
1035 /*
1036 * if any params left, it must be a command to fork
1037 */
1038 if (argc - optind)
1039 return fork_it(argv + optind);
1040 else
1041 turbostat_loop();
1042
1043 return 0;
1044}
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
new file mode 100644
index 000000000000..f458237fdd79
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -0,0 +1,8 @@
1x86_energy_perf_policy : x86_energy_perf_policy.c
2
3clean :
4 rm -f x86_energy_perf_policy
5
6install :
7 install x86_energy_perf_policy /usr/bin/
8 install x86_energy_perf_policy.8 /usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
new file mode 100644
index 000000000000..8eaaad648cdb
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -0,0 +1,104 @@
1.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
2.\" Distributed under the GPL, Copyleft 1994.
3.TH X86_ENERGY_PERF_POLICY 8
4.SH NAME
5x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
6.SH SYNOPSIS
7.ft B
8.B x86_energy_perf_policy
9.RB [ "\-c cpu" ]
10.RB [ "\-v" ]
11.RB "\-r"
12.br
13.B x86_energy_perf_policy
14.RB [ "\-c cpu" ]
15.RB [ "\-v" ]
16.RB 'performance'
17.br
18.B x86_energy_perf_policy
19.RB [ "\-c cpu" ]
20.RB [ "\-v" ]
21.RB 'normal'
22.br
23.B x86_energy_perf_policy
24.RB [ "\-c cpu" ]
25.RB [ "\-v" ]
26.RB 'powersave'
27.br
28.B x86_energy_perf_policy
29.RB [ "\-c cpu" ]
30.RB [ "\-v" ]
31.RB n
32.br
33.SH DESCRIPTION
34\fBx86_energy_perf_policy\fP
35allows software to convey
36its policy for the relative importance of performance
37versus energy savings to the processor.
38
39The processor uses this information in model-specific ways
40when it must select trade-offs between performance and
41energy efficiency.
42
43This policy hint does not supersede Processor Performance states
44(P-states) or CPU Idle power states (C-states), but allows
45software to have influence where it would otherwise be unable
46to express a preference.
47
48For example, this setting may tell the hardware how
49aggressively or conservatively to control frequency
50in the "turbo range" above the explicitly OS-controlled
51P-state frequency range. It may also tell the hardware
52how aggressively is should enter the OS requested C-states.
53
54Support for this feature is indicated by CPUID.06H.ECX.bit3
55per the Intel Architectures Software Developer's Manual.
56
57.SS Options
58\fB-c\fP limits operation to a single CPU.
59The default is to operate on all CPUs.
60Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
61logical processor, but that the initial implementations
62of the MSR were shared among all processors in each package.
63.PP
64\fB-v\fP increases verbosity. By default
65x86_energy_perf_policy is silent.
66.PP
67\fB-r\fP is for "read-only" mode - the unchanged state
68is read and displayed.
69.PP
70.I performance
71Set a policy where performance is paramount.
72The processor will be unwilling to sacrifice any performance
73for the sake of energy saving. This is the hardware default.
74.PP
75.I normal
76Set a policy with a normal balance between performance and energy efficiency.
77The processor will tolerate minor performance compromise
78for potentially significant energy savings.
79This reasonable default for most desktops and servers.
80.PP
81.I powersave
82Set a policy where the processor can accept
83a measurable performance hit to maximize energy efficiency.
84.PP
85.I n
86Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
87The range of valid numbers is 0-15, where 0 is maximum
88performance and 15 is maximum energy efficiency.
89
90.SH NOTES
91.B "x86_energy_perf_policy "
92runs only as root.
93.SH FILES
94.ta
95.nf
96/dev/cpu/*/msr
97.fi
98
99.SH "SEE ALSO"
100msr(4)
101.PP
102.SH AUTHORS
103.nf
104Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
new file mode 100644
index 000000000000..2618ef2ba31f
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -0,0 +1,325 @@
1/*
2 * x86_energy_perf_policy -- set the energy versus performance
3 * policy preference bias on recent X86 processors.
4 */
5/*
6 * Copyright (c) 2010, Intel Corporation.
7 * Len Brown <len.brown@intel.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
23#include <stdio.h>
24#include <unistd.h>
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <sys/resource.h>
28#include <fcntl.h>
29#include <signal.h>
30#include <sys/time.h>
31#include <stdlib.h>
32#include <string.h>
33
34unsigned int verbose; /* set with -v */
35unsigned int read_only; /* set with -r */
36char *progname;
37unsigned long long new_bias;
38int cpu = -1;
39
40/*
41 * Usage:
42 *
43 * -c cpu: limit action to a single CPU (default is all CPUs)
44 * -v: verbose output (can invoke more than once)
45 * -r: read-only, don't change any settings
46 *
47 * performance
48 * Performance is paramount.
49 * Unwilling to sacrifice any performance
50 * for the sake of energy saving. (hardware default)
51 *
52 * normal
53 * Can tolerate minor performance compromise
54 * for potentially significant energy savings.
55 * (reasonable default for most desktops and servers)
56 *
57 * powersave
58 * Can tolerate significant performance hit
59 * to maximize energy savings.
60 *
61 * n
62 * a numerical value to write to the underlying MSR.
63 */
64void usage(void)
65{
66 printf("%s: [-c cpu] [-v] "
67 "(-r | 'performance' | 'normal' | 'powersave' | n)\n",
68 progname);
69 exit(1);
70}
71
72#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
73
74#define BIAS_PERFORMANCE 0
75#define BIAS_BALANCE 6
76#define BIAS_POWERSAVE 15
77
78void cmdline(int argc, char **argv)
79{
80 int opt;
81
82 progname = argv[0];
83
84 while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
85 switch (opt) {
86 case 'c':
87 cpu = atoi(optarg);
88 break;
89 case 'r':
90 read_only = 1;
91 break;
92 case 'v':
93 verbose++;
94 break;
95 default:
96 usage();
97 }
98 }
99 /* if -r, then should be no additional optind */
100 if (read_only && (argc > optind))
101 usage();
102
103 /*
104 * if no -r , then must be one additional optind
105 */
106 if (!read_only) {
107
108 if (argc != optind + 1) {
109 printf("must supply -r or policy param\n");
110 usage();
111 }
112
113 if (!strcmp("performance", argv[optind])) {
114 new_bias = BIAS_PERFORMANCE;
115 } else if (!strcmp("normal", argv[optind])) {
116 new_bias = BIAS_BALANCE;
117 } else if (!strcmp("powersave", argv[optind])) {
118 new_bias = BIAS_POWERSAVE;
119 } else {
120 char *endptr;
121
122 new_bias = strtoull(argv[optind], &endptr, 0);
123 if (endptr == argv[optind] ||
124 new_bias > BIAS_POWERSAVE) {
125 fprintf(stderr, "invalid value: %s\n",
126 argv[optind]);
127 usage();
128 }
129 }
130 }
131}
132
133/*
134 * validate_cpuid()
135 * returns on success, quietly exits on failure (make verbose with -v)
136 */
137void validate_cpuid(void)
138{
139 unsigned int eax, ebx, ecx, edx, max_level;
140 char brand[16];
141 unsigned int fms, family, model, stepping;
142
143 eax = ebx = ecx = edx = 0;
144
145 asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
146 "=d" (edx) : "a" (0));
147
148 if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
149 if (verbose)
150 fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
151 (char *)&ebx, (char *)&edx, (char *)&ecx);
152 exit(1);
153 }
154
155 asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
156 family = (fms >> 8) & 0xf;
157 model = (fms >> 4) & 0xf;
158 stepping = fms & 0xf;
159 if (family == 6 || family == 0xf)
160 model += ((fms >> 16) & 0xf) << 4;
161
162 if (verbose > 1)
163 printf("CPUID %s %d levels family:model:stepping "
164 "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
165 family, model, stepping, family, model, stepping);
166
167 if (!(edx & (1 << 5))) {
168 if (verbose)
169 printf("CPUID: no MSR\n");
170 exit(1);
171 }
172
173 /*
174 * Support for MSR_IA32_ENERGY_PERF_BIAS
175 * is indicated by CPUID.06H.ECX.bit3
176 */
177 asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
178 if (verbose)
179 printf("CPUID.06H.ECX: 0x%x\n", ecx);
180 if (!(ecx & (1 << 3))) {
181 if (verbose)
182 printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
183 exit(1);
184 }
185 return; /* success */
186}
187
188unsigned long long get_msr(int cpu, int offset)
189{
190 unsigned long long msr;
191 char msr_path[32];
192 int retval;
193 int fd;
194
195 sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
196 fd = open(msr_path, O_RDONLY);
197 if (fd < 0) {
198 printf("Try \"# modprobe msr\"\n");
199 perror(msr_path);
200 exit(1);
201 }
202
203 retval = pread(fd, &msr, sizeof msr, offset);
204
205 if (retval != sizeof msr) {
206 printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
207 exit(-2);
208 }
209 close(fd);
210 return msr;
211}
212
213unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
214{
215 unsigned long long old_msr;
216 char msr_path[32];
217 int retval;
218 int fd;
219
220 sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
221 fd = open(msr_path, O_RDWR);
222 if (fd < 0) {
223 perror(msr_path);
224 exit(1);
225 }
226
227 retval = pread(fd, &old_msr, sizeof old_msr, offset);
228 if (retval != sizeof old_msr) {
229 perror("pwrite");
230 printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
231 exit(-2);
232 }
233
234 retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
235 if (retval != sizeof new_msr) {
236 perror("pwrite");
237 printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
238 exit(-2);
239 }
240
241 close(fd);
242
243 return old_msr;
244}
245
246void print_msr(int cpu)
247{
248 printf("cpu%d: 0x%016llx\n",
249 cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
250}
251
252void update_msr(int cpu)
253{
254 unsigned long long previous_msr;
255
256 previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
257
258 if (verbose)
259 printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n",
260 cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
261
262 return;
263}
264
265char *proc_stat = "/proc/stat";
266/*
267 * run func() on every cpu in /dev/cpu
268 */
269void for_every_cpu(void (func)(int))
270{
271 FILE *fp;
272 int retval;
273
274 fp = fopen(proc_stat, "r");
275 if (fp == NULL) {
276 perror(proc_stat);
277 exit(1);
278 }
279
280 retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
281 if (retval != 0) {
282 perror("/proc/stat format");
283 exit(1);
284 }
285
286 while (1) {
287 int cpu;
288
289 retval = fscanf(fp,
290 "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
291 &cpu);
292 if (retval != 1)
293 return;
294
295 func(cpu);
296 }
297 fclose(fp);
298}
299
300int main(int argc, char **argv)
301{
302 cmdline(argc, argv);
303
304 if (verbose > 1)
305 printf("x86_energy_perf_policy Nov 24, 2010"
306 " - Len Brown <lenb@kernel.org>\n");
307 if (verbose > 1 && !read_only)
308 printf("new_bias %lld\n", new_bias);
309
310 validate_cpuid();
311
312 if (cpu != -1) {
313 if (read_only)
314 print_msr(cpu);
315 else
316 update_msr(cpu);
317 } else {
318 if (read_only)
319 for_every_cpu(print_msr);
320 else
321 for_every_cpu(update_msr);
322 }
323
324 return 0;
325}