diff options
author | Len Brown <len.brown@intel.com> | 2010-10-22 23:53:03 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2011-01-11 23:02:21 -0500 |
commit | d5532ee7b40b4a64e605e543b0387694430ecb79 (patch) | |
tree | 1411d02a42ca83156d37ce6d62c031afc465a5e1 | |
parent | f6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff) |
tools: create power/x86/x86_energy_perf_policy
MSR_IA32_ENERGY_PERF_BIAS first became available on Westmere Xeon.
It is implemented in all Sandy Bridge processors -- mobile, desktop and server.
It is expected to become increasingly important in subsequent generations.
x86_energy_perf_policy is a user-space utility to set the
hardware energy vs performance policy hint in the processor.
Most systems would benefit from "x86_energy_perf_policy normal"
at system startup, as the hardware default is maximum performance
at the expense of energy efficiency.
See x86_energy_perf_policy.8 man page for more information.
Background:
Linux-2.6.36 added "epb" to /proc/cpuinfo to indicate
if an x86 processor supports MSR_IA32_ENERGY_PERF_BIAS,
without actually modifying the MSR.
In March, 2010, Venkatesh Pallipadi proposed a small driver
that programmed MSR_IA32_ENERGY_PERF_BIAS, based on
the cpufreq governor in use. It also offered
a boot-time cmdline option to override.
http://lkml.org/lkml/2010/3/4/457
But hiding the hardware policy behind the
governor choice was deemed "kinda icky".
In June, 2010, I proposed a generic user/kernel API to
generalize the power/performance policy trade-off.
"RFC: /sys/power/policy_preference"
http://lkml.org/lkml/2010/6/16/399
That is my preference for implementing this capability,
but I received no support on the list.
So in September, 2010, I sent x86_energy_perf_policy.c to LKML,
a user-space utility that scribbles directly to the MSR.
http://lkml.org/lkml/2010/9/28/246
Here is that same utility, after responding to some review feedback,
to live in tools/power/, where it is easily found.
Signed-off-by: Len Brown <len.brown@intel.com>
3 files changed, 437 insertions, 0 deletions
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile new file mode 100644 index 000000000000..f458237fdd79 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/Makefile | |||
@@ -0,0 +1,8 @@ | |||
# Build, clean and install rules for x86_energy_perf_policy.
# The rule below has no recipe, so make's built-in "compile a single .c
# into an executable" rule produces the binary.
x86_energy_perf_policy : x86_energy_perf_policy.c

# clean and install name actions, not files.  Without .PHONY a stray file
# called "clean" or "install" would make the rule look up to date and the
# recipe would silently not run.
.PHONY : clean install

clean :
	rm -f x86_energy_perf_policy

install :
	install x86_energy_perf_policy /usr/bin/
	install x86_energy_perf_policy.8 /usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 new file mode 100644 index 000000000000..8eaaad648cdb --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 | |||
@@ -0,0 +1,104 @@ | |||
1 | .\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com> | ||
2 | .\" Distributed under the GPL, Copyleft 1994. | ||
3 | .TH X86_ENERGY_PERF_POLICY 8 | ||
4 | .SH NAME | ||
5 | x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS | ||
6 | .SH SYNOPSIS | ||
7 | .ft B | ||
8 | .B x86_energy_perf_policy | ||
9 | .RB [ "\-c cpu" ] | ||
10 | .RB [ "\-v" ] | ||
11 | .RB "\-r" | ||
12 | .br | ||
13 | .B x86_energy_perf_policy | ||
14 | .RB [ "\-c cpu" ] | ||
15 | .RB [ "\-v" ] | ||
16 | .RB 'performance' | ||
17 | .br | ||
18 | .B x86_energy_perf_policy | ||
19 | .RB [ "\-c cpu" ] | ||
20 | .RB [ "\-v" ] | ||
21 | .RB 'normal' | ||
22 | .br | ||
23 | .B x86_energy_perf_policy | ||
24 | .RB [ "\-c cpu" ] | ||
25 | .RB [ "\-v" ] | ||
26 | .RB 'powersave' | ||
27 | .br | ||
28 | .B x86_energy_perf_policy | ||
29 | .RB [ "\-c cpu" ] | ||
30 | .RB [ "\-v" ] | ||
31 | .RB n | ||
32 | .br | ||
33 | .SH DESCRIPTION | ||
34 | \fBx86_energy_perf_policy\fP | ||
35 | allows software to convey | ||
36 | its policy for the relative importance of performance | ||
37 | versus energy savings to the processor. | ||
38 | |||
39 | The processor uses this information in model-specific ways | ||
40 | when it must select trade-offs between performance and | ||
41 | energy efficiency. | ||
42 | |||
43 | This policy hint does not supersede Processor Performance states | ||
44 | (P-states) or CPU Idle power states (C-states), but allows | ||
45 | software to have influence where it would otherwise be unable | ||
46 | to express a preference. | ||
47 | |||
48 | For example, this setting may tell the hardware how | ||
49 | aggressively or conservatively to control frequency | ||
50 | in the "turbo range" above the explicitly OS-controlled | ||
51 | P-state frequency range. It may also tell the hardware | ||
52 | how aggressively it should enter the OS requested C-states. | ||
53 | |||
54 | Support for this feature is indicated by CPUID.06H.ECX.bit3 | ||
55 | per the Intel Architectures Software Developer's Manual. | ||
56 | |||
57 | .SS Options | ||
58 | \fB-c\fP limits operation to a single CPU. | ||
59 | The default is to operate on all CPUs. | ||
60 | Note that MSR_IA32_ENERGY_PERF_BIAS is defined per | ||
61 | logical processor, but that the initial implementations | ||
62 | of the MSR were shared among all processors in each package. | ||
63 | .PP | ||
64 | \fB-v\fP increases verbosity. By default | ||
65 | x86_energy_perf_policy is silent. | ||
66 | .PP | ||
67 | \fB-r\fP is for "read-only" mode - the unchanged state | ||
68 | is read and displayed. | ||
69 | .PP | ||
70 | .I performance | ||
71 | Set a policy where performance is paramount. | ||
72 | The processor will be unwilling to sacrifice any performance | ||
73 | for the sake of energy saving. This is the hardware default. | ||
74 | .PP | ||
75 | .I normal | ||
76 | Set a policy with a normal balance between performance and energy efficiency. | ||
77 | The processor will tolerate minor performance compromise | ||
78 | for potentially significant energy savings. | ||
79 | This is a reasonable default for most desktops and servers. | ||
80 | .PP | ||
81 | .I powersave | ||
82 | Set a policy where the processor can accept | ||
83 | a measurable performance hit to maximize energy efficiency. | ||
84 | .PP | ||
85 | .I n | ||
86 | Set MSR_IA32_ENERGY_PERF_BIAS to the specified number. | ||
87 | The range of valid numbers is 0-15, where 0 is maximum | ||
88 | performance and 15 is maximum energy efficiency. | ||
89 | |||
90 | .SH NOTES | ||
91 | .B "x86_energy_perf_policy " | ||
92 | runs only as root. | ||
93 | .SH FILES | ||
94 | .ta | ||
95 | .nf | ||
96 | /dev/cpu/*/msr | ||
97 | .fi | ||
98 | |||
99 | .SH "SEE ALSO" | ||
100 | msr(4) | ||
101 | .PP | ||
102 | .SH AUTHORS | ||
103 | .nf | ||
104 | Written by Len Brown <len.brown@intel.com> | ||
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c new file mode 100644 index 000000000000..d9678a34dd70 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | |||
@@ -0,0 +1,325 @@ | |||
1 | /* | ||
2 | * x86_energy_perf_policy -- set the energy versus performance | ||
3 | * policy preference bias on recent X86 processors. | ||
4 | */ | ||
5 | /* | ||
6 | * Copyright (c) 2010, Intel Corporation. | ||
7 | * Len Brown <len.brown@intel.com> | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., | ||
20 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | */ | ||
22 | |||
23 | #include <stdio.h> | ||
24 | #include <unistd.h> | ||
25 | #include <sys/types.h> | ||
26 | #include <sys/stat.h> | ||
27 | #include <sys/resource.h> | ||
28 | #include <fcntl.h> | ||
29 | #include <signal.h> | ||
30 | #include <sys/time.h> | ||
31 | #include <stdlib.h> | ||
32 | #include <string.h> | ||
33 | |||
/* Command-line state shared by the whole program; filled in by cmdline(). */
unsigned int verbose;		/* bumped once for every -v */
unsigned int read_only;		/* non-zero when -r was given */
char *progname;			/* argv[0], printed by usage() */
unsigned long long new_bias;	/* value to write to the MSR */
int cpu = -1;			/* -c argument; -1 means every CPU */
39 | |||
40 | /* | ||
41 | * Usage: | ||
42 | * | ||
43 | * -c cpu: limit action to a single CPU (default is all CPUs) | ||
44 | * -v: verbose output (can invoke more than once) | ||
45 | * -r: read-only, don't change any settings | ||
46 | * | ||
47 | * performance | ||
48 | * Performance is paramount. | ||
49 | * Unwilling to sacrifice any performance | ||
50 | * for the sake of energy saving. (hardware default) | ||
51 | * | ||
52 | * normal | ||
53 | * Can tolerate minor performance compromise | ||
54 | * for potentially significant energy savings. | ||
55 | * (reasonable default for most desktops and servers) | ||
56 | * | ||
57 | * powersave | ||
58 | * Can tolerate significant performance hit | ||
59 | * to maximize energy savings. | ||
60 | * | ||
61 | * n | ||
62 | * a numerical value to write to the underlying MSR. | ||
63 | */ | ||
64 | void usage(void) | ||
65 | { | ||
66 | printf("%s: [-c cpu] [-v] " | ||
67 | "(-r | 'performance' | 'normal' | 'powersave' | n)\n", | ||
68 | progname); | ||
69 | exit(1); | ||
70 | } | ||
71 | |||
/* MSR number handed to get_msr()/put_msr() as the file offset */
#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0

/* values written to the MSR for the named policies accepted by cmdline() */
#define BIAS_PERFORMANCE	0	/* 'performance' */
#define BIAS_BALANCE		6	/* 'normal' */
#define BIAS_POWERSAVE		15	/* 'powersave'; also the largest value accepted */
77 | |||
78 | void cmdline(int argc, char **argv) | ||
79 | { | ||
80 | int opt; | ||
81 | |||
82 | progname = argv[0]; | ||
83 | |||
84 | while ((opt = getopt(argc, argv, "+rvc:")) != -1) { | ||
85 | switch (opt) { | ||
86 | case 'c': | ||
87 | cpu = atoi(optarg); | ||
88 | break; | ||
89 | case 'r': | ||
90 | read_only = 1; | ||
91 | break; | ||
92 | case 'v': | ||
93 | verbose++; | ||
94 | break; | ||
95 | default: | ||
96 | usage(); | ||
97 | } | ||
98 | } | ||
99 | /* if -r, then should be no additional optind */ | ||
100 | if (read_only && (argc > optind)) | ||
101 | usage(); | ||
102 | |||
103 | /* | ||
104 | * if no -r , then must be one additional optind | ||
105 | */ | ||
106 | if (!read_only) { | ||
107 | |||
108 | if (argc != optind + 1) { | ||
109 | printf("must supply -r or policy param\n"); | ||
110 | usage(); | ||
111 | } | ||
112 | |||
113 | if (!strcmp("performance", argv[optind])) { | ||
114 | new_bias = BIAS_PERFORMANCE; | ||
115 | } else if (!strcmp("normal", argv[optind])) { | ||
116 | new_bias = BIAS_BALANCE; | ||
117 | } else if (!strcmp("powersave", argv[optind])) { | ||
118 | new_bias = BIAS_POWERSAVE; | ||
119 | } else { | ||
120 | char *endptr; | ||
121 | |||
122 | new_bias = strtoull(argv[optind], &endptr, 0); | ||
123 | if (endptr == argv[optind] || | ||
124 | new_bias > BIAS_POWERSAVE) { | ||
125 | fprintf(stderr, "invalid value: %s\n", | ||
126 | argv[optind]); | ||
127 | usage(); | ||
128 | } | ||
129 | } | ||
130 | } | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * validate_cpuid() | ||
135 | * returns on success, quietly exits on failure (make verbose with -v) | ||
136 | */ | ||
137 | void validate_cpuid(void) | ||
138 | { | ||
139 | unsigned int eax, ebx, ecx, edx, max_level; | ||
140 | char brand[16]; | ||
141 | unsigned int fms, family, model, stepping; | ||
142 | |||
143 | eax = ebx = ecx = edx = 0; | ||
144 | |||
145 | asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), | ||
146 | "=d" (edx) : "a" (0)); | ||
147 | |||
148 | if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { | ||
149 | if (verbose) | ||
150 | fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", | ||
151 | (char *)&ebx, (char *)&edx, (char *)&ecx); | ||
152 | exit(1); | ||
153 | } | ||
154 | |||
155 | asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); | ||
156 | family = (fms >> 8) & 0xf; | ||
157 | model = (fms >> 4) & 0xf; | ||
158 | stepping = fms & 0xf; | ||
159 | if (family == 6 || family == 0xf) | ||
160 | model += ((fms >> 16) & 0xf) << 4; | ||
161 | |||
162 | if (verbose > 1) | ||
163 | printf("CPUID %s %d levels family:model:stepping " | ||
164 | "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level, | ||
165 | family, model, stepping, family, model, stepping); | ||
166 | |||
167 | if (!(edx & (1 << 5))) { | ||
168 | if (verbose) | ||
169 | printf("CPUID: no MSR\n"); | ||
170 | exit(1); | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Support for MSR_IA32_ENERGY_PERF_BIAS | ||
175 | * is indicated by CPUID.06H.ECX.bit3 | ||
176 | */ | ||
177 | asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6)); | ||
178 | if (verbose) | ||
179 | printf("CPUID.06H.ECX: 0x%x\n", ecx); | ||
180 | if (!(ecx & (1 << 3))) { | ||
181 | if (verbose) | ||
182 | printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n"); | ||
183 | exit(1); | ||
184 | } | ||
185 | return; /* success */ | ||
186 | } | ||
187 | |||
/*
 * get_msr()
 * Read one 64-bit value from /dev/cpu/<cpu>/msr at file offset 'offset'
 * and return it.  Exits with status 1 if the device cannot be opened and
 * with status -2 if the read does not return a full value.
 */
unsigned long long get_msr(int cpu, int offset)
{
	char path[32];
	unsigned long long value;
	int nread;
	int dev;

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);

	dev = open(path, O_RDONLY);
	if (dev < 0) {
		printf("Try \"# modprobe msr\"\n");
		perror(path);
		exit(1);
	}

	nread = pread(dev, &value, sizeof(value), offset);
	if (nread != sizeof(value)) {
		printf("pread cpu%d 0x%x = %d\n", cpu, offset, nread);
		exit(-2);
	}

	close(dev);

	return value;
}
212 | |||
/*
 * put_msr()
 * Write new_msr to /dev/cpu/<cpu>/msr at file offset 'offset' and return
 * the value that was read from the same offset just before the write.
 * Exits with status 1 if the device cannot be opened read-write and with
 * status -2 if either the read or the write is short.
 */
unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
{
	unsigned long long old_msr;
	char msr_path[32];
	int retval;
	int fd;

	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
	fd = open(msr_path, O_RDWR);
	if (fd < 0) {
		perror(msr_path);
		exit(1);
	}

	/* fetch the previous value first so the caller can report it */
	retval = pread(fd, &old_msr, sizeof old_msr, offset);
	if (retval != sizeof old_msr) {
		/* label the failure with the call that actually failed */
		perror("pread");
		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
		exit(-2);
	}

	retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
	if (retval != sizeof new_msr) {
		perror("pwrite");
		printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
		exit(-2);
	}

	close(fd);

	return old_msr;
}
245 | |||
246 | void print_msr(int cpu) | ||
247 | { | ||
248 | printf("cpu%d: 0x%016llx\n", | ||
249 | cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS)); | ||
250 | } | ||
251 | |||
252 | void update_msr(int cpu) | ||
253 | { | ||
254 | unsigned long long previous_msr; | ||
255 | |||
256 | previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS); | ||
257 | |||
258 | if (verbose) | ||
259 | printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n", | ||
260 | cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias); | ||
261 | |||
262 | return; | ||
263 | } | ||
264 | |||
char *proc_stat = "/proc/stat";
/*
 * run func() on every cpu listed in /proc/stat
 *
 * The leading aggregate "cpu" line is skipped, then func() is called with
 * the number taken from each following "cpuN" line.  Iteration stops at the
 * first line that does not match that format.  Exits with status 1 if
 * /proc/stat cannot be opened or its first line has an unexpected format.
 */
void for_every_cpu(void (func)(int))
{
	FILE *fp;
	int retval;

	fp = fopen(proc_stat, "r");
	if (fp == NULL) {
		perror(proc_stat);
		exit(1);
	}

	/* every field is suppressed with '*', so a full match returns 0 */
	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
	if (retval != 0) {
		perror("/proc/stat format");
		exit(1);
	}

	for (;;) {
		unsigned int cpu_num;	/* unsigned to match the %u conversion */

		retval = fscanf(fp,
			"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
			&cpu_num);
		if (retval != 1)
			break;	/* leave the loop so the stream gets closed */

		func((int)cpu_num);
	}
	fclose(fp);
}
299 | |||
300 | int main(int argc, char **argv) | ||
301 | { | ||
302 | cmdline(argc, argv); | ||
303 | |||
304 | if (verbose > 1) | ||
305 | printf("x86_energy_perf_policy Nov 24, 2010" | ||
306 | " - Len Brown <lenb@kernel.org>\n"); | ||
307 | if (verbose > 1 && !read_only) | ||
308 | printf("new_bias %lld\n", new_bias); | ||
309 | |||
310 | validate_cpuid(); | ||
311 | |||
312 | if (cpu != -1) { | ||
313 | if (read_only) | ||
314 | print_msr(cpu); | ||
315 | else | ||
316 | update_msr(cpu); | ||
317 | } else { | ||
318 | if (read_only) | ||
319 | for_every_cpu(print_msr); | ||
320 | else | ||
321 | for_every_cpu(update_msr); | ||
322 | } | ||
323 | |||
324 | return 0; | ||
325 | } | ||