path: root/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
author	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>	2006-10-03 15:38:45 -0400
committer	Dave Jones <davej@redhat.com>	2006-10-15 19:57:11 -0400
commit	dfde5d62ed9b28b0bda676c16e8cb635df244ef2 (patch)
tree	12c690189fcc7155389860beae554199456b7d3e	/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
parent	a6f6e6e6ab464c9d1dff66570b78be2f66d8ba3d (diff)
[CPUFREQ][8/8] acpi-cpufreq: Add support for freq feedback from hardware
Enable the ondemand governor and acpi-cpufreq to use the IA32_APERF and
IA32_MPERF MSRs to get active frequency feedback for the last sampling
interval. This lets ondemand make the right frequency decisions when
hardware coordination of frequency is going on. Without APERF/MPERF,
ondemand can make wrong decisions at times due to underlying hardware
coordination or TM2.

Example:
* CPU 0 and CPU 1 are hardware coordinated.
* CPU 1 is running at the highest frequency.
* CPU 0 was running at the highest frequency. Now ondemand reduces it to
  some intermediate frequency based on utilization.
* Due to underlying hardware coordination with CPU 1, CPU 0 continues to
  run at the highest frequency (as long as the other CPU is at the highest).
* When ondemand samples CPU 0 again next time, without actual frequency
  feedback from APERF/MPERF it will think the previous frequency change was
  successful and can pick a wrong target frequency. This is because it
  assumes the utilization it measured this sampling interval was at the
  intermediate frequency, rather than the actual highest frequency.

More information about the IA32_APERF and IA32_MPERF MSRs:
refer to the IA-32 Intel® Architecture Software Developer's Manual at
http://developer.intel.com

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
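[Editor's note] A minimal userspace sketch of the same measurement idea, not
part of the patch: it assumes the msr driver is loaded ("modprobe msr"), root
access, and the architectural MSR indices 0xE7/0xE8 for IA32_MPERF/IA32_APERF.

/*
 * Hypothetical sketch, not from this patch: sample IA32_APERF and
 * IA32_MPERF over an interval and compute the same percent-of-max
 * ratio that ondemand will receive through the new getavg hook.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define IA32_MPERF 0xE7	/* counts at max advertised frequency */
#define IA32_APERF 0xE8	/* counts at actual CPU frequency */

static uint64_t rdmsr_fd(int fd, uint32_t msr)
{
	uint64_t val = 0;

	/* The msr driver maps the MSR index to the file offset. */
	if (pread(fd, &val, sizeof(val), msr) != sizeof(val))
		return 0;
	return val;
}

int main(void)
{
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0)
		return 1;	/* needs root and "modprobe msr" */

	uint64_t m0 = rdmsr_fd(fd, IA32_MPERF);
	uint64_t a0 = rdmsr_fd(fd, IA32_APERF);
	sleep(1);		/* sampling interval */
	uint64_t m1 = rdmsr_fd(fd, IA32_MPERF);
	uint64_t a1 = rdmsr_fd(fd, IA32_APERF);
	close(fd);

	/* Only the APERF/MPERF delta ratio is architecturally defined;
	 * the absolute counter values carry no meaning. */
	uint64_t da = a1 - a0, dm = m1 - m0;
	if (dm)
		printf("~%llu%% of max frequency\n",
		       (unsigned long long)(da * 100 / dm));
	return 0;
}

Run against an idle versus a loaded CPU, the printed ratio tracks how far
below the advertised maximum the core actually ran during the interval.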
Diffstat (limited to 'arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c')
-rw-r--r--	arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c	107
1 file changed, 106 insertions(+), 1 deletion(-)
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8b0c7db85a47..f8a8e46acb78 100644
--- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -58,10 +58,12 @@ enum {
 };
 
 #define INTEL_MSR_RANGE		(0xffff)
+#define CPUID_6_ECX_APERFMPERF_CAPABILITY	(0x1)
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
 	struct cpufreq_frequency_table *freq_table;
+	unsigned int max_freq;
 	unsigned int resume;
 	unsigned int cpu_feature;
 };
@@ -258,6 +260,100 @@ static u32 get_cur_val(cpumask_t mask)
 	return cmd.val;
 }
 
+/*
+ * Return the measured active (C0) frequency on this CPU since last call
+ * to this function.
+ * Input: cpu number
+ * Return: Average CPU frequency in terms of max frequency (zero on error)
+ *
+ * We use IA32_MPERF and IA32_APERF MSRs to get the measured performance
+ * over a period of time, while CPU is in C0 state.
+ * IA32_MPERF counts at the rate of max advertised frequency
+ * IA32_APERF counts at the rate of actual CPU frequency
+ * Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
+ * no meaning should be associated with absolute values of these MSRs.
+ */
+static unsigned int get_measured_perf(unsigned int cpu)
+{
+	union {
+		struct {
+			u32 lo;
+			u32 hi;
+		} split;
+		u64 whole;
+	} aperf_cur, mperf_cur;
+
+	cpumask_t saved_mask;
+	unsigned int perf_percent;
+	unsigned int retval;
+
+	saved_mask = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+	if (get_cpu() != cpu) {
+		/* We were not able to run on requested processor */
+		put_cpu();
+		return 0;
+	}
+
+	rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
+	rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+
+	wrmsr(MSR_IA32_APERF, 0,0);
+	wrmsr(MSR_IA32_MPERF, 0,0);
+
+#ifdef __i386__
+	/*
+	 * We dont want to do 64 bit divide with 32 bit kernel
+	 * Get an approximate value. Return failure in case we cannot get
+	 * an approximate value.
+	 */
+	if (unlikely(aperf_cur.split.hi || mperf_cur.split.hi)) {
+		int shift_count;
+		u32 h;
+
+		h = max_t(u32, aperf_cur.split.hi, mperf_cur.split.hi);
+		shift_count = fls(h);
+
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (((unsigned long)(-1) / 100) < aperf_cur.split.lo) {
+		int shift_count = 7;
+		aperf_cur.split.lo >>= shift_count;
+		mperf_cur.split.lo >>= shift_count;
+	}
+
+	if (aperf_cur.split.lo && mperf_cur.split.lo) {
+		perf_percent = (aperf_cur.split.lo * 100) / mperf_cur.split.lo;
+	} else {
+		perf_percent = 0;
+	}
+
+#else
+	if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+		int shift_count = 7;
+		aperf_cur.whole >>= shift_count;
+		mperf_cur.whole >>= shift_count;
+	}
+
+	if (aperf_cur.whole && mperf_cur.whole) {
+		perf_percent = (aperf_cur.whole * 100) / mperf_cur.whole;
+	} else {
+		perf_percent = 0;
+	}
+
+#endif
+
+	retval = drv_data[cpu]->max_freq * perf_percent / 100;
+
+	put_cpu();
+	set_cpus_allowed(current, saved_mask);
+
+	dprintk("cpu %d: performance percent %d\n", cpu, perf_percent);
+	return retval;
+}
+
 static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 {
 	struct acpi_cpufreq_data *data = drv_data[cpu];
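[Editor's note] A self-contained userspace illustration of the 32-bit path
above, not from the patch: shifting both counters right by the same amount
preserves their ratio to within rounding, turning the 64/64 divide into a
cheap 32/32 one. The counter deltas below are made up, and fls32() stands in
for the kernel's fls().

/*
 * Illustrative sketch only: demonstrate that the common-shift trick
 * used by get_measured_perf() yields (nearly) the same percentage as
 * a full 64-bit divide.
 */
#include <stdint.h>
#include <stdio.h>

static int fls32(uint32_t x)			/* highest set bit, 1-based */
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	uint64_t aperf = 0x2C0000000ULL;	/* hypothetical APERF delta */
	uint64_t mperf = 0x400000000ULL;	/* hypothetical MPERF delta */

	/* Drop the high words by a common shift, as the patch does. */
	uint32_t ah = aperf >> 32, mh = mperf >> 32;
	int shift = fls32(ah > mh ? ah : mh);
	uint32_t a = (uint32_t)(aperf >> shift);	/* high word now zero */
	uint32_t m = (uint32_t)(mperf >> shift);

	/* The extra shift of 7 guards the "* 100" against u32 overflow. */
	if (a > (uint32_t)-1 / 100) {
		a >>= 7;
		m >>= 7;
	}

	printf("exact %llu%%, approx %u%%\n",	/* both print 68% here */
	       (unsigned long long)(aperf * 100 / mperf),
	       m ? a * 100 / m : 0);
	return 0;
}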
@@ -497,7 +593,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	unsigned int valid_states = 0;
 	unsigned int cpu = policy->cpu;
 	struct acpi_cpufreq_data *data;
-	unsigned int l, h;
 	unsigned int result = 0;
 	struct cpuinfo_x86 *c = &cpu_data[policy->cpu];
 	struct acpi_processor_performance *perf;
@@ -591,6 +686,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	}
 	policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
 
+	data->max_freq = perf->states[0].core_frequency * 1000;
 	/* table init */
 	for (i = 0; i < perf->state_count; i++) {
 		if (i > 0 && perf->states[i].core_frequency ==
@@ -625,6 +721,15 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);
 
+	/* Check for APERF/MPERF support in hardware */
+	if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) {
+		unsigned int ecx;
+		ecx = cpuid_ecx(6);
+		if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) {
+			acpi_cpufreq_driver.getavg = get_measured_perf;
+		}
+	}
+
 	dprintk("CPU%u - ACPI performance management activated.\n", cpu);
 	for (i = 0; i < perf->state_count; i++)
 		dprintk("     %cP%d: %d MHz, %d mW, %d uS\n",
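[Editor's note] For reference, the CPUID check in the last hunk can be
mirrored from userspace. A hypothetical sketch using GCC/Clang's <cpuid.h>;
leaf 6, ECX bit 0 is the same APERF/MPERF capability bit the patch tests:

/*
 * Hypothetical userspace check, not part of the patch: report whether
 * this CPU advertises APERF/MPERF hardware coordination feedback.
 * x86 only; __get_cpuid() returns 0 if leaf 6 is unsupported.
 */
#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(6, &eax, &ebx, &ecx, &edx))
		return 1;		/* CPUID leaf 6 not available */

	printf("APERF/MPERF feedback: %s\n",
	       (ecx & 0x1) ? "available" : "not available");
	return 0;
}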