diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-09-02 07:49:18 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-15 10:51:27 -0400 |
commit | 47fe38fcff0517e67d395c039d2e26d2de688a60 (patch) | |
tree | ada38f4e1aa644354b9de4c2cb128719e9e64a9e | |
parent | 5cbc19a983141729d716be17197028434127b376 (diff) |
x86: sched: Provide arch implementations using aperf/mperf
APERF/MPERF support for cpu_power.
The APERF/MPERF ratio is architecturally defined to be a relative scale of
work capacity per logical cpu; this is assumed to include SMT and Turbo mode.
APERF/MPERF are specified to both reset to 0 when either counter
wraps, which is highly inconvenient, since that'll give a blip
when that happens. The manual specifies writing 0 to the counters
after each read, but that's 1) too expensive, and 2) destroys the
possibility of sharing these counters with other users, so we live
with the blip - the other existing user does too.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/sched.c | 55 | ||||
-rw-r--r-- | include/linux/sched.h | 4 |
3 files changed, 60 insertions, 1 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253dac155..8dd30638fe44 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) | |||
13 | 13 | ||
14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
15 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
16 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o sched.o |
17 | 17 | ||
18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
19 | obj-$(CONFIG_X86_64) += bugs_64.o | 19 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 000000000000..6c00a8f3cce5 --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c | |||
@@ -0,0 +1,55 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index c30bf3d516d1..fc4c0f9393d2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -992,6 +992,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag) | |||
992 | return 0; | 992 | return 0; |
993 | } | 993 | } |
994 | 994 | ||
995 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); | ||
996 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); | ||
997 | |||
995 | #else /* CONFIG_SMP */ | 998 | #else /* CONFIG_SMP */ |
996 | 999 | ||
997 | struct sched_domain_attr; | 1000 | struct sched_domain_attr; |
@@ -1003,6 +1006,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
1003 | } | 1006 | } |
1004 | #endif /* !CONFIG_SMP */ | 1007 | #endif /* !CONFIG_SMP */ |
1005 | 1008 | ||
1009 | |||
1006 | struct io_context; /* See blkdev.h */ | 1010 | struct io_context; /* See blkdev.h */ |
1007 | 1011 | ||
1008 | 1012 | ||