-rw-r--r--  arch/ia64/include/asm/topology.h            |  17
-rw-r--r--  arch/mips/include/asm/mach-ip27/topology.h  |   1
-rw-r--r--  arch/powerpc/include/asm/topology.h         |   9
-rw-r--r--  arch/sh/include/asm/topology.h              |  10
-rw-r--r--  arch/sparc/include/asm/topology_64.h        |   7
-rw-r--r--  arch/x86/include/asm/cpufeature.h           |   1
-rw-r--r--  arch/x86/include/asm/processor.h            |  30
-rw-r--r--  arch/x86/include/asm/topology.h             |  14
-rw-r--r--  arch/x86/kernel/cpu/Makefile                |   2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c  |  88
-rw-r--r--  arch/x86/kernel/cpu/intel.c                 |   6
-rw-r--r--  arch/x86/kernel/cpu/sched.c                 |  55
-rw-r--r--  include/linux/sched.h                       |  23
-rw-r--r--  include/linux/topology.h                    |  32
-rw-r--r--  include/linux/wait.h                        |   4
-rw-r--r--  kernel/sched.c                              | 444
-rw-r--r--  kernel/sched_debug.c                        |   1
-rw-r--r--  kernel/sched_fair.c                         | 414
-rw-r--r--  kernel/sched_features.h                     | 122
-rw-r--r--  kernel/sched_idletask.c                     |   4
-rw-r--r--  kernel/sched_rt.c                           |   7
21 files changed, 688 insertions, 603 deletions
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 7b4c8c70b2d1..d0141fbf51d0 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
| @@ -61,12 +61,13 @@ void build_cpu_to_node_map(void); | |||
| 61 | .cache_nice_tries = 2, \ | 61 | .cache_nice_tries = 2, \ |
| 62 | .busy_idx = 2, \ | 62 | .busy_idx = 2, \ |
| 63 | .idle_idx = 1, \ | 63 | .idle_idx = 1, \ |
| 64 | .newidle_idx = 2, \ | 64 | .newidle_idx = 0, \ |
| 65 | .wake_idx = 1, \ | 65 | .wake_idx = 0, \ |
| 66 | .forkexec_idx = 1, \ | 66 | .forkexec_idx = 0, \ |
| 67 | .flags = SD_LOAD_BALANCE \ | 67 | .flags = SD_LOAD_BALANCE \ |
| 68 | | SD_BALANCE_NEWIDLE \ | 68 | | SD_BALANCE_NEWIDLE \ |
| 69 | | SD_BALANCE_EXEC \ | 69 | | SD_BALANCE_EXEC \ |
| 70 | | SD_BALANCE_FORK \ | ||
| 70 | | SD_WAKE_AFFINE, \ | 71 | | SD_WAKE_AFFINE, \ |
| 71 | .last_balance = jiffies, \ | 72 | .last_balance = jiffies, \ |
| 72 | .balance_interval = 1, \ | 73 | .balance_interval = 1, \ |
| @@ -85,14 +86,14 @@ void build_cpu_to_node_map(void); | |||
| 85 | .cache_nice_tries = 2, \ | 86 | .cache_nice_tries = 2, \ |
| 86 | .busy_idx = 3, \ | 87 | .busy_idx = 3, \ |
| 87 | .idle_idx = 2, \ | 88 | .idle_idx = 2, \ |
| 88 | .newidle_idx = 2, \ | 89 | .newidle_idx = 0, \ |
| 89 | .wake_idx = 1, \ | 90 | .wake_idx = 0, \ |
| 90 | .forkexec_idx = 1, \ | 91 | .forkexec_idx = 0, \ |
| 91 | .flags = SD_LOAD_BALANCE \ | 92 | .flags = SD_LOAD_BALANCE \ |
| 93 | | SD_BALANCE_NEWIDLE \ | ||
| 92 | | SD_BALANCE_EXEC \ | 94 | | SD_BALANCE_EXEC \ |
| 93 | | SD_BALANCE_FORK \ | 95 | | SD_BALANCE_FORK \ |
| 94 | | SD_SERIALIZE \ | 96 | | SD_SERIALIZE, \ |
| 95 | | SD_WAKE_BALANCE, \ | ||
| 96 | .last_balance = jiffies, \ | 97 | .last_balance = jiffies, \ |
| 97 | .balance_interval = 64, \ | 98 | .balance_interval = 64, \ |
| 98 | .nr_balance_failed = 0, \ | 99 | .nr_balance_failed = 0, \ |
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 07547231e078..230591707005 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
| @@ -48,7 +48,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; | |||
| 48 | .cache_nice_tries = 1, \ | 48 | .cache_nice_tries = 1, \ |
| 49 | .flags = SD_LOAD_BALANCE \ | 49 | .flags = SD_LOAD_BALANCE \ |
| 50 | | SD_BALANCE_EXEC \ | 50 | | SD_BALANCE_EXEC \ |
| 51 | | SD_WAKE_BALANCE, \ | ||
| 52 | .last_balance = jiffies, \ | 51 | .last_balance = jiffies, \ |
| 53 | .balance_interval = 1, \ | 52 | .balance_interval = 1, \ |
| 54 | .nr_balance_failed = 0, \ | 53 | .nr_balance_failed = 0, \ |
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 054a16d68082..394edcbcce71 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
| @@ -57,14 +57,13 @@ static inline int pcibus_to_node(struct pci_bus *bus) | |||
| 57 | .cache_nice_tries = 1, \ | 57 | .cache_nice_tries = 1, \ |
| 58 | .busy_idx = 3, \ | 58 | .busy_idx = 3, \ |
| 59 | .idle_idx = 1, \ | 59 | .idle_idx = 1, \ |
| 60 | .newidle_idx = 2, \ | 60 | .newidle_idx = 0, \ |
| 61 | .wake_idx = 1, \ | 61 | .wake_idx = 0, \ |
| 62 | .flags = SD_LOAD_BALANCE \ | 62 | .flags = SD_LOAD_BALANCE \ |
| 63 | | SD_BALANCE_EXEC \ | 63 | | SD_BALANCE_EXEC \ |
| 64 | | SD_BALANCE_FORK \ | ||
| 64 | | SD_BALANCE_NEWIDLE \ | 65 | | SD_BALANCE_NEWIDLE \ |
| 65 | | SD_WAKE_IDLE \ | 66 | | SD_SERIALIZE, \ |
| 66 | | SD_SERIALIZE \ | ||
| 67 | | SD_WAKE_BALANCE, \ | ||
| 68 | .last_balance = jiffies, \ | 67 | .last_balance = jiffies, \ |
| 69 | .balance_interval = 1, \ | 68 | .balance_interval = 1, \ |
| 70 | .nr_balance_failed = 0, \ | 69 | .nr_balance_failed = 0, \ |
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index b69ee850906d..f8c40cc65054 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
| @@ -15,14 +15,14 @@ | |||
| 15 | .cache_nice_tries = 2, \ | 15 | .cache_nice_tries = 2, \ |
| 16 | .busy_idx = 3, \ | 16 | .busy_idx = 3, \ |
| 17 | .idle_idx = 2, \ | 17 | .idle_idx = 2, \ |
| 18 | .newidle_idx = 2, \ | 18 | .newidle_idx = 0, \ |
| 19 | .wake_idx = 1, \ | 19 | .wake_idx = 0, \ |
| 20 | .forkexec_idx = 1, \ | 20 | .forkexec_idx = 0, \ |
| 21 | .flags = SD_LOAD_BALANCE \ | 21 | .flags = SD_LOAD_BALANCE \ |
| 22 | | SD_BALANCE_FORK \ | 22 | | SD_BALANCE_FORK \ |
| 23 | | SD_BALANCE_EXEC \ | 23 | | SD_BALANCE_EXEC \ |
| 24 | | SD_SERIALIZE \ | 24 | | SD_BALANCE_NEWIDLE \ |
| 25 | | SD_WAKE_BALANCE, \ | 25 | | SD_SERIALIZE, \ |
| 26 | .last_balance = jiffies, \ | 26 | .last_balance = jiffies, \ |
| 27 | .balance_interval = 1, \ | 27 | .balance_interval = 1, \ |
| 28 | .nr_balance_failed = 0, \ | 28 | .nr_balance_failed = 0, \ |
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index e5ea8d332421..26cd25c08399 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
| @@ -52,13 +52,12 @@ static inline int pcibus_to_node(struct pci_bus *pbus) | |||
| 52 | .busy_idx = 3, \ | 52 | .busy_idx = 3, \ |
| 53 | .idle_idx = 2, \ | 53 | .idle_idx = 2, \ |
| 54 | .newidle_idx = 0, \ | 54 | .newidle_idx = 0, \ |
| 55 | .wake_idx = 1, \ | 55 | .wake_idx = 0, \ |
| 56 | .forkexec_idx = 1, \ | 56 | .forkexec_idx = 0, \ |
| 57 | .flags = SD_LOAD_BALANCE \ | 57 | .flags = SD_LOAD_BALANCE \ |
| 58 | | SD_BALANCE_FORK \ | 58 | | SD_BALANCE_FORK \ |
| 59 | | SD_BALANCE_EXEC \ | 59 | | SD_BALANCE_EXEC \ |
| 60 | | SD_SERIALIZE \ | 60 | | SD_SERIALIZE, \ |
| 61 | | SD_WAKE_BALANCE, \ | ||
| 62 | .last_balance = jiffies, \ | 61 | .last_balance = jiffies, \ |
| 63 | .balance_interval = 1, \ | 62 | .balance_interval = 1, \ |
| 64 | } | 63 | } |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 847fee6493a2..9cfc88b97742 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
| @@ -96,6 +96,7 @@ | |||
| 96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ | 96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ |
| 97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ | 97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ |
| 98 | #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ | 98 | #define X86_FEATURE_AMD_DCM (3*32+27) /* multi-node processor */ |
| 99 | #define X86_FEATURE_APERFMPERF (3*32+28) /* APERFMPERF */ | ||
| 99 | 100 | ||
| 100 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 101 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
| 101 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 102 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 42a3f936dadc..c3429e8b2424 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
| @@ -27,6 +27,7 @@ struct mm_struct; | |||
| 27 | #include <linux/cpumask.h> | 27 | #include <linux/cpumask.h> |
| 28 | #include <linux/cache.h> | 28 | #include <linux/cache.h> |
| 29 | #include <linux/threads.h> | 29 | #include <linux/threads.h> |
| 30 | #include <linux/math64.h> | ||
| 30 | #include <linux/init.h> | 31 | #include <linux/init.h> |
| 31 | 32 | ||
| 32 | /* | 33 | /* |
| @@ -1022,4 +1023,33 @@ extern int set_tsc_mode(unsigned int val); | |||
| 1022 | 1023 | ||
| 1023 | extern int amd_get_nb_id(int cpu); | 1024 | extern int amd_get_nb_id(int cpu); |
| 1024 | 1025 | ||
| 1026 | struct aperfmperf { | ||
| 1027 | u64 aperf, mperf; | ||
| 1028 | }; | ||
| 1029 | |||
| 1030 | static inline void get_aperfmperf(struct aperfmperf *am) | ||
| 1031 | { | ||
| 1032 | WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF)); | ||
| 1033 | |||
| 1034 | rdmsrl(MSR_IA32_APERF, am->aperf); | ||
| 1035 | rdmsrl(MSR_IA32_MPERF, am->mperf); | ||
| 1036 | } | ||
| 1037 | |||
| 1038 | #define APERFMPERF_SHIFT 10 | ||
| 1039 | |||
| 1040 | static inline | ||
| 1041 | unsigned long calc_aperfmperf_ratio(struct aperfmperf *old, | ||
| 1042 | struct aperfmperf *new) | ||
| 1043 | { | ||
| 1044 | u64 aperf = new->aperf - old->aperf; | ||
| 1045 | u64 mperf = new->mperf - old->mperf; | ||
| 1046 | unsigned long ratio = aperf; | ||
| 1047 | |||
| 1048 | mperf >>= APERFMPERF_SHIFT; | ||
| 1049 | if (mperf) | ||
| 1050 | ratio = div64_u64(aperf, mperf); | ||
| 1051 | |||
| 1052 | return ratio; | ||
| 1053 | } | ||
| 1054 | |||
| 1025 | #endif /* _ASM_X86_PROCESSOR_H */ | 1055 | #endif /* _ASM_X86_PROCESSOR_H */ |
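The calc_aperfmperf_ratio() helper added above returns the ratio of the two counter deltas in 10-bit fixed point: the MPERF delta is pre-shifted by APERFMPERF_SHIFT, so the result is roughly (aperf / mperf) << 10, with 1024 meaning "running at the reference clock". Below is a minimal user-space sketch of the same arithmetic; the function name aperfmperf_ratio and the counter deltas are invented purely for illustration.

	#include <stdint.h>
	#include <stdio.h>

	#define APERFMPERF_SHIFT 10

	/* same fixed-point math as calc_aperfmperf_ratio(): ratio ~= (aperf/mperf) << 10 */
	static uint64_t aperfmperf_ratio(uint64_t aperf_delta, uint64_t mperf_delta)
	{
		uint64_t ratio = aperf_delta;

		mperf_delta >>= APERFMPERF_SHIFT;
		if (mperf_delta)	/* avoid dividing by zero, as the kernel helper does */
			ratio = aperf_delta / mperf_delta;

		return ratio;
	}

	int main(void)
	{
		/* made-up deltas: APERF advanced 1.5x as fast as MPERF (e.g. turbo) */
		uint64_t ratio = aperfmperf_ratio(3000000, 2000000);

		printf("ratio = %llu (1024 == reference clock)\n",
		       (unsigned long long)ratio);
		return 0;
	}

With APERF growing 1.5x faster than MPERF, the sketch prints a ratio of about 1536, i.e. 1.5 in fixed point.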
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 26d06e052a18..6f0695d744bf 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
| @@ -116,15 +116,11 @@ extern unsigned long node_remap_size[]; | |||
| 116 | 116 | ||
| 117 | # define SD_CACHE_NICE_TRIES 1 | 117 | # define SD_CACHE_NICE_TRIES 1 |
| 118 | # define SD_IDLE_IDX 1 | 118 | # define SD_IDLE_IDX 1 |
| 119 | # define SD_NEWIDLE_IDX 2 | ||
| 120 | # define SD_FORKEXEC_IDX 0 | ||
| 121 | 119 | ||
| 122 | #else | 120 | #else |
| 123 | 121 | ||
| 124 | # define SD_CACHE_NICE_TRIES 2 | 122 | # define SD_CACHE_NICE_TRIES 2 |
| 125 | # define SD_IDLE_IDX 2 | 123 | # define SD_IDLE_IDX 2 |
| 126 | # define SD_NEWIDLE_IDX 2 | ||
| 127 | # define SD_FORKEXEC_IDX 1 | ||
| 128 | 124 | ||
| 129 | #endif | 125 | #endif |
| 130 | 126 | ||
| @@ -137,22 +133,20 @@ extern unsigned long node_remap_size[]; | |||
| 137 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ | 133 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ |
| 138 | .busy_idx = 3, \ | 134 | .busy_idx = 3, \ |
| 139 | .idle_idx = SD_IDLE_IDX, \ | 135 | .idle_idx = SD_IDLE_IDX, \ |
| 140 | .newidle_idx = SD_NEWIDLE_IDX, \ | 136 | .newidle_idx = 0, \ |
| 141 | .wake_idx = 1, \ | 137 | .wake_idx = 0, \ |
| 142 | .forkexec_idx = SD_FORKEXEC_IDX, \ | 138 | .forkexec_idx = 0, \ |
| 143 | \ | 139 | \ |
| 144 | .flags = 1*SD_LOAD_BALANCE \ | 140 | .flags = 1*SD_LOAD_BALANCE \ |
| 145 | | 1*SD_BALANCE_NEWIDLE \ | 141 | | 1*SD_BALANCE_NEWIDLE \ |
| 146 | | 1*SD_BALANCE_EXEC \ | 142 | | 1*SD_BALANCE_EXEC \ |
| 147 | | 1*SD_BALANCE_FORK \ | 143 | | 1*SD_BALANCE_FORK \ |
| 148 | | 0*SD_WAKE_IDLE \ | 144 | | 0*SD_BALANCE_WAKE \ |
| 149 | | 1*SD_WAKE_AFFINE \ | 145 | | 1*SD_WAKE_AFFINE \ |
| 150 | | 1*SD_WAKE_BALANCE \ | ||
| 151 | | 0*SD_SHARE_CPUPOWER \ | 146 | | 0*SD_SHARE_CPUPOWER \ |
| 152 | | 0*SD_POWERSAVINGS_BALANCE \ | 147 | | 0*SD_POWERSAVINGS_BALANCE \ |
| 153 | | 0*SD_SHARE_PKG_RESOURCES \ | 148 | | 0*SD_SHARE_PKG_RESOURCES \ |
| 154 | | 1*SD_SERIALIZE \ | 149 | | 1*SD_SERIALIZE \ |
| 155 | | 1*SD_WAKE_IDLE_FAR \ | ||
| 156 | | 0*SD_PREFER_SIBLING \ | 150 | | 0*SD_PREFER_SIBLING \ |
| 157 | , \ | 151 | , \ |
| 158 | .last_balance = jiffies, \ | 152 | .last_balance = jiffies, \ |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c1f253dac155..8dd30638fe44 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
| @@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) | |||
| 13 | 13 | ||
| 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
| 15 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
| 16 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o sched.o |
| 17 | 17 | ||
| 18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
| 19 | obj-$(CONFIG_X86_64) += bugs_64.o | 19 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ae9b503220ca..4109679863c1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
| @@ -60,7 +60,6 @@ enum { | |||
| 60 | }; | 60 | }; |
| 61 | 61 | ||
| 62 | #define INTEL_MSR_RANGE (0xffff) | 62 | #define INTEL_MSR_RANGE (0xffff) |
| 63 | #define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) | ||
| 64 | 63 | ||
| 65 | struct acpi_cpufreq_data { | 64 | struct acpi_cpufreq_data { |
| 66 | struct acpi_processor_performance *acpi_data; | 65 | struct acpi_processor_performance *acpi_data; |
| @@ -71,11 +70,7 @@ struct acpi_cpufreq_data { | |||
| 71 | 70 | ||
| 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); | 71 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); |
| 73 | 72 | ||
| 74 | struct acpi_msr_data { | 73 | static DEFINE_PER_CPU(struct aperfmperf, old_perf); |
| 75 | u64 saved_aperf, saved_mperf; | ||
| 76 | }; | ||
| 77 | |||
| 78 | static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); | ||
| 79 | 74 | ||
| 80 | DEFINE_TRACE(power_mark); | 75 | DEFINE_TRACE(power_mark); |
| 81 | 76 | ||
| @@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
| 244 | return cmd.val; | 239 | return cmd.val; |
| 245 | } | 240 | } |
| 246 | 241 | ||
| 247 | struct perf_pair { | ||
| 248 | union { | ||
| 249 | struct { | ||
| 250 | u32 lo; | ||
| 251 | u32 hi; | ||
| 252 | } split; | ||
| 253 | u64 whole; | ||
| 254 | } aperf, mperf; | ||
| 255 | }; | ||
| 256 | |||
| 257 | /* Called via smp_call_function_single(), on the target CPU */ | 242 | /* Called via smp_call_function_single(), on the target CPU */ |
| 258 | static void read_measured_perf_ctrs(void *_cur) | 243 | static void read_measured_perf_ctrs(void *_cur) |
| 259 | { | 244 | { |
| 260 | struct perf_pair *cur = _cur; | 245 | struct aperfmperf *am = _cur; |
| 261 | 246 | ||
| 262 | rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); | 247 | get_aperfmperf(am); |
| 263 | rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); | ||
| 264 | } | 248 | } |
| 265 | 249 | ||
| 266 | /* | 250 | /* |
| @@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur) | |||
| 279 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | 263 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
| 280 | unsigned int cpu) | 264 | unsigned int cpu) |
| 281 | { | 265 | { |
| 282 | struct perf_pair readin, cur; | 266 | struct aperfmperf perf; |
| 283 | unsigned int perf_percent; | 267 | unsigned long ratio; |
| 284 | unsigned int retval; | 268 | unsigned int retval; |
| 285 | 269 | ||
| 286 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) | 270 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) |
| 287 | return 0; | 271 | return 0; |
| 288 | 272 | ||
| 289 | cur.aperf.whole = readin.aperf.whole - | 273 | ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); |
| 290 | per_cpu(msr_data, cpu).saved_aperf; | 274 | per_cpu(old_perf, cpu) = perf; |
| 291 | cur.mperf.whole = readin.mperf.whole - | ||
| 292 | per_cpu(msr_data, cpu).saved_mperf; | ||
| 293 | per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; | ||
| 294 | per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; | ||
| 295 | |||
| 296 | #ifdef __i386__ | ||
| 297 | /* | ||
| 298 | * We dont want to do 64 bit divide with 32 bit kernel | ||
| 299 | * Get an approximate value. Return failure in case we cannot get | ||
| 300 | * an approximate value. | ||
| 301 | */ | ||
| 302 | if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { | ||
| 303 | int shift_count; | ||
| 304 | u32 h; | ||
| 305 | |||
| 306 | h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); | ||
| 307 | shift_count = fls(h); | ||
| 308 | |||
| 309 | cur.aperf.whole >>= shift_count; | ||
| 310 | cur.mperf.whole >>= shift_count; | ||
| 311 | } | ||
| 312 | |||
| 313 | if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { | ||
| 314 | int shift_count = 7; | ||
| 315 | cur.aperf.split.lo >>= shift_count; | ||
| 316 | cur.mperf.split.lo >>= shift_count; | ||
| 317 | } | ||
| 318 | |||
| 319 | if (cur.aperf.split.lo && cur.mperf.split.lo) | ||
| 320 | perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; | ||
| 321 | else | ||
| 322 | perf_percent = 0; | ||
| 323 | 275 | ||
| 324 | #else | 276 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; |
| 325 | if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { | ||
| 326 | int shift_count = 7; | ||
| 327 | cur.aperf.whole >>= shift_count; | ||
| 328 | cur.mperf.whole >>= shift_count; | ||
| 329 | } | ||
| 330 | |||
| 331 | if (cur.aperf.whole && cur.mperf.whole) | ||
| 332 | perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; | ||
| 333 | else | ||
| 334 | perf_percent = 0; | ||
| 335 | |||
| 336 | #endif | ||
| 337 | |||
| 338 | retval = (policy->cpuinfo.max_freq * perf_percent) / 100; | ||
| 339 | 277 | ||
| 340 | return retval; | 278 | return retval; |
| 341 | } | 279 | } |
| @@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
| 731 | acpi_processor_notify_smm(THIS_MODULE); | 669 | acpi_processor_notify_smm(THIS_MODULE); |
| 732 | 670 | ||
| 733 | /* Check for APERF/MPERF support in hardware */ | 671 | /* Check for APERF/MPERF support in hardware */ |
| 734 | if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { | 672 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
| 735 | unsigned int ecx; | 673 | acpi_cpufreq_driver.getavg = get_measured_perf; |
| 736 | ecx = cpuid_ecx(6); | ||
| 737 | if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) | ||
| 738 | acpi_cpufreq_driver.getavg = get_measured_perf; | ||
| 739 | } | ||
| 740 | 674 | ||
| 741 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 675 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
| 742 | for (i = 0; i < perf->state_count; i++) | 676 | for (i = 0; i < perf->state_count; i++) |
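With the shared helper in place, get_measured_perf() reduces to scaling the policy's maximum frequency by that fixed-point ratio: retval = (max_freq * ratio) >> APERFMPERF_SHIFT. As a worked example with made-up numbers, a cpuinfo.max_freq of 3000000 kHz and a ratio of 512 (0.5 in 10-bit fixed point) give (3000000 * 512) >> 10 = 1500000 kHz, i.e. the CPU averaged half of its maximum frequency over the sampling interval.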
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 80a722a071b5..40e1835b35e8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
| @@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
| 350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
| 351 | } | 351 | } |
| 352 | 352 | ||
| 353 | if (c->cpuid_level > 6) { | ||
| 354 | unsigned ecx = cpuid_ecx(6); | ||
| 355 | if (ecx & 0x01) | ||
| 356 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
| 357 | } | ||
| 358 | |||
| 353 | if (cpu_has_xmm2) | 359 | if (cpu_has_xmm2) |
| 354 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 360 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
| 355 | if (cpu_has_ds) { | 361 | if (cpu_has_ds) { |
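The new X86_FEATURE_APERFMPERF bit is taken from CPUID leaf 6 (thermal and power management), ECX bit 0, which advertises the APERF/MPERF MSR pair. For reference, a small user-space sketch of the same probe using GCC's <cpuid.h> helper; this is only an illustration of the CPUID check, not kernel code.

	#include <cpuid.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* CPUID leaf 6 (thermal/power mgmt): ECX bit 0 == APERF/MPERF MSRs */
		if (__get_cpuid(6, &eax, &ebx, &ecx, &edx) && (ecx & 0x1))
			puts("APERF/MPERF reported by CPUID");
		else
			puts("APERF/MPERF not reported");

		return 0;
	}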
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
new file mode 100644
index 000000000000..a640ae5ad201
--- /dev/null
+++ b/arch/x86/kernel/cpu/sched.c
| @@ -0,0 +1,55 @@ | |||
| 1 | #include <linux/sched.h> | ||
| 2 | #include <linux/math64.h> | ||
| 3 | #include <linux/percpu.h> | ||
| 4 | #include <linux/irqflags.h> | ||
| 5 | |||
| 6 | #include <asm/cpufeature.h> | ||
| 7 | #include <asm/processor.h> | ||
| 8 | |||
| 9 | #ifdef CONFIG_SMP | ||
| 10 | |||
| 11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
| 12 | |||
| 13 | static unsigned long scale_aperfmperf(void) | ||
| 14 | { | ||
| 15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
| 16 | unsigned long ratio, flags; | ||
| 17 | |||
| 18 | local_irq_save(flags); | ||
| 19 | get_aperfmperf(&val); | ||
| 20 | local_irq_restore(flags); | ||
| 21 | |||
| 22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
| 23 | *old = val; | ||
| 24 | |||
| 25 | return ratio; | ||
| 26 | } | ||
| 27 | |||
| 28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
| 29 | { | ||
| 30 | /* | ||
| 31 | * do aperf/mperf on the cpu level because it includes things | ||
| 32 | * like turbo mode, which are relevant to full cores. | ||
| 33 | */ | ||
| 34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
| 35 | return scale_aperfmperf(); | ||
| 36 | |||
| 37 | /* | ||
| 38 | * maybe have something cpufreq here | ||
| 39 | */ | ||
| 40 | |||
| 41 | return default_scale_freq_power(sd, cpu); | ||
| 42 | } | ||
| 43 | |||
| 44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
| 45 | { | ||
| 46 | /* | ||
| 47 | * aperf/mperf already includes the smt gain | ||
| 48 | */ | ||
| 49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
| 50 | return SCHED_LOAD_SCALE; | ||
| 51 | |||
| 52 | return default_scale_smt_power(sd, cpu); | ||
| 53 | } | ||
| 54 | |||
| 55 | #endif | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f3d74bd04d18..8af3d249170e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
| @@ -190,6 +190,7 @@ extern unsigned long long time_sync_thresh; | |||
| 190 | /* in tsk->state again */ | 190 | /* in tsk->state again */ |
| 191 | #define TASK_DEAD 64 | 191 | #define TASK_DEAD 64 |
| 192 | #define TASK_WAKEKILL 128 | 192 | #define TASK_WAKEKILL 128 |
| 193 | #define TASK_WAKING 256 | ||
| 193 | 194 | ||
| 194 | /* Convenience macros for the sake of set_task_state */ | 195 | /* Convenience macros for the sake of set_task_state */ |
| 195 | #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) | 196 | #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) |
| @@ -802,14 +803,14 @@ enum cpu_idle_type { | |||
| 802 | #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ | 803 | #define SD_BALANCE_NEWIDLE 0x0002 /* Balance when about to become idle */ |
| 803 | #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ | 804 | #define SD_BALANCE_EXEC 0x0004 /* Balance on exec */ |
| 804 | #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ | 805 | #define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */ |
| 805 | #define SD_WAKE_IDLE 0x0010 /* Wake to idle CPU on task wakeup */ | 806 | #define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */ |
| 806 | #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ | 807 | #define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */ |
| 807 | #define SD_WAKE_BALANCE 0x0040 /* Perform balancing at task wakeup */ | 808 | #define SD_PREFER_LOCAL 0x0040 /* Prefer to keep tasks local to this domain */ |
| 808 | #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ | 809 | #define SD_SHARE_CPUPOWER 0x0080 /* Domain members share cpu power */ |
| 809 | #define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */ | 810 | #define SD_POWERSAVINGS_BALANCE 0x0100 /* Balance for power savings */ |
| 810 | #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ | 811 | #define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */ |
| 811 | #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ | 812 | #define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */ |
| 812 | #define SD_WAKE_IDLE_FAR 0x0800 /* Gain latency sacrificing cache hit */ | 813 | |
| 813 | #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ | 814 | #define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */ |
| 814 | 815 | ||
| 815 | enum powersavings_balance_level { | 816 | enum powersavings_balance_level { |
| @@ -991,6 +992,9 @@ static inline int test_sd_parent(struct sched_domain *sd, int flag) | |||
| 991 | return 0; | 992 | return 0; |
| 992 | } | 993 | } |
| 993 | 994 | ||
| 995 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu); | ||
| 996 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu); | ||
| 997 | |||
| 994 | #else /* CONFIG_SMP */ | 998 | #else /* CONFIG_SMP */ |
| 995 | 999 | ||
| 996 | struct sched_domain_attr; | 1000 | struct sched_domain_attr; |
| @@ -1002,6 +1006,7 @@ partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
| 1002 | } | 1006 | } |
| 1003 | #endif /* !CONFIG_SMP */ | 1007 | #endif /* !CONFIG_SMP */ |
| 1004 | 1008 | ||
| 1009 | |||
| 1005 | struct io_context; /* See blkdev.h */ | 1010 | struct io_context; /* See blkdev.h */ |
| 1006 | 1011 | ||
| 1007 | 1012 | ||
| @@ -1019,6 +1024,12 @@ struct uts_namespace; | |||
| 1019 | struct rq; | 1024 | struct rq; |
| 1020 | struct sched_domain; | 1025 | struct sched_domain; |
| 1021 | 1026 | ||
| 1027 | /* | ||
| 1028 | * wake flags | ||
| 1029 | */ | ||
| 1030 | #define WF_SYNC 0x01 /* waker goes to sleep after wakeup */ | ||
| 1031 | #define WF_FORK 0x02 /* child wakeup after fork */ | ||
| 1032 | |||
| 1022 | struct sched_class { | 1033 | struct sched_class { |
| 1023 | const struct sched_class *next; | 1034 | const struct sched_class *next; |
| 1024 | 1035 | ||
| @@ -1026,13 +1037,13 @@ struct sched_class { | |||
| 1026 | void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); | 1037 | void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep); |
| 1027 | void (*yield_task) (struct rq *rq); | 1038 | void (*yield_task) (struct rq *rq); |
| 1028 | 1039 | ||
| 1029 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int sync); | 1040 | void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags); |
| 1030 | 1041 | ||
| 1031 | struct task_struct * (*pick_next_task) (struct rq *rq); | 1042 | struct task_struct * (*pick_next_task) (struct rq *rq); |
| 1032 | void (*put_prev_task) (struct rq *rq, struct task_struct *p); | 1043 | void (*put_prev_task) (struct rq *rq, struct task_struct *p); |
| 1033 | 1044 | ||
| 1034 | #ifdef CONFIG_SMP | 1045 | #ifdef CONFIG_SMP |
| 1035 | int (*select_task_rq)(struct task_struct *p, int sync); | 1046 | int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags); |
| 1036 | 1047 | ||
| 1037 | unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, | 1048 | unsigned long (*load_balance) (struct rq *this_rq, int this_cpu, |
| 1038 | struct rq *busiest, unsigned long max_load_move, | 1049 | struct rq *busiest, unsigned long max_load_move, |
| @@ -1102,6 +1113,8 @@ struct sched_entity { | |||
| 1102 | u64 start_runtime; | 1113 | u64 start_runtime; |
| 1103 | u64 avg_wakeup; | 1114 | u64 avg_wakeup; |
| 1104 | 1115 | ||
| 1116 | u64 avg_running; | ||
| 1117 | |||
| 1105 | #ifdef CONFIG_SCHEDSTATS | 1118 | #ifdef CONFIG_SCHEDSTATS |
| 1106 | u64 wait_start; | 1119 | u64 wait_start; |
| 1107 | u64 wait_max; | 1120 | u64 wait_max; |
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 85e8cf7d393c..809b26c07090 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
| @@ -95,14 +95,12 @@ int arch_update_cpu_topology(void); | |||
| 95 | | 1*SD_BALANCE_NEWIDLE \ | 95 | | 1*SD_BALANCE_NEWIDLE \ |
| 96 | | 1*SD_BALANCE_EXEC \ | 96 | | 1*SD_BALANCE_EXEC \ |
| 97 | | 1*SD_BALANCE_FORK \ | 97 | | 1*SD_BALANCE_FORK \ |
| 98 | | 0*SD_WAKE_IDLE \ | 98 | | 0*SD_BALANCE_WAKE \ |
| 99 | | 1*SD_WAKE_AFFINE \ | 99 | | 1*SD_WAKE_AFFINE \ |
| 100 | | 1*SD_WAKE_BALANCE \ | ||
| 101 | | 1*SD_SHARE_CPUPOWER \ | 100 | | 1*SD_SHARE_CPUPOWER \ |
| 102 | | 0*SD_POWERSAVINGS_BALANCE \ | 101 | | 0*SD_POWERSAVINGS_BALANCE \ |
| 103 | | 0*SD_SHARE_PKG_RESOURCES \ | 102 | | 0*SD_SHARE_PKG_RESOURCES \ |
| 104 | | 0*SD_SERIALIZE \ | 103 | | 0*SD_SERIALIZE \ |
| 105 | | 0*SD_WAKE_IDLE_FAR \ | ||
| 106 | | 0*SD_PREFER_SIBLING \ | 104 | | 0*SD_PREFER_SIBLING \ |
| 107 | , \ | 105 | , \ |
| 108 | .last_balance = jiffies, \ | 106 | .last_balance = jiffies, \ |
| @@ -122,20 +120,19 @@ int arch_update_cpu_topology(void); | |||
| 122 | .imbalance_pct = 125, \ | 120 | .imbalance_pct = 125, \ |
| 123 | .cache_nice_tries = 1, \ | 121 | .cache_nice_tries = 1, \ |
| 124 | .busy_idx = 2, \ | 122 | .busy_idx = 2, \ |
| 125 | .wake_idx = 1, \ | 123 | .wake_idx = 0, \ |
| 126 | .forkexec_idx = 1, \ | 124 | .forkexec_idx = 0, \ |
| 127 | \ | 125 | \ |
| 128 | .flags = 1*SD_LOAD_BALANCE \ | 126 | .flags = 1*SD_LOAD_BALANCE \ |
| 129 | | 1*SD_BALANCE_NEWIDLE \ | 127 | | 1*SD_BALANCE_NEWIDLE \ |
| 130 | | 1*SD_BALANCE_EXEC \ | 128 | | 1*SD_BALANCE_EXEC \ |
| 131 | | 1*SD_BALANCE_FORK \ | 129 | | 1*SD_BALANCE_FORK \ |
| 132 | | 1*SD_WAKE_IDLE \ | 130 | | 0*SD_BALANCE_WAKE \ |
| 133 | | 1*SD_WAKE_AFFINE \ | 131 | | 1*SD_WAKE_AFFINE \ |
| 134 | | 1*SD_WAKE_BALANCE \ | 132 | | 1*SD_PREFER_LOCAL \ |
| 135 | | 0*SD_SHARE_CPUPOWER \ | 133 | | 0*SD_SHARE_CPUPOWER \ |
| 136 | | 1*SD_SHARE_PKG_RESOURCES \ | 134 | | 1*SD_SHARE_PKG_RESOURCES \ |
| 137 | | 0*SD_SERIALIZE \ | 135 | | 0*SD_SERIALIZE \ |
| 138 | | 0*SD_WAKE_IDLE_FAR \ | ||
| 139 | | sd_balance_for_mc_power() \ | 136 | | sd_balance_for_mc_power() \ |
| 140 | | sd_power_saving_flags() \ | 137 | | sd_power_saving_flags() \ |
| 141 | , \ | 138 | , \ |
| @@ -155,21 +152,20 @@ int arch_update_cpu_topology(void); | |||
| 155 | .cache_nice_tries = 1, \ | 152 | .cache_nice_tries = 1, \ |
| 156 | .busy_idx = 2, \ | 153 | .busy_idx = 2, \ |
| 157 | .idle_idx = 1, \ | 154 | .idle_idx = 1, \ |
| 158 | .newidle_idx = 2, \ | 155 | .newidle_idx = 0, \ |
| 159 | .wake_idx = 1, \ | 156 | .wake_idx = 0, \ |
| 160 | .forkexec_idx = 1, \ | 157 | .forkexec_idx = 0, \ |
| 161 | \ | 158 | \ |
| 162 | .flags = 1*SD_LOAD_BALANCE \ | 159 | .flags = 1*SD_LOAD_BALANCE \ |
| 163 | | 1*SD_BALANCE_NEWIDLE \ | 160 | | 1*SD_BALANCE_NEWIDLE \ |
| 164 | | 1*SD_BALANCE_EXEC \ | 161 | | 1*SD_BALANCE_EXEC \ |
| 165 | | 1*SD_BALANCE_FORK \ | 162 | | 1*SD_BALANCE_FORK \ |
| 166 | | 1*SD_WAKE_IDLE \ | 163 | | 0*SD_BALANCE_WAKE \ |
| 167 | | 0*SD_WAKE_AFFINE \ | 164 | | 1*SD_WAKE_AFFINE \ |
| 168 | | 1*SD_WAKE_BALANCE \ | 165 | | 1*SD_PREFER_LOCAL \ |
| 169 | | 0*SD_SHARE_CPUPOWER \ | 166 | | 0*SD_SHARE_CPUPOWER \ |
| 170 | | 0*SD_SHARE_PKG_RESOURCES \ | 167 | | 0*SD_SHARE_PKG_RESOURCES \ |
| 171 | | 0*SD_SERIALIZE \ | 168 | | 0*SD_SERIALIZE \ |
| 172 | | 0*SD_WAKE_IDLE_FAR \ | ||
| 173 | | sd_balance_for_package_power() \ | 169 | | sd_balance_for_package_power() \ |
| 174 | | sd_power_saving_flags() \ | 170 | | sd_power_saving_flags() \ |
| 175 | , \ | 171 | , \ |
| @@ -191,14 +187,12 @@ int arch_update_cpu_topology(void); | |||
| 191 | | 1*SD_BALANCE_NEWIDLE \ | 187 | | 1*SD_BALANCE_NEWIDLE \ |
| 192 | | 0*SD_BALANCE_EXEC \ | 188 | | 0*SD_BALANCE_EXEC \ |
| 193 | | 0*SD_BALANCE_FORK \ | 189 | | 0*SD_BALANCE_FORK \ |
| 194 | | 0*SD_WAKE_IDLE \ | 190 | | 0*SD_BALANCE_WAKE \ |
| 195 | | 1*SD_WAKE_AFFINE \ | 191 | | 0*SD_WAKE_AFFINE \ |
| 196 | | 0*SD_WAKE_BALANCE \ | ||
| 197 | | 0*SD_SHARE_CPUPOWER \ | 192 | | 0*SD_SHARE_CPUPOWER \ |
| 198 | | 0*SD_POWERSAVINGS_BALANCE \ | 193 | | 0*SD_POWERSAVINGS_BALANCE \ |
| 199 | | 0*SD_SHARE_PKG_RESOURCES \ | 194 | | 0*SD_SHARE_PKG_RESOURCES \ |
| 200 | | 1*SD_SERIALIZE \ | 195 | | 1*SD_SERIALIZE \ |
| 201 | | 1*SD_WAKE_IDLE_FAR \ | ||
| 202 | | 0*SD_PREFER_SIBLING \ | 196 | | 0*SD_PREFER_SIBLING \ |
| 203 | , \ | 197 | , \ |
| 204 | .last_balance = jiffies, \ | 198 | .last_balance = jiffies, \ |
diff --git a/include/linux/wait.h b/include/linux/wait.h
index cf3c2f5dba51..a48e16b77d5e 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
| @@ -26,8 +26,8 @@ | |||
| 26 | #include <asm/current.h> | 26 | #include <asm/current.h> |
| 27 | 27 | ||
| 28 | typedef struct __wait_queue wait_queue_t; | 28 | typedef struct __wait_queue wait_queue_t; |
| 29 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int sync, void *key); | 29 | typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); |
| 30 | int default_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); | 30 | int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); |
| 31 | 31 | ||
| 32 | struct __wait_queue { | 32 | struct __wait_queue { |
| 33 | unsigned int flags; | 33 | unsigned int flags; |
diff --git a/kernel/sched.c b/kernel/sched.c
index d9db3fb17573..faf4d463bbff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -119,8 +119,6 @@ | |||
| 119 | */ | 119 | */ |
| 120 | #define RUNTIME_INF ((u64)~0ULL) | 120 | #define RUNTIME_INF ((u64)~0ULL) |
| 121 | 121 | ||
| 122 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
| 123 | |||
| 124 | static inline int rt_policy(int policy) | 122 | static inline int rt_policy(int policy) |
| 125 | { | 123 | { |
| 126 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 124 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
| @@ -378,13 +376,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | |||
| 378 | 376 | ||
| 379 | #else | 377 | #else |
| 380 | 378 | ||
| 381 | #ifdef CONFIG_SMP | ||
| 382 | static int root_task_group_empty(void) | ||
| 383 | { | ||
| 384 | return 1; | ||
| 385 | } | ||
| 386 | #endif | ||
| 387 | |||
| 388 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 379 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
| 389 | static inline struct task_group *task_group(struct task_struct *p) | 380 | static inline struct task_group *task_group(struct task_struct *p) |
| 390 | { | 381 | { |
| @@ -514,14 +505,6 @@ struct root_domain { | |||
| 514 | #ifdef CONFIG_SMP | 505 | #ifdef CONFIG_SMP |
| 515 | struct cpupri cpupri; | 506 | struct cpupri cpupri; |
| 516 | #endif | 507 | #endif |
| 517 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | ||
| 518 | /* | ||
| 519 | * Preferred wake up cpu nominated by sched_mc balance that will be | ||
| 520 | * used when most cpus are idle in the system indicating overall very | ||
| 521 | * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) | ||
| 522 | */ | ||
| 523 | unsigned int sched_mc_preferred_wakeup_cpu; | ||
| 524 | #endif | ||
| 525 | }; | 508 | }; |
| 526 | 509 | ||
| 527 | /* | 510 | /* |
| @@ -646,9 +629,10 @@ struct rq { | |||
| 646 | 629 | ||
| 647 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 630 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
| 648 | 631 | ||
| 649 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) | 632 | static inline |
| 633 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
| 650 | { | 634 | { |
| 651 | rq->curr->sched_class->check_preempt_curr(rq, p, sync); | 635 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
| 652 | } | 636 | } |
| 653 | 637 | ||
| 654 | static inline int cpu_of(struct rq *rq) | 638 | static inline int cpu_of(struct rq *rq) |
| @@ -1509,8 +1493,65 @@ static int tg_nop(struct task_group *tg, void *data) | |||
| 1509 | #endif | 1493 | #endif |
| 1510 | 1494 | ||
| 1511 | #ifdef CONFIG_SMP | 1495 | #ifdef CONFIG_SMP |
| 1512 | static unsigned long source_load(int cpu, int type); | 1496 | /* Used instead of source_load when we know the type == 0 */ |
| 1513 | static unsigned long target_load(int cpu, int type); | 1497 | static unsigned long weighted_cpuload(const int cpu) |
| 1498 | { | ||
| 1499 | return cpu_rq(cpu)->load.weight; | ||
| 1500 | } | ||
| 1501 | |||
| 1502 | /* | ||
| 1503 | * Return a low guess at the load of a migration-source cpu weighted | ||
| 1504 | * according to the scheduling class and "nice" value. | ||
| 1505 | * | ||
| 1506 | * We want to under-estimate the load of migration sources, to | ||
| 1507 | * balance conservatively. | ||
| 1508 | */ | ||
| 1509 | static unsigned long source_load(int cpu, int type) | ||
| 1510 | { | ||
| 1511 | struct rq *rq = cpu_rq(cpu); | ||
| 1512 | unsigned long total = weighted_cpuload(cpu); | ||
| 1513 | |||
| 1514 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 1515 | return total; | ||
| 1516 | |||
| 1517 | return min(rq->cpu_load[type-1], total); | ||
| 1518 | } | ||
| 1519 | |||
| 1520 | /* | ||
| 1521 | * Return a high guess at the load of a migration-target cpu weighted | ||
| 1522 | * according to the scheduling class and "nice" value. | ||
| 1523 | */ | ||
| 1524 | static unsigned long target_load(int cpu, int type) | ||
| 1525 | { | ||
| 1526 | struct rq *rq = cpu_rq(cpu); | ||
| 1527 | unsigned long total = weighted_cpuload(cpu); | ||
| 1528 | |||
| 1529 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 1530 | return total; | ||
| 1531 | |||
| 1532 | return max(rq->cpu_load[type-1], total); | ||
| 1533 | } | ||
| 1534 | |||
| 1535 | static struct sched_group *group_of(int cpu) | ||
| 1536 | { | ||
| 1537 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
| 1538 | |||
| 1539 | if (!sd) | ||
| 1540 | return NULL; | ||
| 1541 | |||
| 1542 | return sd->groups; | ||
| 1543 | } | ||
| 1544 | |||
| 1545 | static unsigned long power_of(int cpu) | ||
| 1546 | { | ||
| 1547 | struct sched_group *group = group_of(cpu); | ||
| 1548 | |||
| 1549 | if (!group) | ||
| 1550 | return SCHED_LOAD_SCALE; | ||
| 1551 | |||
| 1552 | return group->cpu_power; | ||
| 1553 | } | ||
| 1554 | |||
| 1514 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1555 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
| 1515 | 1556 | ||
| 1516 | static unsigned long cpu_avg_load_per_task(int cpu) | 1557 | static unsigned long cpu_avg_load_per_task(int cpu) |
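These load helpers are moved up in the file here (their old definitions are removed further down in this diff). The biasing they implement is deliberate: source_load() under-estimates a migration source by taking the minimum of the decayed cpu_load[type-1] sample and the instantaneous weighted load, while target_load() over-estimates a target by taking the maximum, so a migration only happens when the imbalance survives both biases. For example, with a decayed load of 2048 and an instantaneous load of 1536, source_load() reports 1536 while target_load() reports 2048.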
| @@ -1695,6 +1736,8 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
| 1695 | 1736 | ||
| 1696 | #ifdef CONFIG_PREEMPT | 1737 | #ifdef CONFIG_PREEMPT |
| 1697 | 1738 | ||
| 1739 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
| 1740 | |||
| 1698 | /* | 1741 | /* |
| 1699 | * fair double_lock_balance: Safely acquires both rq->locks in a fair | 1742 | * fair double_lock_balance: Safely acquires both rq->locks in a fair |
| 1700 | * way at the expense of forcing extra atomic operations in all | 1743 | * way at the expense of forcing extra atomic operations in all |
| @@ -1959,13 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
| 1959 | } | 2002 | } |
| 1960 | 2003 | ||
| 1961 | #ifdef CONFIG_SMP | 2004 | #ifdef CONFIG_SMP |
| 1962 | |||
| 1963 | /* Used instead of source_load when we know the type == 0 */ | ||
| 1964 | static unsigned long weighted_cpuload(const int cpu) | ||
| 1965 | { | ||
| 1966 | return cpu_rq(cpu)->load.weight; | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | /* | 2005 | /* |
| 1970 | * Is this task likely cache-hot: | 2006 | * Is this task likely cache-hot: |
| 1971 | */ | 2007 | */ |
| @@ -2239,185 +2275,6 @@ void kick_process(struct task_struct *p) | |||
| 2239 | preempt_enable(); | 2275 | preempt_enable(); |
| 2240 | } | 2276 | } |
| 2241 | EXPORT_SYMBOL_GPL(kick_process); | 2277 | EXPORT_SYMBOL_GPL(kick_process); |
| 2242 | |||
| 2243 | /* | ||
| 2244 | * Return a low guess at the load of a migration-source cpu weighted | ||
| 2245 | * according to the scheduling class and "nice" value. | ||
| 2246 | * | ||
| 2247 | * We want to under-estimate the load of migration sources, to | ||
| 2248 | * balance conservatively. | ||
| 2249 | */ | ||
| 2250 | static unsigned long source_load(int cpu, int type) | ||
| 2251 | { | ||
| 2252 | struct rq *rq = cpu_rq(cpu); | ||
| 2253 | unsigned long total = weighted_cpuload(cpu); | ||
| 2254 | |||
| 2255 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 2256 | return total; | ||
| 2257 | |||
| 2258 | return min(rq->cpu_load[type-1], total); | ||
| 2259 | } | ||
| 2260 | |||
| 2261 | /* | ||
| 2262 | * Return a high guess at the load of a migration-target cpu weighted | ||
| 2263 | * according to the scheduling class and "nice" value. | ||
| 2264 | */ | ||
| 2265 | static unsigned long target_load(int cpu, int type) | ||
| 2266 | { | ||
| 2267 | struct rq *rq = cpu_rq(cpu); | ||
| 2268 | unsigned long total = weighted_cpuload(cpu); | ||
| 2269 | |||
| 2270 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 2271 | return total; | ||
| 2272 | |||
| 2273 | return max(rq->cpu_load[type-1], total); | ||
| 2274 | } | ||
| 2275 | |||
| 2276 | /* | ||
| 2277 | * find_idlest_group finds and returns the least busy CPU group within the | ||
| 2278 | * domain. | ||
| 2279 | */ | ||
| 2280 | static struct sched_group * | ||
| 2281 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | ||
| 2282 | { | ||
| 2283 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; | ||
| 2284 | unsigned long min_load = ULONG_MAX, this_load = 0; | ||
| 2285 | int load_idx = sd->forkexec_idx; | ||
| 2286 | int imbalance = 100 + (sd->imbalance_pct-100)/2; | ||
| 2287 | |||
| 2288 | do { | ||
| 2289 | unsigned long load, avg_load; | ||
| 2290 | int local_group; | ||
| 2291 | int i; | ||
| 2292 | |||
| 2293 | /* Skip over this group if it has no CPUs allowed */ | ||
| 2294 | if (!cpumask_intersects(sched_group_cpus(group), | ||
| 2295 | &p->cpus_allowed)) | ||
| 2296 | continue; | ||
| 2297 | |||
| 2298 | local_group = cpumask_test_cpu(this_cpu, | ||
| 2299 | sched_group_cpus(group)); | ||
| 2300 | |||
| 2301 | /* Tally up the load of all CPUs in the group */ | ||
| 2302 | avg_load = 0; | ||
| 2303 | |||
| 2304 | for_each_cpu(i, sched_group_cpus(group)) { | ||
| 2305 | /* Bias balancing toward cpus of our domain */ | ||
| 2306 | if (local_group) | ||
| 2307 | load = source_load(i, load_idx); | ||
| 2308 | else | ||
| 2309 | load = target_load(i, load_idx); | ||
| 2310 | |||
| 2311 | avg_load += load; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | /* Adjust by relative CPU power of the group */ | ||
| 2315 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
| 2316 | |||
| 2317 | if (local_group) { | ||
| 2318 | this_load = avg_load; | ||
| 2319 | this = group; | ||
| 2320 | } else if (avg_load < min_load) { | ||
| 2321 | min_load = avg_load; | ||
| 2322 | idlest = group; | ||
| 2323 | } | ||
| 2324 | } while (group = group->next, group != sd->groups); | ||
| 2325 | |||
| 2326 | if (!idlest || 100*this_load < imbalance*min_load) | ||
| 2327 | return NULL; | ||
| 2328 | return idlest; | ||
| 2329 | } | ||
| 2330 | |||
| 2331 | /* | ||
| 2332 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
| 2333 | */ | ||
| 2334 | static int | ||
| 2335 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
| 2336 | { | ||
| 2337 | unsigned long load, min_load = ULONG_MAX; | ||
| 2338 | int idlest = -1; | ||
| 2339 | int i; | ||
| 2340 | |||
| 2341 | /* Traverse only the allowed CPUs */ | ||
| 2342 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
| 2343 | load = weighted_cpuload(i); | ||
| 2344 | |||
| 2345 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
| 2346 | min_load = load; | ||
| 2347 | idlest = i; | ||
| 2348 | } | ||
| 2349 | } | ||
| 2350 | |||
| 2351 | return idlest; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | /* | ||
| 2355 | * sched_balance_self: balance the current task (running on cpu) in domains | ||
| 2356 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | ||
| 2357 | * SD_BALANCE_EXEC. | ||
| 2358 | * | ||
| 2359 | * Balance, ie. select the least loaded group. | ||
| 2360 | * | ||
| 2361 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
| 2362 | * | ||
| 2363 | * preempt must be disabled. | ||
| 2364 | */ | ||
| 2365 | static int sched_balance_self(int cpu, int flag) | ||
| 2366 | { | ||
| 2367 | struct task_struct *t = current; | ||
| 2368 | struct sched_domain *tmp, *sd = NULL; | ||
| 2369 | |||
| 2370 | for_each_domain(cpu, tmp) { | ||
| 2371 | /* | ||
| 2372 | * If power savings logic is enabled for a domain, stop there. | ||
| 2373 | */ | ||
| 2374 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
| 2375 | break; | ||
| 2376 | if (tmp->flags & flag) | ||
| 2377 | sd = tmp; | ||
| 2378 | } | ||
| 2379 | |||
| 2380 | if (sd) | ||
| 2381 | update_shares(sd); | ||
| 2382 | |||
| 2383 | while (sd) { | ||
| 2384 | struct sched_group *group; | ||
| 2385 | int new_cpu, weight; | ||
| 2386 | |||
| 2387 | if (!(sd->flags & flag)) { | ||
| 2388 | sd = sd->child; | ||
| 2389 | continue; | ||
| 2390 | } | ||
| 2391 | |||
| 2392 | group = find_idlest_group(sd, t, cpu); | ||
| 2393 | if (!group) { | ||
| 2394 | sd = sd->child; | ||
| 2395 | continue; | ||
| 2396 | } | ||
| 2397 | |||
| 2398 | new_cpu = find_idlest_cpu(group, t, cpu); | ||
| 2399 | if (new_cpu == -1 || new_cpu == cpu) { | ||
| 2400 | /* Now try balancing at a lower domain level of cpu */ | ||
| 2401 | sd = sd->child; | ||
| 2402 | continue; | ||
| 2403 | } | ||
| 2404 | |||
| 2405 | /* Now try balancing at a lower domain level of new_cpu */ | ||
| 2406 | cpu = new_cpu; | ||
| 2407 | weight = cpumask_weight(sched_domain_span(sd)); | ||
| 2408 | sd = NULL; | ||
| 2409 | for_each_domain(cpu, tmp) { | ||
| 2410 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
| 2411 | break; | ||
| 2412 | if (tmp->flags & flag) | ||
| 2413 | sd = tmp; | ||
| 2414 | } | ||
| 2415 | /* while loop will break here if sd == NULL */ | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | return cpu; | ||
| 2419 | } | ||
| 2420 | |||
| 2421 | #endif /* CONFIG_SMP */ | 2278 | #endif /* CONFIG_SMP */ |
| 2422 | 2279 | ||
| 2423 | /** | 2280 | /** |
| @@ -2455,37 +2312,22 @@ void task_oncpu_function_call(struct task_struct *p, | |||
| 2455 | * | 2312 | * |
| 2456 | * returns failure only if the task is already active. | 2313 | * returns failure only if the task is already active. |
| 2457 | */ | 2314 | */ |
| 2458 | static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | 2315 | static int try_to_wake_up(struct task_struct *p, unsigned int state, |
| 2316 | int wake_flags) | ||
| 2459 | { | 2317 | { |
| 2460 | int cpu, orig_cpu, this_cpu, success = 0; | 2318 | int cpu, orig_cpu, this_cpu, success = 0; |
| 2461 | unsigned long flags; | 2319 | unsigned long flags; |
| 2462 | long old_state; | ||
| 2463 | struct rq *rq; | 2320 | struct rq *rq; |
| 2464 | 2321 | ||
| 2465 | if (!sched_feat(SYNC_WAKEUPS)) | 2322 | if (!sched_feat(SYNC_WAKEUPS)) |
| 2466 | sync = 0; | 2323 | wake_flags &= ~WF_SYNC; |
| 2467 | |||
| 2468 | #ifdef CONFIG_SMP | ||
| 2469 | if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { | ||
| 2470 | struct sched_domain *sd; | ||
| 2471 | 2324 | ||
| 2472 | this_cpu = raw_smp_processor_id(); | 2325 | this_cpu = get_cpu(); |
| 2473 | cpu = task_cpu(p); | ||
| 2474 | |||
| 2475 | for_each_domain(this_cpu, sd) { | ||
| 2476 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
| 2477 | update_shares(sd); | ||
| 2478 | break; | ||
| 2479 | } | ||
| 2480 | } | ||
| 2481 | } | ||
| 2482 | #endif | ||
| 2483 | 2326 | ||
| 2484 | smp_wmb(); | 2327 | smp_wmb(); |
| 2485 | rq = task_rq_lock(p, &flags); | 2328 | rq = task_rq_lock(p, &flags); |
| 2486 | update_rq_clock(rq); | 2329 | update_rq_clock(rq); |
| 2487 | old_state = p->state; | 2330 | if (!(p->state & state)) |
| 2488 | if (!(old_state & state)) | ||
| 2489 | goto out; | 2331 | goto out; |
| 2490 | 2332 | ||
| 2491 | if (p->se.on_rq) | 2333 | if (p->se.on_rq) |
| @@ -2493,27 +2335,29 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 2493 | 2335 | ||
| 2494 | cpu = task_cpu(p); | 2336 | cpu = task_cpu(p); |
| 2495 | orig_cpu = cpu; | 2337 | orig_cpu = cpu; |
| 2496 | this_cpu = smp_processor_id(); | ||
| 2497 | 2338 | ||
| 2498 | #ifdef CONFIG_SMP | 2339 | #ifdef CONFIG_SMP |
| 2499 | if (unlikely(task_running(rq, p))) | 2340 | if (unlikely(task_running(rq, p))) |
| 2500 | goto out_activate; | 2341 | goto out_activate; |
| 2501 | 2342 | ||
| 2502 | cpu = p->sched_class->select_task_rq(p, sync); | 2343 | /* |
| 2503 | if (cpu != orig_cpu) { | 2344 | * In order to handle concurrent wakeups and release the rq->lock |
| 2345 | * we put the task in TASK_WAKING state. | ||
| 2346 | * | ||
| 2347 | * First fix up the nr_uninterruptible count: | ||
| 2348 | */ | ||
| 2349 | if (task_contributes_to_load(p)) | ||
| 2350 | rq->nr_uninterruptible--; | ||
| 2351 | p->state = TASK_WAKING; | ||
| 2352 | task_rq_unlock(rq, &flags); | ||
| 2353 | |||
| 2354 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
| 2355 | if (cpu != orig_cpu) | ||
| 2504 | set_task_cpu(p, cpu); | 2356 | set_task_cpu(p, cpu); |
| 2505 | task_rq_unlock(rq, &flags); | ||
| 2506 | /* might preempt at this point */ | ||
| 2507 | rq = task_rq_lock(p, &flags); | ||
| 2508 | old_state = p->state; | ||
| 2509 | if (!(old_state & state)) | ||
| 2510 | goto out; | ||
| 2511 | if (p->se.on_rq) | ||
| 2512 | goto out_running; | ||
| 2513 | 2357 | ||
| 2514 | this_cpu = smp_processor_id(); | 2358 | rq = task_rq_lock(p, &flags); |
| 2515 | cpu = task_cpu(p); | 2359 | WARN_ON(p->state != TASK_WAKING); |
| 2516 | } | 2360 | cpu = task_cpu(p); |
| 2517 | 2361 | ||
| 2518 | #ifdef CONFIG_SCHEDSTATS | 2362 | #ifdef CONFIG_SCHEDSTATS |
| 2519 | schedstat_inc(rq, ttwu_count); | 2363 | schedstat_inc(rq, ttwu_count); |
| @@ -2533,7 +2377,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 2533 | out_activate: | 2377 | out_activate: |
| 2534 | #endif /* CONFIG_SMP */ | 2378 | #endif /* CONFIG_SMP */ |
| 2535 | schedstat_inc(p, se.nr_wakeups); | 2379 | schedstat_inc(p, se.nr_wakeups); |
| 2536 | if (sync) | 2380 | if (wake_flags & WF_SYNC) |
| 2537 | schedstat_inc(p, se.nr_wakeups_sync); | 2381 | schedstat_inc(p, se.nr_wakeups_sync); |
| 2538 | if (orig_cpu != cpu) | 2382 | if (orig_cpu != cpu) |
| 2539 | schedstat_inc(p, se.nr_wakeups_migrate); | 2383 | schedstat_inc(p, se.nr_wakeups_migrate); |
| @@ -2562,7 +2406,7 @@ out_activate: | |||
| 2562 | 2406 | ||
| 2563 | out_running: | 2407 | out_running: |
| 2564 | trace_sched_wakeup(rq, p, success); | 2408 | trace_sched_wakeup(rq, p, success); |
| 2565 | check_preempt_curr(rq, p, sync); | 2409 | check_preempt_curr(rq, p, wake_flags); |
| 2566 | 2410 | ||
| 2567 | p->state = TASK_RUNNING; | 2411 | p->state = TASK_RUNNING; |
| 2568 | #ifdef CONFIG_SMP | 2412 | #ifdef CONFIG_SMP |
| @@ -2571,6 +2415,7 @@ out_running: | |||
| 2571 | #endif | 2415 | #endif |
| 2572 | out: | 2416 | out: |
| 2573 | task_rq_unlock(rq, &flags); | 2417 | task_rq_unlock(rq, &flags); |
| 2418 | put_cpu(); | ||
| 2574 | 2419 | ||
| 2575 | return success; | 2420 | return success; |
| 2576 | } | 2421 | } |
| @@ -2613,6 +2458,7 @@ static void __sched_fork(struct task_struct *p) | |||
| 2613 | p->se.avg_overlap = 0; | 2458 | p->se.avg_overlap = 0; |
| 2614 | p->se.start_runtime = 0; | 2459 | p->se.start_runtime = 0; |
| 2615 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2460 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
| 2461 | p->se.avg_running = 0; | ||
| 2616 | 2462 | ||
| 2617 | #ifdef CONFIG_SCHEDSTATS | 2463 | #ifdef CONFIG_SCHEDSTATS |
| 2618 | p->se.wait_start = 0; | 2464 | p->se.wait_start = 0; |
| @@ -2674,11 +2520,6 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2674 | 2520 | ||
| 2675 | __sched_fork(p); | 2521 | __sched_fork(p); |
| 2676 | 2522 | ||
| 2677 | #ifdef CONFIG_SMP | ||
| 2678 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | ||
| 2679 | #endif | ||
| 2680 | set_task_cpu(p, cpu); | ||
| 2681 | |||
| 2682 | /* | 2523 | /* |
| 2683 | * Make sure we do not leak PI boosting priority to the child. | 2524 | * Make sure we do not leak PI boosting priority to the child. |
| 2684 | */ | 2525 | */ |
| @@ -2709,6 +2550,11 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2709 | if (!rt_prio(p->prio)) | 2550 | if (!rt_prio(p->prio)) |
| 2710 | p->sched_class = &fair_sched_class; | 2551 | p->sched_class = &fair_sched_class; |
| 2711 | 2552 | ||
| 2553 | #ifdef CONFIG_SMP | ||
| 2554 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | ||
| 2555 | #endif | ||
| 2556 | set_task_cpu(p, cpu); | ||
| 2557 | |||
| 2712 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2558 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 2713 | if (likely(sched_info_on())) | 2559 | if (likely(sched_info_on())) |
| 2714 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2560 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
| @@ -2754,7 +2600,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2754 | inc_nr_running(rq); | 2600 | inc_nr_running(rq); |
| 2755 | } | 2601 | } |
| 2756 | trace_sched_wakeup_new(rq, p, 1); | 2602 | trace_sched_wakeup_new(rq, p, 1); |
| 2757 | check_preempt_curr(rq, p, 0); | 2603 | check_preempt_curr(rq, p, WF_FORK); |
| 2758 | #ifdef CONFIG_SMP | 2604 | #ifdef CONFIG_SMP |
| 2759 | if (p->sched_class->task_wake_up) | 2605 | if (p->sched_class->task_wake_up) |
| 2760 | p->sched_class->task_wake_up(rq, p); | 2606 | p->sched_class->task_wake_up(rq, p); |
| @@ -3263,7 +3109,7 @@ out: | |||
| 3263 | void sched_exec(void) | 3109 | void sched_exec(void) |
| 3264 | { | 3110 | { |
| 3265 | int new_cpu, this_cpu = get_cpu(); | 3111 | int new_cpu, this_cpu = get_cpu(); |
| 3266 | new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); | 3112 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); |
| 3267 | put_cpu(); | 3113 | put_cpu(); |
| 3268 | if (new_cpu != this_cpu) | 3114 | if (new_cpu != this_cpu) |
| 3269 | sched_migrate_task(current, new_cpu); | 3115 | sched_migrate_task(current, new_cpu); |
| @@ -3683,11 +3529,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
| 3683 | *imbalance = sds->min_load_per_task; | 3529 | *imbalance = sds->min_load_per_task; |
| 3684 | sds->busiest = sds->group_min; | 3530 | sds->busiest = sds->group_min; |
| 3685 | 3531 | ||
| 3686 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { | ||
| 3687 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = | ||
| 3688 | group_first_cpu(sds->group_leader); | ||
| 3689 | } | ||
| 3690 | |||
| 3691 | return 1; | 3532 | return 1; |
| 3692 | 3533 | ||
| 3693 | } | 3534 | } |
| @@ -3711,7 +3552,18 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
| 3711 | } | 3552 | } |
| 3712 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ | 3553 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
| 3713 | 3554 | ||
| 3714 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | 3555 | |
| 3556 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) | ||
| 3557 | { | ||
| 3558 | return SCHED_LOAD_SCALE; | ||
| 3559 | } | ||
| 3560 | |||
| 3561 | unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
| 3562 | { | ||
| 3563 | return default_scale_freq_power(sd, cpu); | ||
| 3564 | } | ||
| 3565 | |||
| 3566 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) | ||
| 3715 | { | 3567 | { |
| 3716 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 3568 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); |
| 3717 | unsigned long smt_gain = sd->smt_gain; | 3569 | unsigned long smt_gain = sd->smt_gain; |
| @@ -3721,6 +3573,11 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | |||
| 3721 | return smt_gain; | 3573 | return smt_gain; |
| 3722 | } | 3574 | } |
| 3723 | 3575 | ||
| 3576 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
| 3577 | { | ||
| 3578 | return default_scale_smt_power(sd, cpu); | ||
| 3579 | } | ||
| 3580 | |||
| 3724 | unsigned long scale_rt_power(int cpu) | 3581 | unsigned long scale_rt_power(int cpu) |
| 3725 | { | 3582 | { |
| 3726 | struct rq *rq = cpu_rq(cpu); | 3583 | struct rq *rq = cpu_rq(cpu); |
| @@ -3745,10 +3602,19 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
| 3745 | unsigned long power = SCHED_LOAD_SCALE; | 3602 | unsigned long power = SCHED_LOAD_SCALE; |
| 3746 | struct sched_group *sdg = sd->groups; | 3603 | struct sched_group *sdg = sd->groups; |
| 3747 | 3604 | ||
| 3748 | /* here we could scale based on cpufreq */ | 3605 | if (sched_feat(ARCH_POWER)) |
| 3606 | power *= arch_scale_freq_power(sd, cpu); | ||
| 3607 | else | ||
| 3608 | power *= default_scale_freq_power(sd, cpu); | ||
| 3609 | |||
| 3610 | power >>= SCHED_LOAD_SHIFT; | ||
| 3749 | 3611 | ||
| 3750 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { | 3612 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { |
| 3751 | power *= arch_scale_smt_power(sd, cpu); | 3613 | if (sched_feat(ARCH_POWER)) |
| 3614 | power *= arch_scale_smt_power(sd, cpu); | ||
| 3615 | else | ||
| 3616 | power *= default_scale_smt_power(sd, cpu); | ||
| 3617 | |||
| 3752 | power >>= SCHED_LOAD_SHIFT; | 3618 | power >>= SCHED_LOAD_SHIFT; |
| 3753 | } | 3619 | } |
| 3754 | 3620 | ||
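Editor's note: update_cpu_power() now composes two fixed-point scale factors, an arch/frequency factor and, for SMT siblings sharing core resources, an smt_gain-based factor; each multiply is followed by a shift by SCHED_LOAD_SHIFT to stay in SCHED_LOAD_SCALE units. The standalone arithmetic below uses SCHED_LOAD_SCALE = 1024 as in the kernel; the smt_gain sample value is only an example.

    /* Fixed-point composition of cpu_power, modelled on update_cpu_power().
     * SCHED_LOAD_SCALE is 1 << SCHED_LOAD_SHIFT; smt_gain is an example. */
    #include <stdio.h>

    #define SCHED_LOAD_SHIFT 10
    #define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)

    int main(void)
    {
        unsigned long power = SCHED_LOAD_SCALE;
        unsigned long freq_scale = SCHED_LOAD_SCALE;   /* default_scale_freq_power() */
        unsigned long smt_gain = 1178;                 /* ~15% gain, example value */
        unsigned long weight = 2;                      /* two SMT siblings */

        power *= freq_scale;
        power >>= SCHED_LOAD_SHIFT;

        /* SD_SHARE_CPUPOWER: split the smt gain across the siblings */
        power *= smt_gain / weight;
        power >>= SCHED_LOAD_SHIFT;

        printf("cpu_power = %lu (full power = %lu)\n", power, SCHED_LOAD_SCALE);
        return 0;
    }

With the ARCH_POWER feature off, the default_scale_*() helpers are used, so behaviour matches the pre-patch constants unless an architecture opts in.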
| @@ -4161,26 +4027,6 @@ ret: | |||
| 4161 | return NULL; | 4027 | return NULL; |
| 4162 | } | 4028 | } |
| 4163 | 4029 | ||
| 4164 | static struct sched_group *group_of(int cpu) | ||
| 4165 | { | ||
| 4166 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
| 4167 | |||
| 4168 | if (!sd) | ||
| 4169 | return NULL; | ||
| 4170 | |||
| 4171 | return sd->groups; | ||
| 4172 | } | ||
| 4173 | |||
| 4174 | static unsigned long power_of(int cpu) | ||
| 4175 | { | ||
| 4176 | struct sched_group *group = group_of(cpu); | ||
| 4177 | |||
| 4178 | if (!group) | ||
| 4179 | return SCHED_LOAD_SCALE; | ||
| 4180 | |||
| 4181 | return group->cpu_power; | ||
| 4182 | } | ||
| 4183 | |||
| 4184 | /* | 4030 | /* |
| 4185 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 4031 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
| 4186 | */ | 4032 | */ |
| @@ -5465,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 5465 | #endif | 5311 | #endif |
| 5466 | } | 5312 | } |
| 5467 | 5313 | ||
| 5468 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 5314 | static void put_prev_task(struct rq *rq, struct task_struct *p) |
| 5469 | { | 5315 | { |
| 5470 | if (prev->state == TASK_RUNNING) { | 5316 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; |
| 5471 | u64 runtime = prev->se.sum_exec_runtime; | ||
| 5472 | 5317 | ||
| 5473 | runtime -= prev->se.prev_sum_exec_runtime; | 5318 | update_avg(&p->se.avg_running, runtime); |
| 5474 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
| 5475 | 5319 | ||
| 5320 | if (p->state == TASK_RUNNING) { | ||
| 5476 | /* | 5321 | /* |
| 5477 | * In order to avoid avg_overlap growing stale when we are | 5322 | * In order to avoid avg_overlap growing stale when we are |
| 5478 | * indeed overlapping and hence not getting put to sleep, grow | 5323 | * indeed overlapping and hence not getting put to sleep, grow |
| @@ -5482,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
| 5482 | * correlates to the amount of cache footprint a task can | 5327 | * correlates to the amount of cache footprint a task can |
| 5483 | * build up. | 5328 | * build up. |
| 5484 | */ | 5329 | */ |
| 5485 | update_avg(&prev->se.avg_overlap, runtime); | 5330 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); |
| 5331 | update_avg(&p->se.avg_overlap, runtime); | ||
| 5332 | } else { | ||
| 5333 | update_avg(&p->se.avg_running, 0); | ||
| 5486 | } | 5334 | } |
| 5487 | prev->sched_class->put_prev_task(rq, prev); | 5335 | p->sched_class->put_prev_task(rq, p); |
| 5488 | } | 5336 | } |
| 5489 | 5337 | ||
| 5490 | /* | 5338 | /* |
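Editor's note: put_prev_task() now feeds every descheduled slice into p->se.avg_running (and still clips the sample for avg_overlap). In this series update_avg() is a simple exponential average that moves the stored value a fixed fraction toward each new sample; the 1/8 step below restates the kernel's convention from memory and should be treated as an assumption.

    /* Exponential running average in the style of se.avg_running /
     * se.avg_overlap: avg += (sample - avg) / 8. */
    #include <stdio.h>
    #include <stdint.h>

    static void update_avg(uint64_t *avg, uint64_t sample)
    {
        int64_t diff = (int64_t)sample - (int64_t)*avg;
        *avg += diff / 8;
    }

    int main(void)
    {
        uint64_t avg_running = 0;
        uint64_t samples[] = { 1000000, 1200000, 800000, 0, 0 };  /* ns per slice */

        for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
            update_avg(&avg_running, samples[i]);
            printf("sample %llu -> avg_running %llu\n",
                   (unsigned long long)samples[i],
                   (unsigned long long)avg_running);
        }
        return 0;
    }

Sleeping tasks push a zero sample in, so avg_running decays for tasks that block often and stays high for CPU hogs, which is what the new WAKEUP_RUNNING preemption test relies on.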
| @@ -5716,10 +5564,10 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
| 5716 | 5564 | ||
| 5717 | #endif /* CONFIG_PREEMPT */ | 5565 | #endif /* CONFIG_PREEMPT */ |
| 5718 | 5566 | ||
| 5719 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, | 5567 | int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, |
| 5720 | void *key) | 5568 | void *key) |
| 5721 | { | 5569 | { |
| 5722 | return try_to_wake_up(curr->private, mode, sync); | 5570 | return try_to_wake_up(curr->private, mode, wake_flags); |
| 5723 | } | 5571 | } |
| 5724 | EXPORT_SYMBOL(default_wake_function); | 5572 | EXPORT_SYMBOL(default_wake_function); |
| 5725 | 5573 | ||
| @@ -5733,14 +5581,14 @@ EXPORT_SYMBOL(default_wake_function); | |||
| 5733 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 5581 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
| 5734 | */ | 5582 | */ |
| 5735 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 5583 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
| 5736 | int nr_exclusive, int sync, void *key) | 5584 | int nr_exclusive, int wake_flags, void *key) |
| 5737 | { | 5585 | { |
| 5738 | wait_queue_t *curr, *next; | 5586 | wait_queue_t *curr, *next; |
| 5739 | 5587 | ||
| 5740 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { | 5588 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { |
| 5741 | unsigned flags = curr->flags; | 5589 | unsigned flags = curr->flags; |
| 5742 | 5590 | ||
| 5743 | if (curr->func(curr, mode, sync, key) && | 5591 | if (curr->func(curr, mode, wake_flags, key) && |
| 5744 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) | 5592 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) |
| 5745 | break; | 5593 | break; |
| 5746 | } | 5594 | } |
| @@ -5801,16 +5649,16 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | |||
| 5801 | int nr_exclusive, void *key) | 5649 | int nr_exclusive, void *key) |
| 5802 | { | 5650 | { |
| 5803 | unsigned long flags; | 5651 | unsigned long flags; |
| 5804 | int sync = 1; | 5652 | int wake_flags = WF_SYNC; |
| 5805 | 5653 | ||
| 5806 | if (unlikely(!q)) | 5654 | if (unlikely(!q)) |
| 5807 | return; | 5655 | return; |
| 5808 | 5656 | ||
| 5809 | if (unlikely(!nr_exclusive)) | 5657 | if (unlikely(!nr_exclusive)) |
| 5810 | sync = 0; | 5658 | wake_flags = 0; |
| 5811 | 5659 | ||
| 5812 | spin_lock_irqsave(&q->lock, flags); | 5660 | spin_lock_irqsave(&q->lock, flags); |
| 5813 | __wake_up_common(q, mode, nr_exclusive, sync, key); | 5661 | __wake_up_common(q, mode, nr_exclusive, wake_flags, key); |
| 5814 | spin_unlock_irqrestore(&q->lock, flags); | 5662 | spin_unlock_irqrestore(&q->lock, flags); |
| 5815 | } | 5663 | } |
| 5816 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); | 5664 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); |
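Editor's note: the wait-queue path simply renames the old `sync` integer to `wake_flags` and passes it through unchanged from __wake_up_sync_key() down to each waiter's callback; only the exclusive-waiter count decides how many entries get woken. The model below captures that walk in userspace; the types and names are simplified stand-ins, not the kernel API.

    /* Simplified model of __wake_up_common(): walk the waiters, hand each
     * callback the wake_flags, stop after nr_exclusive exclusive waiters
     * reported success. */
    #include <stdio.h>

    #define WQ_FLAG_EXCLUSIVE 0x01
    #define WF_SYNC           0x01

    struct waiter {
        unsigned flags;
        int (*func)(struct waiter *w, int wake_flags);
    };

    static int default_wake(struct waiter *w, int wake_flags)
    {
        (void)w;
        printf("woken (%s)\n", (wake_flags & WF_SYNC) ? "sync" : "async");
        return 1;   /* pretend try_to_wake_up() succeeded */
    }

    static void wake_up_common(struct waiter *q, int n, int nr_exclusive, int wake_flags)
    {
        for (int i = 0; i < n; i++) {
            unsigned flags = q[i].flags;

            if (q[i].func(&q[i], wake_flags) &&
                (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                break;
        }
    }

    int main(void)
    {
        struct waiter q[3] = {
            { WQ_FLAG_EXCLUSIVE, default_wake },
            { WQ_FLAG_EXCLUSIVE, default_wake },
            { 0,                 default_wake },
        };

        wake_up_common(q, 3, 1, WF_SYNC);   /* wakes only the first exclusive waiter */
        return 0;
    }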
| @@ -8000,9 +7848,7 @@ static int sd_degenerate(struct sched_domain *sd) | |||
| 8000 | } | 7848 | } |
| 8001 | 7849 | ||
| 8002 | /* Following flags don't use groups */ | 7850 | /* Following flags don't use groups */ |
| 8003 | if (sd->flags & (SD_WAKE_IDLE | | 7851 | if (sd->flags & (SD_WAKE_AFFINE)) |
| 8004 | SD_WAKE_AFFINE | | ||
| 8005 | SD_WAKE_BALANCE)) | ||
| 8006 | return 0; | 7852 | return 0; |
| 8007 | 7853 | ||
| 8008 | return 1; | 7854 | return 1; |
| @@ -8019,10 +7865,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
| 8019 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) | 7865 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) |
| 8020 | return 0; | 7866 | return 0; |
| 8021 | 7867 | ||
| 8022 | /* Does parent contain flags not in child? */ | ||
| 8023 | /* WAKE_BALANCE is a subset of WAKE_AFFINE */ | ||
| 8024 | if (cflags & SD_WAKE_AFFINE) | ||
| 8025 | pflags &= ~SD_WAKE_BALANCE; | ||
| 8026 | /* Flags needing groups don't count if only 1 group in parent */ | 7868 | /* Flags needing groups don't count if only 1 group in parent */ |
| 8027 | if (parent->groups == parent->groups->next) { | 7869 | if (parent->groups == parent->groups->next) { |
| 8028 | pflags &= ~(SD_LOAD_BALANCE | | 7870 | pflags &= ~(SD_LOAD_BALANCE | |
| @@ -8708,10 +8550,10 @@ static void set_domain_attribute(struct sched_domain *sd, | |||
| 8708 | request = attr->relax_domain_level; | 8550 | request = attr->relax_domain_level; |
| 8709 | if (request < sd->level) { | 8551 | if (request < sd->level) { |
| 8710 | /* turn off idle balance on this domain */ | 8552 | /* turn off idle balance on this domain */ |
| 8711 | sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); | 8553 | sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
| 8712 | } else { | 8554 | } else { |
| 8713 | /* turn on idle balance on this domain */ | 8555 | /* turn on idle balance on this domain */ |
| 8714 | sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); | 8556 | sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
| 8715 | } | 8557 | } |
| 8716 | } | 8558 | } |
| 8717 | 8559 | ||
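Editor's note: with SD_WAKE_IDLE and SD_WAKE_IDLE_FAR removed, relax_domain_level now toggles SD_BALANCE_WAKE together with SD_BALANCE_NEWIDLE, so a single pair of flags controls idle balancing per domain. The sketch below only shows the mask manipulation; the flag values are placeholders, not the kernel's.

    /* Toggle idle-balancing flags on a domain based on the requested
     * relax level, in the style of set_domain_attribute(). */
    #include <stdio.h>

    #define SD_BALANCE_NEWIDLE 0x01   /* placeholder values */
    #define SD_BALANCE_WAKE    0x02

    struct domain {
        int level;
        unsigned flags;
    };

    static void set_domain_attribute(struct domain *sd, int request)
    {
        if (request < sd->level)
            sd->flags &= ~(SD_BALANCE_WAKE | SD_BALANCE_NEWIDLE);   /* off */
        else
            sd->flags |= (SD_BALANCE_WAKE | SD_BALANCE_NEWIDLE);    /* on */
    }

    int main(void)
    {
        struct domain sd = { .level = 2, .flags = SD_BALANCE_WAKE };

        set_domain_attribute(&sd, 1);
        printf("flags after relax=1: %#x\n", sd.flags);

        set_domain_attribute(&sd, 3);
        printf("flags after relax=3: %#x\n", sd.flags);
        return 0;
    }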
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5ddbd0891267..efb84409bc43 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
| @@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) | |||
| 395 | PN(se.sum_exec_runtime); | 395 | PN(se.sum_exec_runtime); |
| 396 | PN(se.avg_overlap); | 396 | PN(se.avg_overlap); |
| 397 | PN(se.avg_wakeup); | 397 | PN(se.avg_wakeup); |
| 398 | PN(se.avg_running); | ||
| 398 | 399 | ||
| 399 | nr_switches = p->nvcsw + p->nivcsw; | 400 | nr_switches = p->nvcsw + p->nivcsw; |
| 400 | 401 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index aa7f84121016..10d218ab69f2 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
| @@ -711,7 +711,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 711 | 711 | ||
| 712 | if (!initial) { | 712 | if (!initial) { |
| 713 | /* sleeps upto a single latency don't count. */ | 713 | /* sleeps upto a single latency don't count. */ |
| 714 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 714 | if (sched_feat(FAIR_SLEEPERS)) { |
| 715 | unsigned long thresh = sysctl_sched_latency; | 715 | unsigned long thresh = sysctl_sched_latency; |
| 716 | 716 | ||
| 717 | /* | 717 | /* |
| @@ -725,6 +725,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 725 | task_of(se)->policy != SCHED_IDLE)) | 725 | task_of(se)->policy != SCHED_IDLE)) |
| 726 | thresh = calc_delta_fair(thresh, se); | 726 | thresh = calc_delta_fair(thresh, se); |
| 727 | 727 | ||
| 728 | /* | ||
| 729 | * Halve their sleep time's effect, to allow | ||
| 730 | * for a gentler effect of sleepers: | ||
| 731 | */ | ||
| 732 | if (sched_feat(GENTLE_FAIR_SLEEPERS)) | ||
| 733 | thresh >>= 1; | ||
| 734 | |||
| 728 | vruntime -= thresh; | 735 | vruntime -= thresh; |
| 729 | } | 736 | } |
| 730 | } | 737 | } |
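Editor's note: place_entity() credits a waking sleeper with up to one latency period of vruntime; with GENTLE_FAIR_SLEEPERS that credit is halved so a flood of sleepers cannot pull the spread apart. A quick numeric sketch of the effect follows; the 20 ms latency is a typical default used only as an example.

    /* Effect of GENTLE_FAIR_SLEEPERS on the sleeper credit subtracted
     * from min_vruntime in place_entity(). Example numbers only. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long long sysctl_sched_latency = 20000000ULL;  /* 20 ms in ns */
        unsigned long long min_vruntime = 100000000ULL;
        int gentle_fair_sleepers = 1;

        unsigned long long thresh = sysctl_sched_latency;
        if (gentle_fair_sleepers)
            thresh >>= 1;   /* halve the sleeper bonus */

        printf("placed at vruntime %llu (credit %llu ns)\n",
               min_vruntime - thresh, thresh);
        return 0;
    }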
| @@ -757,10 +764,10 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) | |||
| 757 | 764 | ||
| 758 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 765 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 759 | { | 766 | { |
| 760 | if (cfs_rq->last == se) | 767 | if (!se || cfs_rq->last == se) |
| 761 | cfs_rq->last = NULL; | 768 | cfs_rq->last = NULL; |
| 762 | 769 | ||
| 763 | if (cfs_rq->next == se) | 770 | if (!se || cfs_rq->next == se) |
| 764 | cfs_rq->next = NULL; | 771 | cfs_rq->next = NULL; |
| 765 | } | 772 | } |
| 766 | 773 | ||
| @@ -1062,83 +1069,6 @@ static void yield_task_fair(struct rq *rq) | |||
| 1062 | se->vruntime = rightmost->vruntime + 1; | 1069 | se->vruntime = rightmost->vruntime + 1; |
| 1063 | } | 1070 | } |
| 1064 | 1071 | ||
| 1065 | /* | ||
| 1066 | * wake_idle() will wake a task on an idle cpu if task->cpu is | ||
| 1067 | * not idle and an idle cpu is available. The span of cpus to | ||
| 1068 | * search starts with cpus closest then further out as needed, | ||
| 1069 | * so we always favor a closer, idle cpu. | ||
| 1070 | * Domains may include CPUs that are not usable for migration, | ||
| 1071 | * hence we need to mask them out (rq->rd->online) | ||
| 1072 | * | ||
| 1073 | * Returns the CPU we should wake onto. | ||
| 1074 | */ | ||
| 1075 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | ||
| 1076 | |||
| 1077 | #define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online) | ||
| 1078 | |||
| 1079 | static int wake_idle(int cpu, struct task_struct *p) | ||
| 1080 | { | ||
| 1081 | struct sched_domain *sd; | ||
| 1082 | int i; | ||
| 1083 | unsigned int chosen_wakeup_cpu; | ||
| 1084 | int this_cpu; | ||
| 1085 | struct rq *task_rq = task_rq(p); | ||
| 1086 | |||
| 1087 | /* | ||
| 1088 | * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu | ||
| 1089 | * are idle and this is not a kernel thread and this task's affinity | ||
| 1090 | * allows it to be moved to preferred cpu, then just move! | ||
| 1091 | */ | ||
| 1092 | |||
| 1093 | this_cpu = smp_processor_id(); | ||
| 1094 | chosen_wakeup_cpu = | ||
| 1095 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu; | ||
| 1096 | |||
| 1097 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP && | ||
| 1098 | idle_cpu(cpu) && idle_cpu(this_cpu) && | ||
| 1099 | p->mm && !(p->flags & PF_KTHREAD) && | ||
| 1100 | cpu_isset(chosen_wakeup_cpu, p->cpus_allowed)) | ||
| 1101 | return chosen_wakeup_cpu; | ||
| 1102 | |||
| 1103 | /* | ||
| 1104 | * If it is idle, then it is the best cpu to run this task. | ||
| 1105 | * | ||
| 1106 | * This cpu is also the best, if it has more than one task already. | ||
| 1107 | * Siblings must be also busy(in most cases) as they didn't already | ||
| 1108 | * pickup the extra load from this cpu and hence we need not check | ||
| 1109 | * sibling runqueue info. This will avoid the checks and cache miss | ||
| 1110 | * penalities associated with that. | ||
| 1111 | */ | ||
| 1112 | if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1) | ||
| 1113 | return cpu; | ||
| 1114 | |||
| 1115 | for_each_domain(cpu, sd) { | ||
| 1116 | if ((sd->flags & SD_WAKE_IDLE) | ||
| 1117 | || ((sd->flags & SD_WAKE_IDLE_FAR) | ||
| 1118 | && !task_hot(p, task_rq->clock, sd))) { | ||
| 1119 | for_each_cpu_and(i, sched_domain_span(sd), | ||
| 1120 | &p->cpus_allowed) { | ||
| 1121 | if (cpu_rd_active(i, task_rq) && idle_cpu(i)) { | ||
| 1122 | if (i != task_cpu(p)) { | ||
| 1123 | schedstat_inc(p, | ||
| 1124 | se.nr_wakeups_idle); | ||
| 1125 | } | ||
| 1126 | return i; | ||
| 1127 | } | ||
| 1128 | } | ||
| 1129 | } else { | ||
| 1130 | break; | ||
| 1131 | } | ||
| 1132 | } | ||
| 1133 | return cpu; | ||
| 1134 | } | ||
| 1135 | #else /* !ARCH_HAS_SCHED_WAKE_IDLE*/ | ||
| 1136 | static inline int wake_idle(int cpu, struct task_struct *p) | ||
| 1137 | { | ||
| 1138 | return cpu; | ||
| 1139 | } | ||
| 1140 | #endif | ||
| 1141 | |||
| 1142 | #ifdef CONFIG_SMP | 1072 | #ifdef CONFIG_SMP |
| 1143 | 1073 | ||
| 1144 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1074 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| @@ -1225,25 +1155,34 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu, | |||
| 1225 | 1155 | ||
| 1226 | #endif | 1156 | #endif |
| 1227 | 1157 | ||
| 1228 | static int | 1158 | static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync) |
| 1229 | wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | ||
| 1230 | struct task_struct *p, int prev_cpu, int this_cpu, int sync, | ||
| 1231 | int idx, unsigned long load, unsigned long this_load, | ||
| 1232 | unsigned int imbalance) | ||
| 1233 | { | 1159 | { |
| 1234 | struct task_struct *curr = this_rq->curr; | 1160 | struct task_struct *curr = current; |
| 1235 | struct task_group *tg; | 1161 | unsigned long this_load, load; |
| 1236 | unsigned long tl = this_load; | 1162 | int idx, this_cpu, prev_cpu; |
| 1237 | unsigned long tl_per_task; | 1163 | unsigned long tl_per_task; |
| 1164 | unsigned int imbalance; | ||
| 1165 | struct task_group *tg; | ||
| 1238 | unsigned long weight; | 1166 | unsigned long weight; |
| 1239 | int balanced; | 1167 | int balanced; |
| 1240 | 1168 | ||
| 1241 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1169 | idx = sd->wake_idx; |
| 1242 | return 0; | 1170 | this_cpu = smp_processor_id(); |
| 1171 | prev_cpu = task_cpu(p); | ||
| 1172 | load = source_load(prev_cpu, idx); | ||
| 1173 | this_load = target_load(this_cpu, idx); | ||
| 1243 | 1174 | ||
| 1244 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || | 1175 | if (sync) { |
| 1245 | p->se.avg_overlap > sysctl_sched_migration_cost)) | 1176 | if (sched_feat(SYNC_LESS) && |
| 1246 | sync = 0; | 1177 | (curr->se.avg_overlap > sysctl_sched_migration_cost || |
| 1178 | p->se.avg_overlap > sysctl_sched_migration_cost)) | ||
| 1179 | sync = 0; | ||
| 1180 | } else { | ||
| 1181 | if (sched_feat(SYNC_MORE) && | ||
| 1182 | (curr->se.avg_overlap < sysctl_sched_migration_cost && | ||
| 1183 | p->se.avg_overlap < sysctl_sched_migration_cost)) | ||
| 1184 | sync = 1; | ||
| 1185 | } | ||
| 1247 | 1186 | ||
| 1248 | /* | 1187 | /* |
| 1249 | * If sync wakeup then subtract the (maximum possible) | 1188 | * If sync wakeup then subtract the (maximum possible) |
| @@ -1254,24 +1193,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
| 1254 | tg = task_group(current); | 1193 | tg = task_group(current); |
| 1255 | weight = current->se.load.weight; | 1194 | weight = current->se.load.weight; |
| 1256 | 1195 | ||
| 1257 | tl += effective_load(tg, this_cpu, -weight, -weight); | 1196 | this_load += effective_load(tg, this_cpu, -weight, -weight); |
| 1258 | load += effective_load(tg, prev_cpu, 0, -weight); | 1197 | load += effective_load(tg, prev_cpu, 0, -weight); |
| 1259 | } | 1198 | } |
| 1260 | 1199 | ||
| 1261 | tg = task_group(p); | 1200 | tg = task_group(p); |
| 1262 | weight = p->se.load.weight; | 1201 | weight = p->se.load.weight; |
| 1263 | 1202 | ||
| 1203 | imbalance = 100 + (sd->imbalance_pct - 100) / 2; | ||
| 1204 | |||
| 1264 | /* | 1205 | /* |
| 1265 | * In low-load situations, where prev_cpu is idle and this_cpu is idle | 1206 | * In low-load situations, where prev_cpu is idle and this_cpu is idle |
| 1266 | * due to the sync cause above having dropped tl to 0, we'll always have | 1207 | * due to the sync cause above having dropped this_load to 0, we'll |
| 1267 | * an imbalance, but there's really nothing you can do about that, so | 1208 | * always have an imbalance, but there's really nothing you can do |
| 1268 | * that's good too. | 1209 | * about that, so that's good too. |
| 1269 | * | 1210 | * |
| 1270 | * Otherwise check if either cpus are near enough in load to allow this | 1211 | * Otherwise check if either cpus are near enough in load to allow this |
| 1271 | * task to be woken on this_cpu. | 1212 | * task to be woken on this_cpu. |
| 1272 | */ | 1213 | */ |
| 1273 | balanced = !tl || | 1214 | balanced = !this_load || |
| 1274 | 100*(tl + effective_load(tg, this_cpu, weight, weight)) <= | 1215 | 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <= |
| 1275 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); | 1216 | imbalance*(load + effective_load(tg, prev_cpu, 0, weight)); |
| 1276 | 1217 | ||
| 1277 | /* | 1218 | /* |
| @@ -1285,14 +1226,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
| 1285 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | 1226 | schedstat_inc(p, se.nr_wakeups_affine_attempts); |
| 1286 | tl_per_task = cpu_avg_load_per_task(this_cpu); | 1227 | tl_per_task = cpu_avg_load_per_task(this_cpu); |
| 1287 | 1228 | ||
| 1288 | if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <= | 1229 | if (balanced || |
| 1289 | tl_per_task)) { | 1230 | (this_load <= load && |
| 1231 | this_load + target_load(prev_cpu, idx) <= tl_per_task)) { | ||
| 1290 | /* | 1232 | /* |
| 1291 | * This domain has SD_WAKE_AFFINE and | 1233 | * This domain has SD_WAKE_AFFINE and |
| 1292 | * p is cache cold in this domain, and | 1234 | * p is cache cold in this domain, and |
| 1293 | * there is no bad imbalance. | 1235 | * there is no bad imbalance. |
| 1294 | */ | 1236 | */ |
| 1295 | schedstat_inc(this_sd, ttwu_move_affine); | 1237 | schedstat_inc(sd, ttwu_move_affine); |
| 1296 | schedstat_inc(p, se.nr_wakeups_affine); | 1238 | schedstat_inc(p, se.nr_wakeups_affine); |
| 1297 | 1239 | ||
| 1298 | return 1; | 1240 | return 1; |
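Editor's note: the rewritten wake_affine() keeps the old balance arithmetic but computes load, this_load and the imbalance percentage locally: with imbalance = 100 + (imbalance_pct - 100)/2, the wakeup stays affine when the candidate CPU's load plus the task still undercuts the scaled previous-CPU load. The worked example below uses invented load numbers, assumes imbalance_pct = 125, and ignores the group-scheduling effective_load() correction.

    /* Worked example of wake_affine()'s "balanced" test. */
    #include <stdio.h>

    int main(void)
    {
        unsigned long imbalance_pct = 125;
        unsigned long imbalance = 100 + (imbalance_pct - 100) / 2;   /* 112 */

        unsigned long this_load = 1024;   /* this_cpu after removing the waker */
        unsigned long load      = 1100;   /* prev_cpu */
        unsigned long weight    = 1024;   /* p's load weight */

        int balanced = !this_load ||
            100 * (this_load + weight) <= imbalance * (load + weight);

        printf("balanced = %d (lhs %lu, rhs %lu)\n", balanced,
               100 * (this_load + weight), imbalance * (load + weight));
        return 0;
    }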
| @@ -1300,65 +1242,215 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
| 1300 | return 0; | 1242 | return 0; |
| 1301 | } | 1243 | } |
| 1302 | 1244 | ||
| 1303 | static int select_task_rq_fair(struct task_struct *p, int sync) | 1245 | /* |
| 1246 | * find_idlest_group finds and returns the least busy CPU group within the | ||
| 1247 | * domain. | ||
| 1248 | */ | ||
| 1249 | static struct sched_group * | ||
| 1250 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, | ||
| 1251 | int this_cpu, int load_idx) | ||
| 1304 | { | 1252 | { |
| 1305 | struct sched_domain *sd, *this_sd = NULL; | 1253 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; |
| 1306 | int prev_cpu, this_cpu, new_cpu; | 1254 | unsigned long min_load = ULONG_MAX, this_load = 0; |
| 1307 | unsigned long load, this_load; | 1255 | int imbalance = 100 + (sd->imbalance_pct-100)/2; |
| 1308 | struct rq *this_rq; | ||
| 1309 | unsigned int imbalance; | ||
| 1310 | int idx; | ||
| 1311 | 1256 | ||
| 1312 | prev_cpu = task_cpu(p); | 1257 | do { |
| 1313 | this_cpu = smp_processor_id(); | 1258 | unsigned long load, avg_load; |
| 1314 | this_rq = cpu_rq(this_cpu); | 1259 | int local_group; |
| 1315 | new_cpu = prev_cpu; | 1260 | int i; |
| 1316 | 1261 | ||
| 1317 | /* | 1262 | /* Skip over this group if it has no CPUs allowed */ |
| 1318 | * 'this_sd' is the first domain that both | 1263 | if (!cpumask_intersects(sched_group_cpus(group), |
| 1319 | * this_cpu and prev_cpu are present in: | 1264 | &p->cpus_allowed)) |
| 1320 | */ | 1265 | continue; |
| 1321 | for_each_domain(this_cpu, sd) { | 1266 | |
| 1322 | if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) { | 1267 | local_group = cpumask_test_cpu(this_cpu, |
| 1323 | this_sd = sd; | 1268 | sched_group_cpus(group)); |
| 1324 | break; | 1269 | |
| 1270 | /* Tally up the load of all CPUs in the group */ | ||
| 1271 | avg_load = 0; | ||
| 1272 | |||
| 1273 | for_each_cpu(i, sched_group_cpus(group)) { | ||
| 1274 | /* Bias balancing toward cpus of our domain */ | ||
| 1275 | if (local_group) | ||
| 1276 | load = source_load(i, load_idx); | ||
| 1277 | else | ||
| 1278 | load = target_load(i, load_idx); | ||
| 1279 | |||
| 1280 | avg_load += load; | ||
| 1281 | } | ||
| 1282 | |||
| 1283 | /* Adjust by relative CPU power of the group */ | ||
| 1284 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
| 1285 | |||
| 1286 | if (local_group) { | ||
| 1287 | this_load = avg_load; | ||
| 1288 | this = group; | ||
| 1289 | } else if (avg_load < min_load) { | ||
| 1290 | min_load = avg_load; | ||
| 1291 | idlest = group; | ||
| 1292 | } | ||
| 1293 | } while (group = group->next, group != sd->groups); | ||
| 1294 | |||
| 1295 | if (!idlest || 100*this_load < imbalance*min_load) | ||
| 1296 | return NULL; | ||
| 1297 | return idlest; | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | /* | ||
| 1301 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
| 1302 | */ | ||
| 1303 | static int | ||
| 1304 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
| 1305 | { | ||
| 1306 | unsigned long load, min_load = ULONG_MAX; | ||
| 1307 | int idlest = -1; | ||
| 1308 | int i; | ||
| 1309 | |||
| 1310 | /* Traverse only the allowed CPUs */ | ||
| 1311 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
| 1312 | load = weighted_cpuload(i); | ||
| 1313 | |||
| 1314 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
| 1315 | min_load = load; | ||
| 1316 | idlest = i; | ||
| 1325 | } | 1317 | } |
| 1326 | } | 1318 | } |
| 1327 | 1319 | ||
| 1328 | if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed))) | 1320 | return idlest; |
| 1329 | goto out; | 1321 | } |
| 1330 | 1322 | ||
| 1331 | /* | 1323 | /* |
| 1332 | * Check for affine wakeup and passive balancing possibilities. | 1324 | * sched_balance_self: balance the current task (running on cpu) in domains |
| 1333 | */ | 1325 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and |
| 1334 | if (!this_sd) | 1326 | * SD_BALANCE_EXEC. |
| 1327 | * | ||
| 1328 | * Balance, ie. select the least loaded group. | ||
| 1329 | * | ||
| 1330 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
| 1331 | * | ||
| 1332 | * preempt must be disabled. | ||
| 1333 | */ | ||
| 1334 | static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) | ||
| 1335 | { | ||
| 1336 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | ||
| 1337 | int cpu = smp_processor_id(); | ||
| 1338 | int prev_cpu = task_cpu(p); | ||
| 1339 | int new_cpu = cpu; | ||
| 1340 | int want_affine = 0; | ||
| 1341 | int want_sd = 1; | ||
| 1342 | int sync = wake_flags & WF_SYNC; | ||
| 1343 | |||
| 1344 | if (sd_flag & SD_BALANCE_WAKE) { | ||
| 1345 | if (sched_feat(AFFINE_WAKEUPS)) | ||
| 1346 | want_affine = 1; | ||
| 1347 | new_cpu = prev_cpu; | ||
| 1348 | } | ||
| 1349 | |||
| 1350 | rcu_read_lock(); | ||
| 1351 | for_each_domain(cpu, tmp) { | ||
| 1352 | /* | ||
| 1353 | * If power savings logic is enabled for a domain, see if we | ||
| 1354 | * are not overloaded, if so, don't balance wider. | ||
| 1355 | */ | ||
| 1356 | if (tmp->flags & (SD_POWERSAVINGS_BALANCE|SD_PREFER_LOCAL)) { | ||
| 1357 | unsigned long power = 0; | ||
| 1358 | unsigned long nr_running = 0; | ||
| 1359 | unsigned long capacity; | ||
| 1360 | int i; | ||
| 1361 | |||
| 1362 | for_each_cpu(i, sched_domain_span(tmp)) { | ||
| 1363 | power += power_of(i); | ||
| 1364 | nr_running += cpu_rq(i)->cfs.nr_running; | ||
| 1365 | } | ||
| 1366 | |||
| 1367 | capacity = DIV_ROUND_CLOSEST(power, SCHED_LOAD_SCALE); | ||
| 1368 | |||
| 1369 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
| 1370 | nr_running /= 2; | ||
| 1371 | |||
| 1372 | if (nr_running < capacity) | ||
| 1373 | want_sd = 0; | ||
| 1374 | } | ||
| 1375 | |||
| 1376 | if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && | ||
| 1377 | cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { | ||
| 1378 | |||
| 1379 | affine_sd = tmp; | ||
| 1380 | want_affine = 0; | ||
| 1381 | } | ||
| 1382 | |||
| 1383 | if (!want_sd && !want_affine) | ||
| 1384 | break; | ||
| 1385 | |||
| 1386 | if (!(tmp->flags & sd_flag)) | ||
| 1387 | continue; | ||
| 1388 | |||
| 1389 | if (want_sd) | ||
| 1390 | sd = tmp; | ||
| 1391 | } | ||
| 1392 | |||
| 1393 | if (sched_feat(LB_SHARES_UPDATE)) { | ||
| 1394 | /* | ||
| 1395 | * Pick the largest domain to update shares over | ||
| 1396 | */ | ||
| 1397 | tmp = sd; | ||
| 1398 | if (affine_sd && (!tmp || | ||
| 1399 | cpumask_weight(sched_domain_span(affine_sd)) > | ||
| 1400 | cpumask_weight(sched_domain_span(sd)))) | ||
| 1401 | tmp = affine_sd; | ||
| 1402 | |||
| 1403 | if (tmp) | ||
| 1404 | update_shares(tmp); | ||
| 1405 | } | ||
| 1406 | |||
| 1407 | if (affine_sd && wake_affine(affine_sd, p, sync)) { | ||
| 1408 | new_cpu = cpu; | ||
| 1335 | goto out; | 1409 | goto out; |
| 1410 | } | ||
| 1336 | 1411 | ||
| 1337 | idx = this_sd->wake_idx; | 1412 | while (sd) { |
| 1413 | int load_idx = sd->forkexec_idx; | ||
| 1414 | struct sched_group *group; | ||
| 1415 | int weight; | ||
| 1338 | 1416 | ||
| 1339 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | 1417 | if (!(sd->flags & sd_flag)) { |
| 1418 | sd = sd->child; | ||
| 1419 | continue; | ||
| 1420 | } | ||
| 1340 | 1421 | ||
| 1341 | load = source_load(prev_cpu, idx); | 1422 | if (sd_flag & SD_BALANCE_WAKE) |
| 1342 | this_load = target_load(this_cpu, idx); | 1423 | load_idx = sd->wake_idx; |
| 1343 | 1424 | ||
| 1344 | if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx, | 1425 | group = find_idlest_group(sd, p, cpu, load_idx); |
| 1345 | load, this_load, imbalance)) | 1426 | if (!group) { |
| 1346 | return this_cpu; | 1427 | sd = sd->child; |
| 1428 | continue; | ||
| 1429 | } | ||
| 1347 | 1430 | ||
| 1348 | /* | 1431 | new_cpu = find_idlest_cpu(group, p, cpu); |
| 1349 | * Start passive balancing when half the imbalance_pct | 1432 | if (new_cpu == -1 || new_cpu == cpu) { |
| 1350 | * limit is reached. | 1433 | /* Now try balancing at a lower domain level of cpu */ |
| 1351 | */ | 1434 | sd = sd->child; |
| 1352 | if (this_sd->flags & SD_WAKE_BALANCE) { | 1435 | continue; |
| 1353 | if (imbalance*this_load <= 100*load) { | ||
| 1354 | schedstat_inc(this_sd, ttwu_move_balance); | ||
| 1355 | schedstat_inc(p, se.nr_wakeups_passive); | ||
| 1356 | return this_cpu; | ||
| 1357 | } | 1436 | } |
| 1437 | |||
| 1438 | /* Now try balancing at a lower domain level of new_cpu */ | ||
| 1439 | cpu = new_cpu; | ||
| 1440 | weight = cpumask_weight(sched_domain_span(sd)); | ||
| 1441 | sd = NULL; | ||
| 1442 | for_each_domain(cpu, tmp) { | ||
| 1443 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
| 1444 | break; | ||
| 1445 | if (tmp->flags & sd_flag) | ||
| 1446 | sd = tmp; | ||
| 1447 | } | ||
| 1448 | /* while loop will break here if sd == NULL */ | ||
| 1358 | } | 1449 | } |
| 1359 | 1450 | ||
| 1360 | out: | 1451 | out: |
| 1361 | return wake_idle(new_cpu, p); | 1452 | rcu_read_unlock(); |
| 1453 | return new_cpu; | ||
| 1362 | } | 1454 | } |
| 1363 | #endif /* CONFIG_SMP */ | 1455 | #endif /* CONFIG_SMP */ |
| 1364 | 1456 | ||
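Editor's note: the rewritten select_task_rq_fair() folds the old core balancing walk into the fair class: starting from the highest domain carrying the requested balance flag it picks the idlest group, then the idlest allowed CPU in that group, and then descends and repeats. The toy model below shows only that group-then-CPU descent; the topology and load numbers are invented, and cpu_power scaling, affinity masks and group scheduling are ignored.

    /* Toy model of the find_idlest_group()/find_idlest_cpu() descent. */
    #include <stdio.h>

    #define NCPUS 4

    static unsigned long cpu_load[NCPUS] = { 900, 700, 200, 400 };

    struct group { int first, last; };   /* contiguous CPU span */

    static unsigned long group_load(const struct group *g)
    {
        unsigned long sum = 0;

        for (int i = g->first; i <= g->last; i++)
            sum += cpu_load[i];
        return sum;
    }

    int main(void)
    {
        struct group groups[2] = { { 0, 1 }, { 2, 3 } };

        /* find_idlest_group(): least aggregate load */
        int gi = group_load(&groups[0]) <= group_load(&groups[1]) ? 0 : 1;

        /* find_idlest_cpu(): least loaded CPU inside that group */
        int best = groups[gi].first;
        for (int i = groups[gi].first + 1; i <= groups[gi].last; i++)
            if (cpu_load[i] < cpu_load[best])
                best = i;

        printf("new_cpu = %d (group %d)\n", best, gi);
        return 0;
    }

The same walk now serves SD_BALANCE_FORK, SD_BALANCE_EXEC and SD_BALANCE_WAKE, with the load index switched to wake_idx for wakeups.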
| @@ -1471,11 +1563,12 @@ static void set_next_buddy(struct sched_entity *se) | |||
| 1471 | /* | 1563 | /* |
| 1472 | * Preempt the current task with a newly woken task if needed: | 1564 | * Preempt the current task with a newly woken task if needed: |
| 1473 | */ | 1565 | */ |
| 1474 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | 1566 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) |
| 1475 | { | 1567 | { |
| 1476 | struct task_struct *curr = rq->curr; | 1568 | struct task_struct *curr = rq->curr; |
| 1477 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1569 | struct sched_entity *se = &curr->se, *pse = &p->se; |
| 1478 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1570 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
| 1571 | int sync = wake_flags & WF_SYNC; | ||
| 1479 | 1572 | ||
| 1480 | update_curr(cfs_rq); | 1573 | update_curr(cfs_rq); |
| 1481 | 1574 | ||
| @@ -1501,7 +1594,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
| 1501 | */ | 1594 | */ |
| 1502 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) | 1595 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) |
| 1503 | set_last_buddy(se); | 1596 | set_last_buddy(se); |
| 1504 | set_next_buddy(pse); | 1597 | if (sched_feat(NEXT_BUDDY) && !(wake_flags & WF_FORK)) |
| 1598 | set_next_buddy(pse); | ||
| 1505 | 1599 | ||
| 1506 | /* | 1600 | /* |
| 1507 | * We can come here with TIF_NEED_RESCHED already set from new task | 1601 | * We can come here with TIF_NEED_RESCHED already set from new task |
| @@ -1523,16 +1617,25 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
| 1523 | return; | 1617 | return; |
| 1524 | } | 1618 | } |
| 1525 | 1619 | ||
| 1526 | if (!sched_feat(WAKEUP_PREEMPT)) | 1620 | if ((sched_feat(WAKEUP_SYNC) && sync) || |
| 1527 | return; | 1621 | (sched_feat(WAKEUP_OVERLAP) && |
| 1528 | 1622 | (se->avg_overlap < sysctl_sched_migration_cost && | |
| 1529 | if (sched_feat(WAKEUP_OVERLAP) && (sync || | 1623 | pse->avg_overlap < sysctl_sched_migration_cost))) { |
| 1530 | (se->avg_overlap < sysctl_sched_migration_cost && | ||
| 1531 | pse->avg_overlap < sysctl_sched_migration_cost))) { | ||
| 1532 | resched_task(curr); | 1624 | resched_task(curr); |
| 1533 | return; | 1625 | return; |
| 1534 | } | 1626 | } |
| 1535 | 1627 | ||
| 1628 | if (sched_feat(WAKEUP_RUNNING)) { | ||
| 1629 | if (pse->avg_running < se->avg_running) { | ||
| 1630 | set_next_buddy(pse); | ||
| 1631 | resched_task(curr); | ||
| 1632 | return; | ||
| 1633 | } | ||
| 1634 | } | ||
| 1635 | |||
| 1636 | if (!sched_feat(WAKEUP_PREEMPT)) | ||
| 1637 | return; | ||
| 1638 | |||
| 1536 | find_matching_se(&se, &pse); | 1639 | find_matching_se(&se, &pse); |
| 1537 | 1640 | ||
| 1538 | BUG_ON(!pse); | 1641 | BUG_ON(!pse); |
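Editor's note: with the WAKEUP_RUNNING feature enabled, a waking task preempts the current one when its avg_running (the per-slice runtime average maintained in the sched.c hunk earlier) is shorter, biasing preemption toward tasks that historically run only briefly. A hedged sketch of that comparison, with invented values:

    /* Sketch of the WAKEUP_RUNNING decision: prefer the task whose
     * recent slices were shorter. */
    #include <stdio.h>
    #include <stdint.h>

    struct sched_entity { uint64_t avg_running; };

    static int wakeup_running_preempt(const struct sched_entity *curr,
                                      const struct sched_entity *woken)
    {
        return woken->avg_running < curr->avg_running;
    }

    int main(void)
    {
        struct sched_entity curr  = { .avg_running = 4000000 };   /* 4 ms slices */
        struct sched_entity woken = { .avg_running =  500000 };   /* 0.5 ms slices */

        if (wakeup_running_preempt(&curr, &woken))
            printf("reschedule: woken task runs short, let it in\n");
        else
            printf("keep current task\n");
        return 0;
    }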
| @@ -1555,8 +1658,13 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) | |||
| 1555 | /* | 1658 | /* |
| 1556 | * If se was a buddy, clear it so that it will have to earn | 1659 | * If se was a buddy, clear it so that it will have to earn |
| 1557 | * the favour again. | 1660 | * the favour again. |
| 1661 | * | ||
| 1662 | * If se was not a buddy, clear the buddies because neither | ||
| 1663 | * was elegible to run, let them earn it again. | ||
| 1664 | * | ||
| 1665 | * IOW. unconditionally clear buddies. | ||
| 1558 | */ | 1666 | */ |
| 1559 | __clear_buddies(cfs_rq, se); | 1667 | __clear_buddies(cfs_rq, NULL); |
| 1560 | set_next_entity(cfs_rq, se); | 1668 | set_next_entity(cfs_rq, se); |
| 1561 | cfs_rq = group_cfs_rq(se); | 1669 | cfs_rq = group_cfs_rq(se); |
| 1562 | } while (cfs_rq); | 1670 | } while (cfs_rq); |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index e2dc63a5815d..0d94083582c7 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
| @@ -1,17 +1,123 @@ | |||
| 1 | SCHED_FEAT(NEW_FAIR_SLEEPERS, 0) | 1 | /* |
| 2 | * Disregards a certain amount of sleep time (sched_latency_ns) and | ||
| 3 | * considers the task to be running during that period. This gives it | ||
| 4 | * a service deficit on wakeup, allowing it to run sooner. | ||
| 5 | */ | ||
| 6 | SCHED_FEAT(FAIR_SLEEPERS, 1) | ||
| 7 | |||
| 8 | /* | ||
| 9 | * Only give sleepers 50% of their service deficit. This allows | ||
| 10 | * them to run sooner, but does not allow tons of sleepers to | ||
| 11 | * rip the spread apart. | ||
| 12 | */ | ||
| 13 | SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1) | ||
| 14 | |||
| 15 | /* | ||
| 16 | * By not normalizing the sleep time, heavy tasks get an effective | ||
| 17 | * longer period, and lighter task an effective shorter period they | ||
| 18 | * are considered running. | ||
| 19 | */ | ||
| 2 | SCHED_FEAT(NORMALIZED_SLEEPER, 0) | 20 | SCHED_FEAT(NORMALIZED_SLEEPER, 0) |
| 3 | SCHED_FEAT(ADAPTIVE_GRAN, 1) | 21 | |
| 4 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | 22 | /* |
| 23 | * Place new tasks ahead so that they do not starve already running | ||
| 24 | * tasks | ||
| 25 | */ | ||
| 5 | SCHED_FEAT(START_DEBIT, 1) | 26 | SCHED_FEAT(START_DEBIT, 1) |
| 27 | |||
| 28 | /* | ||
| 29 | * Should wakeups try to preempt running tasks. | ||
| 30 | */ | ||
| 31 | SCHED_FEAT(WAKEUP_PREEMPT, 1) | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Compute wakeup_gran based on task behaviour, clipped to | ||
| 35 | * [0, sched_wakeup_gran_ns] | ||
| 36 | */ | ||
| 37 | SCHED_FEAT(ADAPTIVE_GRAN, 1) | ||
| 38 | |||
| 39 | /* | ||
| 40 | * When converting the wakeup granularity to virtual time, do it such | ||
| 41 | * that heavier tasks preempting a lighter task have an edge. | ||
| 42 | */ | ||
| 43 | SCHED_FEAT(ASYM_GRAN, 1) | ||
| 44 | |||
| 45 | /* | ||
| 46 | * Always wakeup-preempt SYNC wakeups, see SYNC_WAKEUPS. | ||
| 47 | */ | ||
| 48 | SCHED_FEAT(WAKEUP_SYNC, 0) | ||
| 49 | |||
| 50 | /* | ||
| 51 | * Wakeup preempt based on task behaviour. Tasks that do not overlap | ||
| 52 | * don't get preempted. | ||
| 53 | */ | ||
| 54 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | ||
| 55 | |||
| 56 | /* | ||
| 57 | * Wakeup preemption towards tasks that run short | ||
| 58 | */ | ||
| 59 | SCHED_FEAT(WAKEUP_RUNNING, 0) | ||
| 60 | |||
| 61 | /* | ||
| 62 | * Use the SYNC wakeup hint, pipes and the likes use this to indicate | ||
| 63 | * the remote end is likely to consume the data we just wrote, and | ||
| 64 | * therefore has cache benefit from being placed on the same cpu, see | ||
| 65 | * also AFFINE_WAKEUPS. | ||
| 66 | */ | ||
| 67 | SCHED_FEAT(SYNC_WAKEUPS, 1) | ||
| 68 | |||
| 69 | /* | ||
| 70 | * Based on load and program behaviour, see if it makes sense to place | ||
| 71 | * a newly woken task on the same cpu as the task that woke it -- | ||
| 72 | * improve cache locality. Typically used with SYNC wakeups as | ||
| 73 | * generated by pipes and the like, see also SYNC_WAKEUPS. | ||
| 74 | */ | ||
| 6 | SCHED_FEAT(AFFINE_WAKEUPS, 1) | 75 | SCHED_FEAT(AFFINE_WAKEUPS, 1) |
| 76 | |||
| 77 | /* | ||
| 78 | * Weaken SYNC hint based on overlap | ||
| 79 | */ | ||
| 80 | SCHED_FEAT(SYNC_LESS, 1) | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Add SYNC hint based on overlap | ||
| 84 | */ | ||
| 85 | SCHED_FEAT(SYNC_MORE, 0) | ||
| 86 | |||
| 87 | /* | ||
| 88 | * Prefer to schedule the task we woke last (assuming it failed | ||
| 89 | * wakeup-preemption), since it's likely going to consume data we | ||
| 90 | * touched, increases cache locality. | ||
| 91 | */ | ||
| 92 | SCHED_FEAT(NEXT_BUDDY, 0) | ||
| 93 | |||
| 94 | /* | ||
| 95 | * Prefer to schedule the task that ran last (when we did | ||
| 96 | * wake-preempt) as that likely will touch the same data, increases | ||
| 97 | * cache locality. | ||
| 98 | */ | ||
| 99 | SCHED_FEAT(LAST_BUDDY, 1) | ||
| 100 | |||
| 101 | /* | ||
| 102 | * Consider buddies to be cache hot, decreases the likelihood of a | ||
| 103 | * cache buddy being migrated away, increases cache locality. | ||
| 104 | */ | ||
| 7 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) | 105 | SCHED_FEAT(CACHE_HOT_BUDDY, 1) |
| 8 | SCHED_FEAT(SYNC_WAKEUPS, 1) | 106 | |
| 107 | /* | ||
| 108 | * Use arch dependent cpu power functions | ||
| 109 | */ | ||
| 110 | SCHED_FEAT(ARCH_POWER, 0) | ||
| 111 | |||
| 9 | SCHED_FEAT(HRTICK, 0) | 112 | SCHED_FEAT(HRTICK, 0) |
| 10 | SCHED_FEAT(DOUBLE_TICK, 0) | 113 | SCHED_FEAT(DOUBLE_TICK, 0) |
| 11 | SCHED_FEAT(ASYM_GRAN, 1) | ||
| 12 | SCHED_FEAT(LB_BIAS, 1) | 114 | SCHED_FEAT(LB_BIAS, 1) |
| 13 | SCHED_FEAT(LB_WAKEUP_UPDATE, 1) | 115 | SCHED_FEAT(LB_SHARES_UPDATE, 1) |
| 14 | SCHED_FEAT(ASYM_EFF_LOAD, 1) | 116 | SCHED_FEAT(ASYM_EFF_LOAD, 1) |
| 15 | SCHED_FEAT(WAKEUP_OVERLAP, 0) | 117 | |
| 16 | SCHED_FEAT(LAST_BUDDY, 1) | 118 | /* |
| 119 | * Spin-wait on mutex acquisition when the mutex owner is running on | ||
| 120 | * another cpu -- assumes that when the owner is running, it will soon | ||
| 121 | * release the lock. Decreases scheduling overhead. | ||
| 122 | */ | ||
| 17 | SCHED_FEAT(OWNER_SPIN, 1) | 123 | SCHED_FEAT(OWNER_SPIN, 1) |
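Editor's note: each SCHED_FEAT(name, default) line in this file is included twice by kernel/sched.c, once to build an enum of bit positions and once to build the default feature mask that the sched_feat() macro tests. The userspace sketch below reproduces that X-macro pattern with a trimmed feature list; the surrounding code is modelled on the kernel's expansion from memory, not copied from it.

    /* X-macro expansion of a SCHED_FEAT()-style feature file. */
    #include <stdio.h>

    #define FEATURE_LIST(F)            \
        F(FAIR_SLEEPERS, 1)            \
        F(GENTLE_FAIR_SLEEPERS, 1)     \
        F(WAKEUP_RUNNING, 0)           \
        F(ARCH_POWER, 0)

    #define F_ENUM(name, enabled) __FEAT_##name,
    enum { FEATURE_LIST(F_ENUM) };

    #define F_MASK(name, enabled) ((enabled) << __FEAT_##name) |
    static const unsigned int sched_features = FEATURE_LIST(F_MASK) 0;

    #define sched_feat(x) (sched_features & (1U << __FEAT_##x))

    int main(void)
    {
        printf("FAIR_SLEEPERS:  %s\n", sched_feat(FAIR_SLEEPERS) ? "on" : "off");
        printf("WAKEUP_RUNNING: %s\n", sched_feat(WAKEUP_RUNNING) ? "on" : "off");
        return 0;
    }

In the kernel the mask is additionally writable at runtime through debugfs, which is why several of the new features above default to 0: they exist for experimentation rather than as default behaviour.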
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 499672c10cbd..a8b448af004b 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | */ | 6 | */ |
| 7 | 7 | ||
| 8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
| 9 | static int select_task_rq_idle(struct task_struct *p, int sync) | 9 | static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) |
| 10 | { | 10 | { |
| 11 | return task_cpu(p); /* IDLE tasks as never migrated */ | 11 | return task_cpu(p); /* IDLE tasks as never migrated */ |
| 12 | } | 12 | } |
| @@ -14,7 +14,7 @@ static int select_task_rq_idle(struct task_struct *p, int sync) | |||
| 14 | /* | 14 | /* |
| 15 | * Idle tasks are unconditionally rescheduled: | 15 | * Idle tasks are unconditionally rescheduled: |
| 16 | */ | 16 | */ |
| 17 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sync) | 17 | static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags) |
| 18 | { | 18 | { |
| 19 | resched_task(rq->idle); | 19 | resched_task(rq->idle); |
| 20 | } | 20 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 2eb4bd6a526c..13de7126a6ab 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
| @@ -938,10 +938,13 @@ static void yield_task_rt(struct rq *rq) | |||
| 938 | #ifdef CONFIG_SMP | 938 | #ifdef CONFIG_SMP |
| 939 | static int find_lowest_rq(struct task_struct *task); | 939 | static int find_lowest_rq(struct task_struct *task); |
| 940 | 940 | ||
| 941 | static int select_task_rq_rt(struct task_struct *p, int sync) | 941 | static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) |
| 942 | { | 942 | { |
| 943 | struct rq *rq = task_rq(p); | 943 | struct rq *rq = task_rq(p); |
| 944 | 944 | ||
| 945 | if (sd_flag != SD_BALANCE_WAKE) | ||
| 946 | return smp_processor_id(); | ||
| 947 | |||
| 945 | /* | 948 | /* |
| 946 | * If the current task is an RT task, then | 949 | * If the current task is an RT task, then |
| 947 | * try to see if we can wake this RT task up on another | 950 | * try to see if we can wake this RT task up on another |
| @@ -999,7 +1002,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | |||
| 999 | /* | 1002 | /* |
| 1000 | * Preempt the current task with a newly woken task if needed: | 1003 | * Preempt the current task with a newly woken task if needed: |
| 1001 | */ | 1004 | */ |
| 1002 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) | 1005 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) |
| 1003 | { | 1006 | { |
| 1004 | if (p->prio < rq->curr->prio) { | 1007 | if (p->prio < rq->curr->prio) { |
| 1005 | resched_task(rq->curr); | 1008 | resched_task(rq->curr); |
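Editor's note: the RT class now receives the same (sd_flag, wake_flags) pair as the fair class; for anything other than SD_BALANCE_WAKE it simply keeps the task on the current CPU, and RT wakeup preemption stays a plain priority comparison. The sketch below summarises both checks; the flag value is a placeholder and the selection logic is deliberately simplified (the real wakeup path may still push the task elsewhere).

    /* Sketch of the RT-class behaviour touched above. */
    #include <stdio.h>

    #define SD_BALANCE_WAKE 0x10   /* placeholder value */

    static int select_task_rq_rt(int sd_flag, int this_cpu, int task_cpu)
    {
        if (sd_flag != SD_BALANCE_WAKE)
            return this_cpu;    /* fork/exec balancing: don't move RT tasks */
        return task_cpu;        /* simplified stand-in for the wakeup path */
    }

    static int check_preempt_rt(int woken_prio, int curr_prio)
    {
        return woken_prio < curr_prio;   /* lower value == higher RT priority */
    }

    int main(void)
    {
        printf("exec balance -> cpu %d\n", select_task_rq_rt(0x04, 1, 3));
        printf("wake balance -> cpu %d\n", select_task_rq_rt(SD_BALANCE_WAKE, 1, 3));
        printf("preempt: %d\n", check_preempt_rt(10, 20));
        return 0;
    }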
