-rw-r--r--  arch/ia64/include/asm/topology.h            |   5
-rw-r--r--  arch/mips/include/asm/mach-ip27/topology.h  |   2
-rw-r--r--  arch/powerpc/include/asm/topology.h         |   5
-rw-r--r--  arch/sh/include/asm/topology.h              |   4
-rw-r--r--  arch/sparc/include/asm/topology_64.h        |   4
-rw-r--r--  arch/x86/include/asm/topology.h             |   4
-rw-r--r--  include/linux/sched.h                       |   7
-rw-r--r--  include/linux/topology.h                    |  16
-rw-r--r--  kernel/sched.c                              |  41
-rw-r--r--  kernel/sched_fair.c                         | 233
10 files changed, 84 insertions, 237 deletions
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 7b4c8c70b2d1..cf6053b226c3 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -67,6 +67,7 @@ void build_cpu_to_node_map(void);
         .flags                 = SD_LOAD_BALANCE       \
                                | SD_BALANCE_NEWIDLE    \
                                | SD_BALANCE_EXEC       \
+                               | SD_BALANCE_WAKE       \
                                | SD_WAKE_AFFINE,       \
         .last_balance          = jiffies,              \
         .balance_interval      = 1,                    \
@@ -91,8 +92,8 @@ void build_cpu_to_node_map(void);
         .flags                 = SD_LOAD_BALANCE       \
                                | SD_BALANCE_EXEC       \
                                | SD_BALANCE_FORK       \
-                               | SD_SERIALIZE          \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_BALANCE_WAKE       \
+                               | SD_SERIALIZE,         \
         .last_balance          = jiffies,              \
         .balance_interval      = 64,                   \
         .nr_balance_failed     = 0,                    \
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 07547231e078..d8332398f5be 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -48,7 +48,7 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
         .cache_nice_tries      = 1,                    \
         .flags                 = SD_LOAD_BALANCE       \
                                | SD_BALANCE_EXEC       \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_BALANCE_WAKE,      \
         .last_balance          = jiffies,              \
         .balance_interval      = 1,                    \
         .nr_balance_failed     = 0,                    \
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 054a16d68082..c6343313ff59 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -62,9 +62,8 @@ static inline int pcibus_to_node(struct pci_bus *bus)
         .flags                 = SD_LOAD_BALANCE       \
                                | SD_BALANCE_EXEC       \
                                | SD_BALANCE_NEWIDLE    \
-                               | SD_WAKE_IDLE          \
-                               | SD_SERIALIZE          \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_BALANCE_WAKE       \
+                               | SD_SERIALIZE,         \
         .last_balance          = jiffies,              \
         .balance_interval      = 1,                    \
         .nr_balance_failed     = 0,                    \
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index b69ee850906d..dc1531e2f25f 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -21,8 +21,8 @@
         .flags                 = SD_LOAD_BALANCE       \
                                | SD_BALANCE_FORK       \
                                | SD_BALANCE_EXEC       \
-                               | SD_SERIALIZE          \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_BALANCE_WAKE       \
+                               | SD_SERIALIZE,         \
         .last_balance          = jiffies,              \
         .balance_interval      = 1,                    \
         .nr_balance_failed     = 0,                    \
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index e5ea8d332421..1d091abd2d13 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -57,8 +57,8 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
         .flags                 = SD_LOAD_BALANCE       \
                                | SD_BALANCE_FORK       \
                                | SD_BALANCE_EXEC       \
-                               | SD_SERIALIZE          \
-                               | SD_WAKE_BALANCE,      \
+                               | SD_BALANCE_WAKE       \
+                               | SD_SERIALIZE,         \
         .last_balance          = jiffies,              \
         .balance_interval      = 1,                    \
 }
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 26d06e052a18..966d58dc6274 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -145,14 +145,12 @@ extern unsigned long node_remap_size[];
                                | 1*SD_BALANCE_NEWIDLE          \
                                | 1*SD_BALANCE_EXEC             \
                                | 1*SD_BALANCE_FORK             \
-                               | 0*SD_WAKE_IDLE                \
+                               | 1*SD_BALANCE_WAKE             \
                                | 1*SD_WAKE_AFFINE              \
-                               | 1*SD_WAKE_BALANCE             \
                                | 0*SD_SHARE_CPUPOWER           \
                                | 0*SD_POWERSAVINGS_BALANCE     \
                                | 0*SD_SHARE_PKG_RESOURCES      \
                                | 1*SD_SERIALIZE                \
-                               | 1*SD_WAKE_IDLE_FAR            \
                                | 0*SD_PREFER_SIBLING           \
                                ,                               \
         .last_balance          = jiffies,                      \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3b0ca66bd6ce..c30bf3d516d1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -803,16 +803,15 @@ enum cpu_idle_type {
 #define SD_BALANCE_NEWIDLE      0x0002  /* Balance when about to become idle */
 #define SD_BALANCE_EXEC         0x0004  /* Balance on exec */
 #define SD_BALANCE_FORK         0x0008  /* Balance on fork, clone */
-#define SD_WAKE_IDLE            0x0010  /* Wake to idle CPU on task wakeup */
+#define SD_BALANCE_WAKE         0x0010  /* Balance on wakeup */
 #define SD_WAKE_AFFINE          0x0020  /* Wake task to waking CPU */
-#define SD_WAKE_BALANCE         0x0040  /* Perform balancing at task wakeup */
+
 #define SD_SHARE_CPUPOWER       0x0080  /* Domain members share cpu power */
 #define SD_POWERSAVINGS_BALANCE 0x0100  /* Balance for power savings */
 #define SD_SHARE_PKG_RESOURCES  0x0200  /* Domain members share cpu pkg resources */
 #define SD_SERIALIZE            0x0400  /* Only a single load balancing instance */
-#define SD_WAKE_IDLE_FAR        0x0800  /* Gain latency sacrificing cache hit */
+
 #define SD_PREFER_SIBLING       0x1000  /* Prefer to place tasks in a sibling domain */
-#define SD_BALANCE_WAKE         0x2000  /* Balance on wakeup */
 
 enum powersavings_balance_level {
         POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 85e8cf7d393c..6a8cd15555bb 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -95,14 +95,12 @@ int arch_update_cpu_topology(void);
                                | 1*SD_BALANCE_NEWIDLE          \
                                | 1*SD_BALANCE_EXEC             \
                                | 1*SD_BALANCE_FORK             \
-                               | 0*SD_WAKE_IDLE                \
+                               | 1*SD_BALANCE_WAKE             \
                                | 1*SD_WAKE_AFFINE              \
-                               | 1*SD_WAKE_BALANCE             \
                                | 1*SD_SHARE_CPUPOWER           \
                                | 0*SD_POWERSAVINGS_BALANCE     \
                                | 0*SD_SHARE_PKG_RESOURCES      \
                                | 0*SD_SERIALIZE                \
-                               | 0*SD_WAKE_IDLE_FAR            \
                                | 0*SD_PREFER_SIBLING           \
                                ,                               \
         .last_balance          = jiffies,                      \
@@ -129,13 +127,11 @@ int arch_update_cpu_topology(void);
                                | 1*SD_BALANCE_NEWIDLE          \
                                | 1*SD_BALANCE_EXEC             \
                                | 1*SD_BALANCE_FORK             \
-                               | 1*SD_WAKE_IDLE                \
+                               | 1*SD_BALANCE_WAKE             \
                                | 1*SD_WAKE_AFFINE              \
-                               | 1*SD_WAKE_BALANCE             \
                                | 0*SD_SHARE_CPUPOWER           \
                                | 1*SD_SHARE_PKG_RESOURCES      \
                                | 0*SD_SERIALIZE                \
-                               | 0*SD_WAKE_IDLE_FAR            \
                                | sd_balance_for_mc_power()     \
                                | sd_power_saving_flags()       \
                                ,                               \
@@ -163,13 +159,11 @@ int arch_update_cpu_topology(void);
                                | 1*SD_BALANCE_NEWIDLE          \
                                | 1*SD_BALANCE_EXEC             \
                                | 1*SD_BALANCE_FORK             \
-                               | 1*SD_WAKE_IDLE                \
+                               | 1*SD_BALANCE_WAKE             \
                                | 0*SD_WAKE_AFFINE              \
-                               | 1*SD_WAKE_BALANCE             \
                                | 0*SD_SHARE_CPUPOWER           \
                                | 0*SD_SHARE_PKG_RESOURCES      \
                                | 0*SD_SERIALIZE                \
-                               | 0*SD_WAKE_IDLE_FAR            \
                                | sd_balance_for_package_power()        \
                                | sd_power_saving_flags()       \
                                ,                               \
@@ -191,14 +185,12 @@ int arch_update_cpu_topology(void);
                                | 1*SD_BALANCE_NEWIDLE          \
                                | 0*SD_BALANCE_EXEC             \
                                | 0*SD_BALANCE_FORK             \
-                               | 0*SD_WAKE_IDLE                \
+                               | 0*SD_BALANCE_WAKE             \
                                | 1*SD_WAKE_AFFINE              \
-                               | 0*SD_WAKE_BALANCE             \
                                | 0*SD_SHARE_CPUPOWER           \
                                | 0*SD_POWERSAVINGS_BALANCE     \
                                | 0*SD_SHARE_PKG_RESOURCES      \
                                | 1*SD_SERIALIZE                \
-                               | 1*SD_WAKE_IDLE_FAR            \
                                | 0*SD_PREFER_SIBLING           \
                                ,                               \
         .last_balance          = jiffies,                      \
diff --git a/kernel/sched.c b/kernel/sched.c
index fc6fda881d2e..6c819f338b11 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -512,14 +512,6 @@ struct root_domain {
 #ifdef CONFIG_SMP
         struct cpupri cpupri;
 #endif
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-        /*
-         * Preferred wake up cpu nominated by sched_mc balance that will be
-         * used when most cpus are idle in the system indicating overall very
-         * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
-         */
-        unsigned int sched_mc_preferred_wakeup_cpu;
-#endif
 };
 
 /*
@@ -2315,22 +2307,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
         if (!sched_feat(SYNC_WAKEUPS))
                 sync = 0;
 
-#ifdef CONFIG_SMP
-        if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
-                struct sched_domain *sd;
-
-                this_cpu = raw_smp_processor_id();
-                cpu = task_cpu(p);
-
-                for_each_domain(this_cpu, sd) {
-                        if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-                                update_shares(sd);
-                                break;
-                        }
-                }
-        }
-#endif
-
         this_cpu = get_cpu();
 
         smp_wmb();
@@ -3533,11 +3509,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
         *imbalance = sds->min_load_per_task;
         sds->busiest = sds->group_min;
 
-        if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
-                cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
-                        group_first_cpu(sds->group_leader);
-        }
-
         return 1;
 
 }
@@ -7850,9 +7821,7 @@ static int sd_degenerate(struct sched_domain *sd)
         }
 
         /* Following flags don't use groups */
-        if (sd->flags & (SD_WAKE_IDLE |
-                         SD_WAKE_AFFINE |
-                         SD_WAKE_BALANCE))
+        if (sd->flags & (SD_WAKE_AFFINE))
                 return 0;
 
         return 1;
@@ -7869,10 +7838,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
         if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
                 return 0;
 
-        /* Does parent contain flags not in child? */
-        /* WAKE_BALANCE is a subset of WAKE_AFFINE */
-        if (cflags & SD_WAKE_AFFINE)
-                pflags &= ~SD_WAKE_BALANCE;
         /* Flags needing groups don't count if only 1 group in parent */
         if (parent->groups == parent->groups->next) {
                 pflags &= ~(SD_LOAD_BALANCE |
@@ -8558,10 +8523,10 @@ static void set_domain_attribute(struct sched_domain *sd,
                 request = attr->relax_domain_level;
         if (request < sd->level) {
                 /* turn off idle balance on this domain */
-                sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE);
+                sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
         } else {
                 /* turn on idle balance on this domain */
-                sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE);
+                sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
         }
 }
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index f2eb5b934715..09d19f77eb3a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1062,83 +1062,6 @@ static void yield_task_fair(struct rq *rq)
                 se->vruntime = rightmost->vruntime + 1;
 }
 
-/*
- * wake_idle() will wake a task on an idle cpu if task->cpu is
- * not idle and an idle cpu is available. The span of cpus to
- * search starts with cpus closest then further out as needed,
- * so we always favor a closer, idle cpu.
- * Domains may include CPUs that are not usable for migration,
- * hence we need to mask them out (rq->rd->online)
- *
- * Returns the CPU we should wake onto.
- */
-#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
-
-#define cpu_rd_active(cpu, rq) cpumask_test_cpu(cpu, rq->rd->online)
-
-static int wake_idle(int cpu, struct task_struct *p)
-{
-        struct sched_domain *sd;
-        int i;
-        unsigned int chosen_wakeup_cpu;
-        int this_cpu;
-        struct rq *task_rq = task_rq(p);
-
-        /*
-         * At POWERSAVINGS_BALANCE_WAKEUP level, if both this_cpu and prev_cpu
-         * are idle and this is not a kernel thread and this task's affinity
-         * allows it to be moved to preferred cpu, then just move!
-         */
-
-        this_cpu = smp_processor_id();
-        chosen_wakeup_cpu =
-                cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu;
-
-        if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP &&
-                idle_cpu(cpu) && idle_cpu(this_cpu) &&
-                p->mm && !(p->flags & PF_KTHREAD) &&
-                cpu_isset(chosen_wakeup_cpu, p->cpus_allowed))
-                return chosen_wakeup_cpu;
-
-        /*
-         * If it is idle, then it is the best cpu to run this task.
-         *
-         * This cpu is also the best, if it has more than one task already.
-         * Siblings must be also busy(in most cases) as they didn't already
-         * pickup the extra load from this cpu and hence we need not check
-         * sibling runqueue info. This will avoid the checks and cache miss
-         * penalities associated with that.
-         */
-        if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
-                return cpu;
-
-        for_each_domain(cpu, sd) {
-                if ((sd->flags & SD_WAKE_IDLE)
-                    || ((sd->flags & SD_WAKE_IDLE_FAR)
-                        && !task_hot(p, task_rq->clock, sd))) {
-                        for_each_cpu_and(i, sched_domain_span(sd),
-                                         &p->cpus_allowed) {
-                                if (cpu_rd_active(i, task_rq) && idle_cpu(i)) {
-                                        if (i != task_cpu(p)) {
-                                                schedstat_inc(p,
-                                                       se.nr_wakeups_idle);
-                                        }
-                                        return i;
-                                }
-                        }
-                } else {
-                        break;
-                }
-        }
-        return cpu;
-}
-#else /* !ARCH_HAS_SCHED_WAKE_IDLE*/
-static inline int wake_idle(int cpu, struct task_struct *p)
-{
-        return cpu;
-}
-#endif
-
 #ifdef CONFIG_SMP
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1225,21 +1148,22 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 #endif
 
-static int
-wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
-            struct task_struct *p, int prev_cpu, int this_cpu, int sync,
-            int idx, unsigned long load, unsigned long this_load,
-            unsigned int imbalance)
+static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-        struct task_struct *curr = this_rq->curr;
-        struct task_group *tg;
-        unsigned long tl = this_load;
+        struct task_struct *curr = current;
+        unsigned long this_load, load;
+        int idx, this_cpu, prev_cpu;
         unsigned long tl_per_task;
+        unsigned int imbalance;
+        struct task_group *tg;
         unsigned long weight;
         int balanced;
 
-        if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
-                return 0;
+        idx = sd->wake_idx;
+        this_cpu = smp_processor_id();
+        prev_cpu = task_cpu(p);
+        load = source_load(prev_cpu, idx);
+        this_load = target_load(this_cpu, idx);
 
         if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
                         p->se.avg_overlap > sysctl_sched_migration_cost))
@@ -1254,24 +1178,26 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
                 tg = task_group(current);
                 weight = current->se.load.weight;
 
-                tl += effective_load(tg, this_cpu, -weight, -weight);
+                this_load += effective_load(tg, this_cpu, -weight, -weight);
                 load += effective_load(tg, prev_cpu, 0, -weight);
         }
 
         tg = task_group(p);
         weight = p->se.load.weight;
 
+        imbalance = 100 + (sd->imbalance_pct - 100) / 2;
+
         /*
          * In low-load situations, where prev_cpu is idle and this_cpu is idle
-         * due to the sync cause above having dropped tl to 0, we'll always have
-         * an imbalance, but there's really nothing you can do about that, so
-         * that's good too.
+         * due to the sync cause above having dropped this_load to 0, we'll
+         * always have an imbalance, but there's really nothing you can do
+         * about that, so that's good too.
          *
          * Otherwise check if either cpus are near enough in load to allow this
          * task to be woken on this_cpu.
          */
-        balanced = !tl ||
-                100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+        balanced = !this_load ||
+                100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
                 imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
 
         /*
@@ -1285,14 +1211,15 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
         schedstat_inc(p, se.nr_wakeups_affine_attempts);
         tl_per_task = cpu_avg_load_per_task(this_cpu);
 
-        if (balanced || (tl <= load && tl + target_load(prev_cpu, idx) <=
-                        tl_per_task)) {
+        if (balanced ||
+            (this_load <= load &&
+             this_load + target_load(prev_cpu, idx) <= tl_per_task)) {
                 /*
                  * This domain has SD_WAKE_AFFINE and
                  * p is cache cold in this domain, and
                  * there is no bad imbalance.
                  */
-                schedstat_inc(this_sd, ttwu_move_affine);
+                schedstat_inc(sd, ttwu_move_affine);
                 schedstat_inc(p, se.nr_wakeups_affine);
 
                 return 1;
@@ -1300,72 +1227,6 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
         return 0;
 }
 
-static int sched_balance_self(int cpu, int flag);
-
-static int select_task_rq_fair(struct task_struct *p, int flag, int sync)
-{
-        struct sched_domain *sd, *this_sd = NULL;
-        int prev_cpu, this_cpu, new_cpu;
-        unsigned long load, this_load;
-        struct rq *this_rq;
-        unsigned int imbalance;
-        int idx;
-
-        prev_cpu = task_cpu(p);
-        this_cpu = smp_processor_id();
-        this_rq = cpu_rq(this_cpu);
-        new_cpu = prev_cpu;
-
-        if (flag != SD_BALANCE_WAKE)
-                return sched_balance_self(this_cpu, flag);
-
-        /*
-         * 'this_sd' is the first domain that both
-         * this_cpu and prev_cpu are present in:
-         */
-        for_each_domain(this_cpu, sd) {
-                if (cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) {
-                        this_sd = sd;
-                        break;
-                }
-        }
-
-        if (unlikely(!cpumask_test_cpu(this_cpu, &p->cpus_allowed)))
-                goto out;
-
-        /*
-         * Check for affine wakeup and passive balancing possibilities.
-         */
-        if (!this_sd)
-                goto out;
-
-        idx = this_sd->wake_idx;
-
-        imbalance = 100 + (this_sd->imbalance_pct - 100) / 2;
-
-        load = source_load(prev_cpu, idx);
-        this_load = target_load(this_cpu, idx);
-
-        if (wake_affine(this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
-                        load, this_load, imbalance))
-                return this_cpu;
-
-        /*
-         * Start passive balancing when half the imbalance_pct
-         * limit is reached.
-         */
-        if (this_sd->flags & SD_WAKE_BALANCE) {
-                if (imbalance*this_load <= 100*load) {
-                        schedstat_inc(this_sd, ttwu_move_balance);
-                        schedstat_inc(p, se.nr_wakeups_passive);
-                        return this_cpu;
-                }
-        }
-
-out:
-        return wake_idle(new_cpu, p);
-}
-
 /*
  * find_idlest_group finds and returns the least busy CPU group within the
  * domain.
@@ -1455,10 +1316,20 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
  *
  * preempt must be disabled.
  */
-static int sched_balance_self(int cpu, int flag)
+static int select_task_rq_fair(struct task_struct *p, int flag, int sync)
 {
         struct task_struct *t = current;
         struct sched_domain *tmp, *sd = NULL;
+        int cpu = smp_processor_id();
+        int prev_cpu = task_cpu(p);
+        int new_cpu = cpu;
+        int want_affine = 0;
+
+        if (flag & SD_BALANCE_WAKE) {
+                if (sched_feat(AFFINE_WAKEUPS))
+                        want_affine = 1;
+                new_cpu = prev_cpu;
+        }
 
         for_each_domain(cpu, tmp) {
                 /*
@@ -1466,16 +1337,38 @@ static int sched_balance_self(int cpu, int flag)
                  */
                 if (tmp->flags & SD_POWERSAVINGS_BALANCE)
                         break;
-                if (tmp->flags & flag)
-                        sd = tmp;
-        }
 
-        if (sd)
-                update_shares(sd);
+                switch (flag) {
+                case SD_BALANCE_WAKE:
+                        if (!sched_feat(LB_WAKEUP_UPDATE))
+                                break;
+                case SD_BALANCE_FORK:
+                case SD_BALANCE_EXEC:
+                        if (root_task_group_empty())
+                                break;
+                        update_shares(tmp);
+                default:
+                        break;
+                }
+
+                if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+                    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+
+                        if (wake_affine(tmp, p, sync))
+                                return cpu;
+
+                        want_affine = 0;
+                }
+
+                if (!(tmp->flags & flag))
+                        continue;
+
+                sd = tmp;
+        }
 
         while (sd) {
                 struct sched_group *group;
-                int new_cpu, weight;
+                int weight;
 
                 if (!(sd->flags & flag)) {
                         sd = sd->child;
@@ -1508,7 +1401,7 @@ static int sched_balance_self(int cpu, int flag)
                 /* while loop will break here if sd == NULL */
         }
 
-        return cpu;
+        return new_cpu;
 }
 #endif /* CONFIG_SMP */
 