Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--  kernel/sched/fair.c  78
1 file changed, 74 insertions, 4 deletions
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f9f9aa0edf3c..22321db64952 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3054,6 +3054,7 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 #define LBF_ALL_PINNED  0x01
 #define LBF_NEED_BREAK  0x02
+#define LBF_SOME_PINNED 0x04
 
 struct lb_env {
         struct sched_domain     *sd;
@@ -3064,6 +3065,8 @@ struct lb_env {
         int                     dst_cpu;
         struct rq               *dst_rq;
 
+        struct cpumask          *dst_grpmask;
+        int                     new_dst_cpu;
         enum cpu_idle_type      idle;
         long                    imbalance;
         unsigned int            flags;
@@ -3131,9 +3134,31 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
          * 3) are cache-hot on their current CPU.
          */
         if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
+                int new_dst_cpu;
+
                 schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+
+                /*
+                 * Remember if this task can be migrated to any other cpu in
+                 * our sched_group. We may want to revisit it if we couldn't
+                 * meet load balance goals by pulling other tasks on src_cpu.
+                 *
+                 * Also avoid computing new_dst_cpu if we have already computed
+                 * one in the current iteration.
+                 */
+                if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
+                        return 0;
+
+                new_dst_cpu = cpumask_first_and(env->dst_grpmask,
+                                                tsk_cpus_allowed(p));
+                if (new_dst_cpu < nr_cpu_ids) {
+                        env->flags |= LBF_SOME_PINNED;
+                        env->new_dst_cpu = new_dst_cpu;
+                }
                 return 0;
         }
+
+        /* Record that we found at least one task that could run on dst_cpu */
         env->flags &= ~LBF_ALL_PINNED;
 
         if (task_running(env->src_rq, p)) {
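
The can_migrate_task() hunk above adds the LBF_SOME_PINNED bookkeeping: when a task's affinity mask excludes dst_cpu, the balancer records the first other cpu in the destination sched_group on which the task could run, so a later pass can retry with that cpu. Below is a minimal user-space sketch of just that selection step, assuming cpumasks can be modelled as 64-bit words; the names lb_env_model, first_and() and can_migrate() are invented for illustration and are not kernel API.

/*
 * Minimal user-space model of the new_dst_cpu selection added to
 * can_migrate_task(). Cpumasks are modelled as 64-bit words; the names
 * here are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

#define LBF_SOME_PINNED 0x04
#define NR_CPUS 64

struct lb_env_model {
        int      dst_cpu;       /* CPU we are pulling load to */
        uint64_t dst_grpmask;   /* CPUs in dst_cpu's sched_group */
        int      new_dst_cpu;   /* alternate target, if any */
        unsigned flags;
};

/* Rough analogue of cpumask_first_and(): lowest set bit of a & b,
 * or NR_CPUS when the masks do not intersect. */
static int first_and(uint64_t a, uint64_t b)
{
        uint64_t both = a & b;
        return both ? __builtin_ctzll(both) : NR_CPUS;
}

/*
 * Returns 0 when the task cannot be pulled to env->dst_cpu, mirroring the
 * early-return path in can_migrate_task(); as a side effect it may record
 * an alternate destination CPU for a later retry.
 */
static int can_migrate(uint64_t cpus_allowed, struct lb_env_model *env)
{
        if (!(cpus_allowed & (1ULL << env->dst_cpu))) {
                int new_dst_cpu;

                /* Only compute one alternate target per iteration. */
                if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
                        return 0;

                new_dst_cpu = first_and(env->dst_grpmask, cpus_allowed);
                if (new_dst_cpu < NR_CPUS) {
                        env->flags |= LBF_SOME_PINNED;
                        env->new_dst_cpu = new_dst_cpu;
                }
                return 0;
        }
        return 1;       /* task is a candidate for migration to dst_cpu */
}

int main(void)
{
        /* Task pinned to CPUs 2-3, balancer pulling to CPU 0, group 0-3. */
        struct lb_env_model env = { .dst_cpu = 0, .dst_grpmask = 0xfULL };

        if (!can_migrate(0xcULL, &env) && (env.flags & LBF_SOME_PINNED))
                printf("pinned away from CPU%d, retry on CPU%d\n",
                       env.dst_cpu, env.new_dst_cpu);
        return 0;
}

In the real code, cpumask_first_and() returns nr_cpu_ids when the two masks do not intersect, which is why the patch compares new_dst_cpu against nr_cpu_ids before setting the flag.
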
@@ -4213,7 +4238,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
                         int *balance)
 {
-        int ld_moved, active_balance = 0;
+        int ld_moved, cur_ld_moved, active_balance = 0;
+        int lb_iterations, max_lb_iterations;
         struct sched_group *group;
         struct rq *busiest;
         unsigned long flags;
@@ -4223,11 +4249,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 .sd             = sd,
                 .dst_cpu        = this_cpu,
                 .dst_rq         = this_rq,
+                .dst_grpmask    = sched_group_cpus(sd->groups),
                 .idle           = idle,
                 .loop_break     = sched_nr_migrate_break,
         };
 
         cpumask_copy(cpus, cpu_active_mask);
+        max_lb_iterations = cpumask_weight(env.dst_grpmask);
 
         schedstat_inc(sd, lb_count[idle]);
 
@@ -4253,6 +4281,7 @@ redo:
         schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
         ld_moved = 0;
+        lb_iterations = 1;
         if (busiest->nr_running > 1) {
                 /*
                  * Attempt to move tasks. If find_busiest_group has found
@@ -4270,7 +4299,13 @@ more_balance:
                 double_rq_lock(this_rq, busiest);
                 if (!env.loop)
                         update_h_load(env.src_cpu);
-                ld_moved += move_tasks(&env);
+
+                /*
+                 * cur_ld_moved - load moved in current iteration
+                 * ld_moved     - cumulative load moved across iterations
+                 */
+                cur_ld_moved = move_tasks(&env);
+                ld_moved += cur_ld_moved;
                 double_rq_unlock(this_rq, busiest);
                 local_irq_restore(flags);
 
@@ -4282,8 +4317,43 @@ more_balance:
                 /*
                  * some other cpu did the load balance for us.
                  */
-                if (ld_moved && this_cpu != smp_processor_id())
-                        resched_cpu(this_cpu);
+                if (cur_ld_moved && env.dst_cpu != smp_processor_id())
+                        resched_cpu(env.dst_cpu);
+
+                /*
+                 * Revisit (affine) tasks on src_cpu that couldn't be moved
+                 * to us and move them to an alternate dst_cpu in our
+                 * sched_group where they can run. The upper limit on how
+                 * many times we iterate on the same src_cpu depends on the
+                 * number of cpus in our sched_group.
+                 *
+                 * This changes the load balance semantics a bit on who can
+                 * move load to a given_cpu. In addition to the given_cpu
+                 * itself (or an ilb_cpu acting on its behalf where given_cpu
+                 * is nohz-idle), we now have balance_cpu in a position to
+                 * move load to given_cpu. In rare situations this may cause
+                 * conflicts (balance_cpu and given_cpu/ilb_cpu deciding
+                 * _independently_ and at the _same_ time to move some load
+                 * to given_cpu), causing excess load to be moved to
+                 * given_cpu. This should not happen often in practice, and
+                 * subsequent load balance cycles should correct the excess
+                 * load moved.
+                 */
+                if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
+                                lb_iterations++ < max_lb_iterations) {
+
+                        this_rq          = cpu_rq(env.new_dst_cpu);
+                        env.dst_rq       = this_rq;
+                        env.dst_cpu      = env.new_dst_cpu;
+                        env.flags       &= ~LBF_SOME_PINNED;
+                        env.loop         = 0;
+                        env.loop_break   = sched_nr_migrate_break;
+                        /*
+                         * Go back to "more_balance" rather than "redo" since we
+                         * need to continue with the same src_cpu.
+                         */
+                        goto more_balance;
+                }
 
                 /* All tasks on this runqueue were pinned by CPU affinity */
                 if (unlikely(env.flags & LBF_ALL_PINNED)) {
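
Taken together, the load_balance() changes add a bounded retry loop: after each move_tasks() pass, if LBF_SOME_PINNED was raised, the imbalance is still positive, and fewer than max_lb_iterations passes (the weight of the destination group mask) have run, the environment is re-pointed at new_dst_cpu and control jumps back to more_balance with the same src_cpu. The stand-alone C sketch below models only that control flow; move_tasks_model(), its fixed 100-unit transfer and the group_weight value are invented for illustration and do not correspond to kernel behaviour.

/*
 * Stand-alone model of the "more_balance" retry added to load_balance():
 * keep the same src_cpu but rotate dst_cpu through the local sched_group
 * while pinned tasks block progress. The mover and constants are invented.
 */
#include <stdio.h>

#define LBF_SOME_PINNED 0x04

struct env_model {
        int dst_cpu;
        int new_dst_cpu;
        long imbalance;
        unsigned flags;
};

/*
 * Pretend mover: CPU 0 can take nothing (every candidate task is pinned
 * elsewhere in the group); any other CPU absorbs up to 100 units per pass.
 */
static long move_tasks_model(struct env_model *env)
{
        if (env->dst_cpu == 0) {
                env->flags |= LBF_SOME_PINNED;
                env->new_dst_cpu = env->dst_cpu + 1;
                return 0;
        }
        long moved = env->imbalance < 100 ? env->imbalance : 100;
        env->imbalance -= moved;
        return moved;
}

int main(void)
{
        struct env_model env = { .dst_cpu = 0, .imbalance = 250 };
        int group_weight = 4;           /* stand-in for cpumask_weight(dst_grpmask) */
        int lb_iterations = 1;
        long ld_moved = 0, cur_ld_moved;

more_balance:
        cur_ld_moved = move_tasks_model(&env);
        ld_moved += cur_ld_moved;
        printf("dst_cpu=%d moved=%ld remaining=%ld\n",
               env.dst_cpu, cur_ld_moved, env.imbalance);

        /* Same shape as the patch's condition: retry with the alternate
         * destination while pinned tasks blocked progress. */
        if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
            lb_iterations++ < group_weight) {
                env.dst_cpu = env.new_dst_cpu;
                env.flags &= ~LBF_SOME_PINNED;
                goto more_balance;
        }

        printf("total moved %ld over %d iteration(s)\n", ld_moved, lb_iterations);
        return 0;
}

Note that, as in the patch, the retry only fires when the most recent pass actually raised LBF_SOME_PINNED, so a pass that moves load without hitting pinned tasks ends the loop even if some imbalance remains.
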