Diffstat (limited to 'kernel/sched/fair.c')
-rw-r--r--   kernel/sched/fair.c | 78
1 file changed, 74 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index f9f9aa0edf3c..22321db64952 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3054,6 +3054,7 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 #define LBF_ALL_PINNED  0x01
 #define LBF_NEED_BREAK  0x02
+#define LBF_SOME_PINNED 0x04
 
 struct lb_env {
         struct sched_domain     *sd;
@@ -3064,6 +3065,8 @@ struct lb_env {
         int                     dst_cpu;
         struct rq               *dst_rq;
 
+        struct cpumask          *dst_grpmask;
+        int                     new_dst_cpu;
         enum cpu_idle_type      idle;
         long                    imbalance;
         unsigned int            flags;
@@ -3131,9 +3134,31 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
          * 3) are cache-hot on their current CPU.
          */
         if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) {
+                int new_dst_cpu;
+
                 schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
+
+                /*
+                 * Remember if this task can be migrated to any other cpu in
+                 * our sched_group. We may want to revisit it if we couldn't
+                 * meet load balance goals by pulling other tasks on src_cpu.
+                 *
+                 * Also avoid computing new_dst_cpu if we have already
+                 * computed one in the current iteration.
+                 */
+                if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
+                        return 0;
+
+                new_dst_cpu = cpumask_first_and(env->dst_grpmask,
+                                                tsk_cpus_allowed(p));
+                if (new_dst_cpu < nr_cpu_ids) {
+                        env->flags |= LBF_SOME_PINNED;
+                        env->new_dst_cpu = new_dst_cpu;
+                }
                 return 0;
         }
+
+        /* Record that we found at least one task that could run on dst_cpu */
         env->flags &= ~LBF_ALL_PINNED;
 
         if (task_running(env->src_rq, p)) {
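The hunk above only records a fallback: when a task's affinity excludes dst_cpu but does cover another CPU in dst_cpu's sched_group, can_migrate_task() now remembers that CPU in env->new_dst_cpu and raises LBF_SOME_PINNED instead of simply failing the migration. As a rough illustration of that bookkeeping (not kernel code: plain unsigned-long bitmasks stand in for struct cpumask, and lb_env_sketch, try_migrate and first_and are invented names), a minimal user-space sketch might look like this:

/*
 * Minimal user-space sketch of the LBF_SOME_PINNED bookkeeping above.
 * NOT the kernel implementation: plain unsigned-long bitmasks stand in
 * for struct cpumask, and lb_env_sketch/try_migrate/first_and are
 * made-up names used only for illustration.
 */
#include <stdio.h>

#define LBF_ALL_PINNED  0x01
#define LBF_SOME_PINNED 0x04
#define NR_CPUS         8       /* assumed toy machine size */

struct lb_env_sketch {
        int dst_cpu;               /* CPU we are trying to pull toward */
        unsigned long dst_grpmask; /* CPUs in dst_cpu's sched_group */
        int new_dst_cpu;           /* remembered fallback destination */
        unsigned int flags;
};

/* first set bit of (a & b), or NR_CPUS if the intersection is empty */
static int first_and(unsigned long a, unsigned long b)
{
        unsigned long both = a & b;
        int cpu;

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                if (both & (1UL << cpu))
                        return cpu;
        return NR_CPUS;
}

/* returns 1 if the task may be pulled to env->dst_cpu, 0 otherwise */
static int try_migrate(struct lb_env_sketch *env, unsigned long cpus_allowed)
{
        if (!(cpus_allowed & (1UL << env->dst_cpu))) {
                int new_dst_cpu;

                /* compute a fallback destination at most once per pass */
                if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED))
                        return 0;

                new_dst_cpu = first_and(env->dst_grpmask, cpus_allowed);
                if (new_dst_cpu < NR_CPUS) {
                        env->flags |= LBF_SOME_PINNED;
                        env->new_dst_cpu = new_dst_cpu;
                }
                return 0;
        }
        env->flags &= ~LBF_ALL_PINNED;
        return 1;
}

int main(void)
{
        /* dst_cpu 0, group = CPUs 0-3, task allowed only on CPUs 2-3 */
        struct lb_env_sketch env = {
                .dst_cpu = 0, .dst_grpmask = 0x0f, .flags = LBF_ALL_PINNED,
        };

        if (!try_migrate(&env, 0x0c) && (env.flags & LBF_SOME_PINNED))
                printf("pinned away from CPU %d, retry with CPU %d\n",
                       env.dst_cpu, env.new_dst_cpu);
        return 0;
}

Compiled and run, the sketch reports CPU 2 as the remembered fallback for a task pinned to CPUs 2-3, which is exactly the state load_balance() later consumes when it retries the pull.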
@@ -4213,7 +4238,8 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                         struct sched_domain *sd, enum cpu_idle_type idle,
                         int *balance)
 {
-        int ld_moved, active_balance = 0;
+        int ld_moved, cur_ld_moved, active_balance = 0;
+        int lb_iterations, max_lb_iterations;
         struct sched_group *group;
         struct rq *busiest;
         unsigned long flags;
@@ -4223,11 +4249,13 @@ static int load_balance(int this_cpu, struct rq *this_rq,
                 .sd             = sd,
                 .dst_cpu        = this_cpu,
                 .dst_rq         = this_rq,
+                .dst_grpmask    = sched_group_cpus(sd->groups),
                 .idle           = idle,
                 .loop_break     = sched_nr_migrate_break,
         };
 
         cpumask_copy(cpus, cpu_active_mask);
+        max_lb_iterations = cpumask_weight(env.dst_grpmask);
 
         schedstat_inc(sd, lb_count[idle]);
 
@@ -4253,6 +4281,7 @@ redo:
         schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
         ld_moved = 0;
+        lb_iterations = 1;
         if (busiest->nr_running > 1) {
                 /*
                  * Attempt to move tasks. If find_busiest_group has found
@@ -4270,7 +4299,13 @@ more_balance:
                 double_rq_lock(this_rq, busiest);
                 if (!env.loop)
                         update_h_load(env.src_cpu);
-                ld_moved += move_tasks(&env);
+
+                /*
+                 * cur_ld_moved - load moved in current iteration
+                 * ld_moved     - cumulative load moved across iterations
+                 */
+                cur_ld_moved = move_tasks(&env);
+                ld_moved += cur_ld_moved;
                 double_rq_unlock(this_rq, busiest);
                 local_irq_restore(flags);
 
@@ -4282,8 +4317,43 @@ more_balance:
                 /*
                  * some other cpu did the load balance for us.
                  */
-                if (ld_moved && this_cpu != smp_processor_id())
-                        resched_cpu(this_cpu);
+                if (cur_ld_moved && env.dst_cpu != smp_processor_id())
+                        resched_cpu(env.dst_cpu);
+
+                /*
+                 * Revisit (affine) tasks on src_cpu that couldn't be moved to
+                 * us and move them to an alternate dst_cpu in our sched_group
+                 * where they can run. The upper limit on how many times we
+                 * iterate on the same src_cpu depends on the number of CPUs
+                 * in our sched_group.
+                 *
+                 * This changes load balance semantics a bit on who can move
+                 * load to a given_cpu. In addition to the given_cpu itself
+                 * (or an ilb_cpu acting on its behalf where given_cpu is
+                 * nohz-idle), we now have balance_cpu in a position to move
+                 * load to given_cpu. In rare situations, this may cause
+                 * conflicts (balance_cpu and given_cpu/ilb_cpu deciding
+                 * _independently_ and at the _same_ time to move some load
+                 * to given_cpu), causing excess load to be moved to
+                 * given_cpu. This, however, should rarely happen in practice,
+                 * and moreover subsequent load balance cycles should correct
+                 * the excess load moved.
+                 */
+                if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
+                                lb_iterations++ < max_lb_iterations) {
+
+                        this_rq          = cpu_rq(env.new_dst_cpu);
+                        env.dst_rq       = this_rq;
+                        env.dst_cpu      = env.new_dst_cpu;
+                        env.flags       &= ~LBF_SOME_PINNED;
+                        env.loop         = 0;
+                        env.loop_break   = sched_nr_migrate_break;
+                        /*
+                         * Go back to "more_balance" rather than "redo" since
+                         * we need to continue with the same src_cpu.
+                         */
+                        goto more_balance;
+                }
 
                 /* All tasks on this runqueue were pinned by CPU affinity */
                 if (unlikely(env.flags & LBF_ALL_PINNED)) {
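Taken together, the load_balance() changes turn the single pull attempt into a bounded retry loop: if a pass leaves imbalance behind and LBF_SOME_PINNED was set, the environment is retargeted at env.new_dst_cpu and the same src_cpu is tried again, at most cpumask_weight(env.dst_grpmask) times. The following self-contained sketch models only that control flow; pull_tasks(), env_sketch and the load numbers are illustrative stand-ins, not the kernel's move_tasks() or real load values:

/*
 * Rough model of the bounded retry loop load_balance() gains in this
 * patch; control flow only. pull_tasks(), env_sketch and the numbers
 * below are illustrative stand-ins, not the kernel's move_tasks() or
 * real load values.
 */
#include <stdio.h>

#define LBF_SOME_PINNED 0x04

struct env_sketch {
        int dst_cpu;
        int new_dst_cpu;
        long imbalance;
        unsigned int flags;
};

/*
 * Stand-in for move_tasks(): pretend the only movable task is pinned to
 * CPU 2, so pulling toward any other CPU moves nothing but records CPU 2
 * as a usable fallback inside the destination's sched_group.
 */
static long pull_tasks(struct env_sketch *env)
{
        if (env->dst_cpu == 2) {
                long moved = env->imbalance;    /* CPU 2 can take it all */

                env->imbalance = 0;
                return moved;
        }
        env->flags |= LBF_SOME_PINNED;
        env->new_dst_cpu = 2;
        return 0;
}

int main(void)
{
        struct env_sketch env = { .dst_cpu = 0, .imbalance = 30 };
        int max_lb_iterations = 4;  /* models cpumask_weight(dst_grpmask) */
        int lb_iterations = 1;
        long ld_moved = 0;

more_balance:
        ld_moved += pull_tasks(&env);

        if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 &&
                        lb_iterations++ < max_lb_iterations) {
                /* retarget the same src_cpu at the remembered fallback CPU */
                env.dst_cpu = env.new_dst_cpu;
                env.flags &= ~LBF_SOME_PINNED;
                goto more_balance;
        }

        printf("moved %ld in %d iteration(s), imbalance left %ld\n",
               ld_moved, lb_iterations, env.imbalance);
        return 0;
}

With an imbalance of 30 and the only movable task pinned to CPU 2, the first pass moves nothing but remembers CPU 2, and the retry clears the imbalance on the second iteration, which is the scenario the new comment in the patch describes.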