-rw-r--r--	kernel/sched/fair.c	110
1 file changed, 39 insertions(+), 71 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a0424fc4cc54..def17aa302d5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -784,7 +784,7 @@ account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	update_load_add(&rq_of(cfs_rq)->load, se->load.weight);
 #ifdef CONFIG_SMP
 	if (entity_is_task(se))
-		list_add(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
+		list_add_tail(&se->group_node, &rq_of(cfs_rq)->cfs_tasks);
 #endif
 	cfs_rq->nr_running++;
 }
@@ -3071,7 +3071,6 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10;
 
 #define LBF_ALL_PINNED	0x01
 #define LBF_NEED_BREAK	0x02
-#define LBF_ABORT	0x04
 
 struct lb_env {
 	struct sched_domain	*sd;
@@ -3083,7 +3082,7 @@ struct lb_env {
 	struct rq		*dst_rq;
 
 	enum cpu_idle_type	idle;
-	unsigned long		max_load_move;
+	long			load_move;
 	unsigned int		flags;
 
 	unsigned int		loop;
@@ -3216,39 +3215,47 @@ static int move_one_task(struct lb_env *env)
 
 static unsigned long task_h_load(struct task_struct *p);
 
-static unsigned long balance_tasks(struct lb_env *env)
+/*
+ * move_tasks tries to move up to load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_tasks(struct lb_env *env)
 {
-	long rem_load_move = env->max_load_move;
-	struct task_struct *p, *n;
+	struct list_head *tasks = &env->src_rq->cfs_tasks;
+	struct task_struct *p;
 	unsigned long load;
 	int pulled = 0;
 
-	if (env->max_load_move == 0)
-		goto out;
+	if (env->load_move <= 0)
+		return 0;
+
+	while (!list_empty(tasks)) {
+		p = list_first_entry(tasks, struct task_struct, se.group_node);
 
-	list_for_each_entry_safe(p, n, &env->src_rq->cfs_tasks, se.group_node) {
 		env->loop++;
 		/* We've more or less seen every task there is, call it quits */
-		if (env->loop > env->loop_max) {
-			env->flags |= LBF_ABORT;
+		if (env->loop > env->loop_max)
 			break;
-		}
-		/* take a beather every nr_migrate tasks */
+
+		/* take a breather every nr_migrate tasks */
 		if (env->loop > env->loop_break) {
 			env->loop_break += sysctl_sched_nr_migrate;
 			env->flags |= LBF_NEED_BREAK;
 			break;
 		}
 
-		if (throttled_lb_pair(task_group(p), env->src_rq->cpu,
-				      env->dst_cpu))
+		if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
 			goto next;
 
 		load = task_h_load(p);
+
 		if (load < 16 && !env->sd->nr_balance_failed)
 			goto next;
 
-		if ((load * 2) > rem_load_move)
+		if ((load / 2) > env->load_move)
 			goto next;
 
 		if (!can_migrate_task(p, env))
@@ -3256,7 +3263,7 @@ static unsigned long balance_tasks(struct lb_env *env)
 
 		move_task(p, env);
 		pulled++;
-		rem_load_move -= load;
+		env->load_move -= load;
 
 #ifdef CONFIG_PREEMPT
 		/*
@@ -3264,24 +3271,22 @@ static unsigned long balance_tasks(struct lb_env *env)
 		 * kernels will stop after the first task is pulled to minimize
 		 * the critical section.
 		 */
-		if (env->idle == CPU_NEWLY_IDLE) {
-			env->flags |= LBF_ABORT;
+		if (env->idle == CPU_NEWLY_IDLE)
 			break;
-		}
 #endif
 
 		/*
 		 * We only want to steal up to the prescribed amount of
 		 * weighted load.
 		 */
-		if (rem_load_move <= 0)
+		if (env->load_move <= 0)
 			break;
 
 		continue;
 next:
-		list_move_tail(&p->se.group_node, &env->src_rq->cfs_tasks);
+		list_move_tail(&p->se.group_node, tasks);
 	}
-out:
+
 	/*
 	 * Right now, this is one of only two places move_task() is called,
 	 * so we can safely collect move_task() stats here rather than
@@ -3289,7 +3294,7 @@ out:
 	 */
 	schedstat_add(env->sd, lb_gained[env->idle], pulled);
 
-	return env->max_load_move - rem_load_move;
+	return pulled;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
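
The rewritten move_tasks() above no longer walks the list with list_for_each_entry_safe(); it repeatedly takes the head of src_rq->cfs_tasks and rotates every task it skips to the tail with list_move_tail(), which pairs with the list_add_tail() change in the first hunk (enqueue at the tail, scan from the head). Below is a minimal userspace sketch of that head-pop / rotate-to-tail scan; struct fake_task, the pinned flag, the load numbers and loop_max are made-up stand-ins, not kernel code:

/*
 * Userspace sketch of the scan pattern in the new move_tasks().
 * Build with: cc -o scan scan.c
 */
#include <stddef.h>
#include <stdio.h>

struct list_node {
	struct list_node *prev, *next;
};

struct fake_task {
	struct list_node group_node;
	long load;
	int pinned;			/* stand-in for can_migrate_task() failing */
};

static void list_init(struct list_node *h) { h->prev = h->next = h; }

static void list_del_node(struct list_node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

static void list_add_tail_node(struct list_node *n, struct list_node *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

int main(void)
{
	struct fake_task t[] = {
		{ .load = 100, .pinned = 1 },
		{ .load = 40 }, { .load = 10 }, { .load = 70 },
	};
	struct list_node tasks;
	long load_move = 100;		/* imbalance we are allowed to pull */
	int i, loop = 0, loop_max = 4, pulled = 0;

	list_init(&tasks);
	for (i = 0; i < 4; i++)
		list_add_tail_node(&t[i].group_node, &tasks);

	while (tasks.next != &tasks) {	/* !list_empty(tasks) */
		struct fake_task *p = (struct fake_task *)
			((char *)tasks.next - offsetof(struct fake_task, group_node));

		if (++loop > loop_max)	/* seen roughly every task */
			break;

		if (p->pinned || p->load / 2 > load_move) {
			/* skip it, but rotate it to the tail so the next
			 * iteration sees a different head */
			list_del_node(&p->group_node);
			list_add_tail_node(&p->group_node, &tasks);
			continue;
		}

		list_del_node(&p->group_node);	/* "migrate" it */
		pulled++;
		load_move -= p->load;
		if (load_move <= 0)
			break;
	}
	printf("pulled %d tasks, %ld load left\n", pulled, load_move);
	return 0;
}

The rotation is what lets a later pass, for example after an LBF_NEED_BREAK retry, start from candidates it has not just rejected.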
@@ -3399,43 +3404,6 @@ static unsigned long task_h_load(struct task_struct *p)
 }
 #endif
 
-/*
- * move_tasks tries to move up to max_load_move weighted load from busiest to
- * this_rq, as part of a balancing operation within domain "sd".
- * Returns 1 if successful and 0 otherwise.
- *
- * Called with both runqueues locked.
- */
-static int move_tasks(struct lb_env *env)
-{
-	unsigned long max_load_move = env->max_load_move;
-	unsigned long total_load_moved = 0, load_moved;
-
-	update_h_load(cpu_of(env->src_rq));
-	do {
-		env->max_load_move = max_load_move - total_load_moved;
-		load_moved = balance_tasks(env);
-		total_load_moved += load_moved;
-
-		if (env->flags & (LBF_NEED_BREAK|LBF_ABORT))
-			break;
-
-#ifdef CONFIG_PREEMPT
-		/*
-		 * NEWIDLE balancing is a source of latency, so preemptible
-		 * kernels will stop after the first task is pulled to minimize
-		 * the critical section.
-		 */
-		if (env->idle == CPU_NEWLY_IDLE && env->dst_rq->nr_running) {
-			env->flags |= LBF_ABORT;
-			break;
-		}
-#endif
-	} while (load_moved && max_load_move > total_load_moved);
-
-	return total_load_moved > 0;
-}
-
 /********** Helpers for find_busiest_group ************************/
 /*
  * sd_lb_stats - Structure to store the statistics of a sched_domain
@@ -4477,31 +4445,31 @@ redo:
 	 * correctly treated as an imbalance.
 	 */
 	env.flags |= LBF_ALL_PINNED;
-	env.max_load_move = imbalance;
+	env.load_move = imbalance;
 	env.src_cpu = busiest->cpu;
 	env.src_rq = busiest;
 	env.loop_max = busiest->nr_running;
 
+more_balance:
 	local_irq_save(flags);
 	double_rq_lock(this_rq, busiest);
-	ld_moved = move_tasks(&env);
+	if (!env.loop)
+		update_h_load(env.src_cpu);
+	ld_moved += move_tasks(&env);
 	double_rq_unlock(this_rq, busiest);
 	local_irq_restore(flags);
 
+	if (env.flags & LBF_NEED_BREAK) {
+		env.flags &= ~LBF_NEED_BREAK;
+		goto more_balance;
+	}
+
 	/*
 	 * some other cpu did the load balance for us.
 	 */
 	if (ld_moved && this_cpu != smp_processor_id())
 		resched_cpu(this_cpu);
 
-	if (env.flags & LBF_ABORT)
-		goto out_balanced;
-
-	if (env.flags & LBF_NEED_BREAK) {
-		env.flags &= ~LBF_NEED_BREAK;
-		goto redo;
-	}
-
 	/* All tasks on this runqueue were pinned by CPU affinity */
 	if (unlikely(env.flags & LBF_ALL_PINNED)) {
 		cpumask_clear_cpu(cpu_of(busiest), cpus);
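
With LBF_ABORT gone, LBF_NEED_BREAK is the only remaining stop-and-retry signal, and the retry now lives in load_balance() itself: the more_balance label re-enters move_tasks() after double_rq_unlock() and local_irq_restore(), so the nr_migrate breather really does drop both runqueue locks between batches, and update_h_load() runs only on the first batch (while env.loop is still 0). A rough userspace sketch of that shape, where a single pthread mutex stands in for the two runqueue locks and do_batch(), struct env and NEED_BREAK are hypothetical stand-ins for move_tasks(), struct lb_env and LBF_NEED_BREAK:

/* Userspace sketch of the more_balance retry loop; not kernel code. */
#include <pthread.h>
#include <stdio.h>

#define NEED_BREAK	0x02

struct env {
	long load_move;			/* weighted load still to pull */
	unsigned int flags;
	unsigned int loop, loop_break, loop_max;
};

/* Pretend to pull tasks; ask for a breather every 8 "tasks". */
static int do_batch(struct env *env)
{
	int pulled = 0;

	while (env->load_move > 0) {
		if (++env->loop > env->loop_max)
			break;
		if (env->loop > env->loop_break) {
			env->loop_break += 8;
			env->flags |= NEED_BREAK;
			break;
		}
		env->load_move -= 10;	/* one fake migrated task */
		pulled++;
	}
	return pulled;
}

int main(void)
{
	pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
	struct env env = { .load_move = 300, .loop_break = 8, .loop_max = 64 };
	int ld_moved = 0;

more_balance:
	pthread_mutex_lock(&rq_lock);	/* double_rq_lock() in the real code */
	ld_moved += do_batch(&env);	/* accumulates across batches */
	pthread_mutex_unlock(&rq_lock);	/* locks dropped between batches */

	if (env.flags & NEED_BREAK) {
		env.flags &= ~NEED_BREAK;
		goto more_balance;
	}

	printf("moved %d, %ld load left\n", ld_moved, env.load_move);
	return 0;
}

Because env.loop and env.loop_break keep their values across the goto, each retry resumes where the previous batch stopped instead of rescanning from scratch.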
