-rw-r--r--	kernel/sched.c	175
1 files changed, 100 insertions, 75 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 109db122de50..1893d5562f5f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3237,6 +3237,103 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 
 	return load_idx;
 }
+
+
+/**
+ * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @group: sched_group whose statistics are to be updated.
+ * @this_cpu: Cpu for which load balance is currently performed.
+ * @idle: Idle status of this_cpu
+ * @load_idx: Load index of sched_domain of this_cpu for load calc.
+ * @sd_idle: Idle status of the sched_domain containing group.
+ * @local_group: Does group contain this_cpu.
+ * @cpus: Set of cpus considered for load balancing.
+ * @balance: Should we balance.
+ * @sgs: variable to hold the statistics for this group.
+ */
+static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+			enum cpu_idle_type idle, int load_idx, int *sd_idle,
+			int local_group, const struct cpumask *cpus,
+			int *balance, struct sg_lb_stats *sgs)
+{
+	unsigned long load, max_cpu_load, min_cpu_load;
+	int i;
+	unsigned int balance_cpu = -1, first_idle_cpu = 0;
+	unsigned long sum_avg_load_per_task;
+	unsigned long avg_load_per_task;
+
+	if (local_group)
+		balance_cpu = group_first_cpu(group);
+
+	/* Tally up the load of all CPUs in the group */
+	sum_avg_load_per_task = avg_load_per_task = 0;
+	max_cpu_load = 0;
+	min_cpu_load = ~0UL;
+
+	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+		struct rq *rq = cpu_rq(i);
+
+		if (*sd_idle && rq->nr_running)
+			*sd_idle = 0;
+
+		/* Bias balancing toward cpus of our domain */
+		if (local_group) {
+			if (idle_cpu(i) && !first_idle_cpu) {
+				first_idle_cpu = 1;
+				balance_cpu = i;
+			}
+
+			load = target_load(i, load_idx);
+		} else {
+			load = source_load(i, load_idx);
+			if (load > max_cpu_load)
+				max_cpu_load = load;
+			if (min_cpu_load > load)
+				min_cpu_load = load;
+		}
+
+		sgs->group_load += load;
+		sgs->sum_nr_running += rq->nr_running;
+		sgs->sum_weighted_load += weighted_cpuload(i);
+
+		sum_avg_load_per_task += cpu_avg_load_per_task(i);
+	}
+
+	/*
+	 * First idle cpu or the first cpu(busiest) in this sched group
+	 * is eligible for doing load balancing at this and above
+	 * domains. In the newly idle case, we will allow all the cpu's
+	 * to do the newly idle load balance.
+	 */
+	if (idle != CPU_NEWLY_IDLE && local_group &&
+	    balance_cpu != this_cpu && balance) {
+		*balance = 0;
+		return;
+	}
+
+	/* Adjust by relative CPU power of the group */
+	sgs->avg_load = sg_div_cpu_power(group,
+			sgs->group_load * SCHED_LOAD_SCALE);
+
+
+	/*
+	 * Consider the group unbalanced when the imbalance is larger
+	 * than the average weight of two tasks.
+	 *
+	 * APZ: with cgroup the avg task weight can vary wildly and
+	 * might not be a suitable number - should we keep a
+	 * normalized nr_running number somewhere that negates
+	 * the hierarchy?
+	 */
+	avg_load_per_task = sg_div_cpu_power(group,
+			sum_avg_load_per_task * SCHED_LOAD_SCALE);
+
+	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+		sgs->group_imb = 1;
+
+	sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+
+}
 /******* find_busiest_group() helpers end here *********************/
 
 /*
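The hunk above flags a sched_group as imbalanced when the spread between its busiest and least-busy CPU exceeds twice the average per-task load. The short user-space sketch below is not part of the patch; the helper name group_is_imbalanced and the sample load values are made up purely to illustrate that check in isolation.

#include <stdio.h>

/* Illustrative only: mirrors the group_imb condition in update_sg_lb_stats(). */
static int group_is_imbalanced(const unsigned long *cpu_load, int ncpus,
			       unsigned long avg_load_per_task)
{
	unsigned long max_cpu_load = 0, min_cpu_load = ~0UL;
	int i;

	for (i = 0; i < ncpus; i++) {
		if (cpu_load[i] > max_cpu_load)
			max_cpu_load = cpu_load[i];
		if (cpu_load[i] < min_cpu_load)
			min_cpu_load = cpu_load[i];
	}

	/* Same condition as in the patch: spread > twice the avg task weight. */
	return (max_cpu_load - min_cpu_load) > 2 * avg_load_per_task;
}

int main(void)
{
	/* Hypothetical loads for a 4-CPU group; CPU 0 is much busier. */
	unsigned long load[] = { 3584, 1024, 1024, 1024 };

	/* 3584 - 1024 = 2560 > 2 * 1024, so the group is reported imbalanced. */
	printf("group_imb = %d\n", group_is_imbalanced(load, 4, 1024));
	return 0;
}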
@@ -3270,92 +3367,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 	do {
 		struct sg_lb_stats sgs;
-		unsigned long load, max_cpu_load, min_cpu_load;
 		int local_group;
-		int i;
-		unsigned int balance_cpu = -1, first_idle_cpu = 0;
-		unsigned long sum_avg_load_per_task;
-		unsigned long avg_load_per_task;
 
 		local_group = cpumask_test_cpu(this_cpu,
 					       sched_group_cpus(group));
 		memset(&sgs, 0, sizeof(sgs));
+		update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+					local_group, cpus, balance, &sgs);
 
-		if (local_group)
-			balance_cpu = group_first_cpu(group);
-
-		/* Tally up the load of all CPUs in the group */
-		sum_avg_load_per_task = avg_load_per_task = 0;
-
-		max_cpu_load = 0;
-		min_cpu_load = ~0UL;
-
-		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
-			struct rq *rq = cpu_rq(i);
-
-			if (*sd_idle && rq->nr_running)
-				*sd_idle = 0;
-
-			/* Bias balancing toward cpus of our domain */
-			if (local_group) {
-				if (idle_cpu(i) && !first_idle_cpu) {
-					first_idle_cpu = 1;
-					balance_cpu = i;
-				}
-
-				load = target_load(i, load_idx);
-			} else {
-				load = source_load(i, load_idx);
-				if (load > max_cpu_load)
-					max_cpu_load = load;
-				if (min_cpu_load > load)
-					min_cpu_load = load;
-			}
-
-			sgs.group_load += load;
-			sgs.sum_nr_running += rq->nr_running;
-			sgs.sum_weighted_load += weighted_cpuload(i);
-
-			sum_avg_load_per_task += cpu_avg_load_per_task(i);
-		}
-
-		/*
-		 * First idle cpu or the first cpu(busiest) in this sched group
-		 * is eligible for doing load balancing at this and above
-		 * domains. In the newly idle case, we will allow all the cpu's
-		 * to do the newly idle load balance.
-		 */
-		if (idle != CPU_NEWLY_IDLE && local_group &&
-		    balance_cpu != this_cpu && balance) {
-			*balance = 0;
+		if (balance && !(*balance))
 			goto ret;
-		}
 
 		total_load += sgs.group_load;
 		total_pwr += group->__cpu_power;
 
-		/* Adjust by relative CPU power of the group */
-		sgs.avg_load = sg_div_cpu_power(group,
-				sgs.group_load * SCHED_LOAD_SCALE);
-
-
-		/*
-		 * Consider the group unbalanced when the imbalance is larger
-		 * than the average weight of two tasks.
-		 *
-		 * APZ: with cgroup the avg task weight can vary wildly and
-		 * might not be a suitable number - should we keep a
-		 * normalized nr_running number somewhere that negates
-		 * the hierarchy?
-		 */
-		avg_load_per_task = sg_div_cpu_power(group,
-				sum_avg_load_per_task * SCHED_LOAD_SCALE);
-
-		if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
-			sgs.group_imb = 1;
-
-		sgs.group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
 		if (local_group) {
 			this_load = sgs.avg_load;
 			this = group;