|   |   |   |
|---|---|---|
| author | Gautham R Shenoy <ego@in.ibm.com> | 2009-03-25 05:13:51 -0400 |
| committer | Ingo Molnar <mingo@elte.hu> | 2009-03-25 05:30:45 -0400 |
| commit | 1f8c553d0f11d85f7993fe21015695d266771c00 (patch) | |
| tree | f7444308d28ddab455ccd48f246b628fdb854eaf /kernel | |
| parent | 381be78fdc829a22f6327a0ed09f54b6270a976d (diff) | |
sched: Create a helper function to calculate sched_group stats for fbg()
Impact: cleanup
Create a helper function named update_sg_lb_stats() which
can be invoked to calculate the individual group's statistics
in find_busiest_group().
This reduces the length of find_busiest_group() considerably.
Credit: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Balbir Singh" <balbir@in.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: "Dhaval Giani" <dhaval@linux.vnet.ibm.com>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
LKML-Reference: <20090325091351.13992.43461.stgit@sofia.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched.c | 175 |
1 file changed, 100 insertions(+), 75 deletions(-)
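Before the patch itself, here is a minimal, self-contained C sketch of the refactoring pattern the commit applies: a per-group statistics loop is pulled out of the group-scanning function into a helper that fills a stats structure, so the caller only deals with aggregated values. All names in the sketch (`struct group_stats`, `compute_group_stats()`, `pick_busiest()`, and the toy `struct cpu`/`struct group`) are hypothetical stand-ins, not the kernel's `sg_lb_stats`, `update_sg_lb_stats()` or `find_busiest_group()`; it only illustrates the shape of the change, not the scheduler's load-balancing logic.

```c
/*
 * Standalone sketch (not kernel code) of the "extract a stats helper"
 * pattern. group_stats, compute_group_stats() and pick_busiest() are
 * hypothetical stand-ins for sg_lb_stats, update_sg_lb_stats() and
 * find_busiest_group().
 */
#include <stdio.h>
#include <stddef.h>

struct cpu {
	unsigned long load;
	unsigned int nr_running;
};

struct group {
	const char *name;
	const struct cpu *cpus;
	size_t nr_cpus;
};

/* Per-group statistics, analogous in spirit to sg_lb_stats. */
struct group_stats {
	unsigned long total_load;
	unsigned int total_running;
	unsigned long max_cpu_load;
	unsigned long min_cpu_load;
};

/*
 * Helper that fills in one group's statistics. Before the refactor this
 * loop would sit inline in pick_busiest(); afterwards the caller only
 * sees the aggregated struct.
 */
static void compute_group_stats(const struct group *g, struct group_stats *gs)
{
	gs->total_load = 0;
	gs->total_running = 0;
	gs->max_cpu_load = 0;
	gs->min_cpu_load = ~0UL;

	for (size_t i = 0; i < g->nr_cpus; i++) {
		unsigned long load = g->cpus[i].load;

		gs->total_load += load;
		gs->total_running += g->cpus[i].nr_running;
		if (load > gs->max_cpu_load)
			gs->max_cpu_load = load;
		if (load < gs->min_cpu_load)
			gs->min_cpu_load = load;
	}
}

/* The caller now reads as a summary: compute stats, then compare groups. */
static const struct group *pick_busiest(const struct group *groups, size_t n)
{
	const struct group *busiest = NULL;
	unsigned long busiest_load = 0;

	for (size_t i = 0; i < n; i++) {
		struct group_stats gs;

		compute_group_stats(&groups[i], &gs);
		if (gs.total_load > busiest_load) {
			busiest_load = gs.total_load;
			busiest = &groups[i];
		}
	}
	return busiest;
}

int main(void)
{
	const struct cpu a[] = { { 120, 3 }, { 80, 1 } };
	const struct cpu b[] = { { 400, 7 }, { 10, 0 } };
	const struct group groups[] = {
		{ "groupA", a, 2 },
		{ "groupB", b, 2 },
	};

	printf("busiest: %s\n", pick_busiest(groups, 2)->name);
	return 0;
}
```

Built with a plain `cc sketch.c`, it prints the name of the more loaded toy group; the point is simply that the caller shrinks to "compute stats, compare groups", which is what the patch below does to find_busiest_group().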
diff --git a/kernel/sched.c b/kernel/sched.c
index 109db122de50..1893d5562f5f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3237,6 +3237,103 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
 
 	return load_idx;
 }
+
+
+/**
+ * update_sg_lb_stats - Update sched_group's statistics for load balancing.
+ * @group: sched_group whose statistics are to be updated.
+ * @this_cpu: Cpu for which load balance is currently performed.
+ * @idle: Idle status of this_cpu
+ * @load_idx: Load index of sched_domain of this_cpu for load calc.
+ * @sd_idle: Idle status of the sched_domain containing group.
+ * @local_group: Does group contain this_cpu.
+ * @cpus: Set of cpus considered for load balancing.
+ * @balance: Should we balance.
+ * @sgs: variable to hold the statistics for this group.
+ */
+static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu,
+			enum cpu_idle_type idle, int load_idx, int *sd_idle,
+			int local_group, const struct cpumask *cpus,
+			int *balance, struct sg_lb_stats *sgs)
+{
+	unsigned long load, max_cpu_load, min_cpu_load;
+	int i;
+	unsigned int balance_cpu = -1, first_idle_cpu = 0;
+	unsigned long sum_avg_load_per_task;
+	unsigned long avg_load_per_task;
+
+	if (local_group)
+		balance_cpu = group_first_cpu(group);
+
+	/* Tally up the load of all CPUs in the group */
+	sum_avg_load_per_task = avg_load_per_task = 0;
+	max_cpu_load = 0;
+	min_cpu_load = ~0UL;
+
+	for_each_cpu_and(i, sched_group_cpus(group), cpus) {
+		struct rq *rq = cpu_rq(i);
+
+		if (*sd_idle && rq->nr_running)
+			*sd_idle = 0;
+
+		/* Bias balancing toward cpus of our domain */
+		if (local_group) {
+			if (idle_cpu(i) && !first_idle_cpu) {
+				first_idle_cpu = 1;
+				balance_cpu = i;
+			}
+
+			load = target_load(i, load_idx);
+		} else {
+			load = source_load(i, load_idx);
+			if (load > max_cpu_load)
+				max_cpu_load = load;
+			if (min_cpu_load > load)
+				min_cpu_load = load;
+		}
+
+		sgs->group_load += load;
+		sgs->sum_nr_running += rq->nr_running;
+		sgs->sum_weighted_load += weighted_cpuload(i);
+
+		sum_avg_load_per_task += cpu_avg_load_per_task(i);
+	}
+
+	/*
+	 * First idle cpu or the first cpu(busiest) in this sched group
+	 * is eligible for doing load balancing at this and above
+	 * domains. In the newly idle case, we will allow all the cpu's
+	 * to do the newly idle load balance.
+	 */
+	if (idle != CPU_NEWLY_IDLE && local_group &&
+	    balance_cpu != this_cpu && balance) {
+		*balance = 0;
+		return;
+	}
+
+	/* Adjust by relative CPU power of the group */
+	sgs->avg_load = sg_div_cpu_power(group,
+			sgs->group_load * SCHED_LOAD_SCALE);
+
+
+	/*
+	 * Consider the group unbalanced when the imbalance is larger
+	 * than the average weight of two tasks.
+	 *
+	 * APZ: with cgroup the avg task weight can vary wildly and
+	 * might not be a suitable number - should we keep a
+	 * normalized nr_running number somewhere that negates
+	 * the hierarchy?
+	 */
+	avg_load_per_task = sg_div_cpu_power(group,
+			sum_avg_load_per_task * SCHED_LOAD_SCALE);
+
+	if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
+		sgs->group_imb = 1;
+
+	sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
+
+}
 /******* find_busiest_group() helpers end here *********************/
 
 /*
@@ -3270,92 +3367,20 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 	do {
 		struct sg_lb_stats sgs;
-		unsigned long load, max_cpu_load, min_cpu_load;
 		int local_group;
-		int i;
-		unsigned int balance_cpu = -1, first_idle_cpu = 0;
-		unsigned long sum_avg_load_per_task;
-		unsigned long avg_load_per_task;
 
 		local_group = cpumask_test_cpu(this_cpu,
 					       sched_group_cpus(group));
 		memset(&sgs, 0, sizeof(sgs));
+		update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle,
+					local_group, cpus, balance, &sgs);
 
-		if (local_group)
-			balance_cpu = group_first_cpu(group);
-
-		/* Tally up the load of all CPUs in the group */
-		sum_avg_load_per_task = avg_load_per_task = 0;
-
-		max_cpu_load = 0;
-		min_cpu_load = ~0UL;
-
-		for_each_cpu_and(i, sched_group_cpus(group), cpus) {
-			struct rq *rq = cpu_rq(i);
-
-			if (*sd_idle && rq->nr_running)
-				*sd_idle = 0;
-
-			/* Bias balancing toward cpus of our domain */
-			if (local_group) {
-				if (idle_cpu(i) && !first_idle_cpu) {
-					first_idle_cpu = 1;
-					balance_cpu = i;
-				}
-
-				load = target_load(i, load_idx);
-			} else {
-				load = source_load(i, load_idx);
-				if (load > max_cpu_load)
-					max_cpu_load = load;
-				if (min_cpu_load > load)
-					min_cpu_load = load;
-			}
-
-			sgs.group_load += load;
-			sgs.sum_nr_running += rq->nr_running;
-			sgs.sum_weighted_load += weighted_cpuload(i);
-
-			sum_avg_load_per_task += cpu_avg_load_per_task(i);
-		}
-
-		/*
-		 * First idle cpu or the first cpu(busiest) in this sched group
-		 * is eligible for doing load balancing at this and above
-		 * domains. In the newly idle case, we will allow all the cpu's
-		 * to do the newly idle load balance.
-		 */
-		if (idle != CPU_NEWLY_IDLE && local_group &&
-		    balance_cpu != this_cpu && balance) {
-			*balance = 0;
+		if (balance && !(*balance))
 			goto ret;
-		}
 
 		total_load += sgs.group_load;
 		total_pwr += group->__cpu_power;
 
-		/* Adjust by relative CPU power of the group */
-		sgs.avg_load = sg_div_cpu_power(group,
-				sgs.group_load * SCHED_LOAD_SCALE);
-
-
-		/*
-		 * Consider the group unbalanced when the imbalance is larger
-		 * than the average weight of two tasks.
-		 *
-		 * APZ: with cgroup the avg task weight can vary wildly and
-		 * might not be a suitable number - should we keep a
-		 * normalized nr_running number somewhere that negates
-		 * the hierarchy?
-		 */
-		avg_load_per_task = sg_div_cpu_power(group,
-				sum_avg_load_per_task * SCHED_LOAD_SCALE);
-
-		if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task)
-			sgs.group_imb = 1;
-
-		sgs.group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
-
 		if (local_group) {
 			this_load = sgs.avg_load;
 			this = group;
