aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGautham R Shenoy <ego@in.ibm.com>2009-03-25 05:14:22 -0400
committerIngo Molnar <mingo@elte.hu>2009-03-25 05:30:48 -0400
commitc071df18525a95b37dd5821a6dc4af83bd18675e (patch)
tree3b5a3761ed296465ed5f7b4669e74ec92203de78
parenta021dc03376707c55a3483e32c16b8986d4414cc (diff)
sched: Refactor the power savings balance code
Impact: cleanup Create separate helper functions to initialize the power-savings-balance related variables, to update them and to check if we have a scope for performing power-savings balance. Add no-op inline functions for the !(CONFIG_SCHED_MC || CONFIG_SCHED_SMT) case. This will eliminate all the #ifdef jungle in find_busiest_group() and the other helper functions. Signed-off-by: Gautham R Shenoy <ego@in.ibm.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Suresh Siddha <suresh.b.siddha@intel.com> Cc: "Balbir Singh" <balbir@in.ibm.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: "Dhaval Giani" <dhaval@linux.vnet.ibm.com> Cc: Bharata B Rao <bharata@linux.vnet.ibm.com> Cc: "Vaidyanathan Srinivasan" <svaidy@linux.vnet.ibm.com> LKML-Reference: <20090325091422.13992.73616.stgit@sofia.in.ibm.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/sched.c236
1 files changed, 153 insertions, 83 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 71e8dcaf2c79..5f21658b0f67 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3270,6 +3270,151 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
3270} 3270}
3271 3271
3272 3272
3273#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
3274/**
3275 * init_sd_power_savings_stats - Initialize power savings statistics for
3276 * the given sched_domain, during load balancing.
3277 *
3278 * @sd: Sched domain whose power-savings statistics are to be initialized.
3279 * @sds: Variable containing the statistics for sd; power_savings_balance is set here.
3280 * @idle: Idle status of the CPU at which we're performing load-balancing.
3281 */
3282static inline void init_sd_power_savings_stats(struct sched_domain *sd,
3283 struct sd_lb_stats *sds, enum cpu_idle_type idle)
3284{
3285 /*
3286 * Busy processors will not participate in power savings balance, nor
3287 * will domains lacking SD_POWERSAVINGS_BALANCE. Otherwise enable it and
3288 * seed min_nr_running with ULONG_MAX and leader_nr_running with 0.
3289 */
3289 if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
3290 sds->power_savings_balance = 0;
3291 else {
3292 sds->power_savings_balance = 1;
3293 sds->min_nr_running = ULONG_MAX;
3294 sds->leader_nr_running = 0;
3295 }
3296}
3297
3298/**
3299 * update_sd_power_savings_stats - Update the power saving stats for a
3300 * sched_domain while performing load balancing.
3301 *
3302 * @group: sched_group belonging to the sched_domain under consideration.
3303 * @sds: Variable containing the statistics of the sched_domain
3304 * @local_group: Does group contain the CPU for which we're performing
3305 * load balancing ?
3306 * @sgs: Variable containing the statistics of the group.
3307 */
3308static inline void update_sd_power_savings_stats(struct sched_group *group,
3309 struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
3310{
3311
3312 if (!sds->power_savings_balance)
3313 return;
3314
3315 /*
3316 * If the local group is idle or completely loaded
3317 * no need to do power savings balance at this domain
3318 */
3319 if (local_group && (sds->this_nr_running >= sgs->group_capacity ||
3320 !sds->this_nr_running))
3321 sds->power_savings_balance = 0;
3322
3323 /*
3324 * If a group is already running at full capacity or idle,
3325 * don't include that group in power savings calculations
3326 */
3327 if (!sds->power_savings_balance ||
3328 sgs->sum_nr_running >= sgs->group_capacity ||
3329 !sgs->sum_nr_running)
3330 return;
3331
3332 /*
3333 * Calculate the group which has the least non-idle load.
3334 * This is the group from where we need to pick up the load
3335 * for saving power. Ties go to the group with the higher first CPU.
3336 */
3337 if ((sgs->sum_nr_running < sds->min_nr_running) ||
3338 (sgs->sum_nr_running == sds->min_nr_running &&
3339 group_first_cpu(group) > group_first_cpu(sds->group_min))) {
3340 sds->group_min = group;
3341 sds->min_nr_running = sgs->sum_nr_running;
3342 sds->min_load_per_task = sgs->sum_weighted_load /
3343 sgs->sum_nr_running;
3344 }
3345
3346 /*
3347 * Calculate the group which is near its capacity but still has room
3348 * to pick up load from another group; ties go to the lower first CPU.
3349 * NOTE(review): the check below looks redundant with the >= test above - confirm.
3350 */
3351 if (sgs->sum_nr_running > sgs->group_capacity - 1)
3352 return;
3353
3354 if (sgs->sum_nr_running > sds->leader_nr_running ||
3355 (sgs->sum_nr_running == sds->leader_nr_running &&
3356 group_first_cpu(group) < group_first_cpu(sds->group_leader))) {
3357 sds->group_leader = group;
3358 sds->leader_nr_running = sgs->sum_nr_running;
3359 }
3360}
3361
3362/**
3363 * check_power_save_busiest_group - Check if we have potential to perform
3364 * some power-savings balance. If yes, set the busiest group to be
3365 * the least loaded group in the sched_domain, so that its CPUs can
3366 * be put to idle.
3367 *
3368 * @sds: Variable containing the statistics of the sched_domain
3369 * under consideration.
3370 * @this_cpu: Cpu at which we're currently performing load-balancing.
3371 * @imbalance: Variable to store the imbalance (written only when returning 1).
3372 *
3373 * Returns 1 if there is potential to perform power-savings balance.
3374 * Else returns 0 without modifying sds->busiest or *imbalance.
3375 */
3376static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
3377 int this_cpu, unsigned long *imbalance)
3378{
3379 if (!sds->power_savings_balance)
3380 return 0;
3381
3382 if (sds->this != sds->group_leader ||
3383 sds->group_leader == sds->group_min)
3384 return 0;
3385
3386 *imbalance = sds->min_load_per_task;
3387 sds->busiest = sds->group_min;
3388
3389 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3390 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3391 group_first_cpu(sds->group_leader);
3392 }
3393
3394 return 1;
3395
3396}
3397#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
3398static inline void init_sd_power_savings_stats(struct sched_domain *sd,
3399 struct sd_lb_stats *sds, enum cpu_idle_type idle)
3400{
3401 return; /* no-op stub for the !(CONFIG_SCHED_MC || CONFIG_SCHED_SMT) build */
3402}
3403
3404static inline void update_sd_power_savings_stats(struct sched_group *group,
3405 struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs)
3406{
3407 return; /* no-op stub for the !(CONFIG_SCHED_MC || CONFIG_SCHED_SMT) build */
3408}
3409
3410static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
3411 int this_cpu, unsigned long *imbalance)
3412{
3413 return 0; /* stub for the !(CONFIG_SCHED_MC || CONFIG_SCHED_SMT) build: always reports no power-savings balance */
3414}
3415#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
3416
3417
3273/** 3418/**
3274 * update_sg_lb_stats - Update sched_group's statistics for load balancing. 3419 * update_sg_lb_stats - Update sched_group's statistics for load balancing.
3275 * @group: sched_group whose statistics are to be updated. 3420 * @group: sched_group whose statistics are to be updated.
@@ -3385,19 +3530,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
3385 struct sg_lb_stats sgs; 3530 struct sg_lb_stats sgs;
3386 int load_idx; 3531 int load_idx;
3387 3532
3388#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 3533 init_sd_power_savings_stats(sd, sds, idle);
3389 /*
3390 * Busy processors will not participate in power savings
3391 * balance.
3392 */
3393 if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
3394 sds->power_savings_balance = 0;
3395 else {
3396 sds->power_savings_balance = 1;
3397 sds->min_nr_running = ULONG_MAX;
3398 sds->leader_nr_running = 0;
3399 }
3400#endif
3401 load_idx = get_sd_load_idx(sd, idle); 3534 load_idx = get_sd_load_idx(sd, idle);
3402 3535
3403 do { 3536 do {
@@ -3430,61 +3563,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu,
3430 sds->group_imb = sgs.group_imb; 3563 sds->group_imb = sgs.group_imb;
3431 } 3564 }
3432 3565
3433#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 3566 update_sd_power_savings_stats(group, sds, local_group, &sgs);
3434
3435 if (!sds->power_savings_balance)
3436 goto group_next;
3437
3438 /*
3439 * If the local group is idle or completely loaded
3440 * no need to do power savings balance at this domain
3441 */
3442 if (local_group &&
3443 (sds->this_nr_running >= sgs.group_capacity ||
3444 !sds->this_nr_running))
3445 sds->power_savings_balance = 0;
3446
3447 /*
3448 * If a group is already running at full capacity or idle,
3449 * don't include that group in power savings calculations
3450 */
3451 if (!sds->power_savings_balance ||
3452 sgs.sum_nr_running >= sgs.group_capacity ||
3453 !sgs.sum_nr_running)
3454 goto group_next;
3455
3456 /*
3457 * Calculate the group which has the least non-idle load.
3458 * This is the group from where we need to pick up the load
3459 * for saving power
3460 */
3461 if ((sgs.sum_nr_running < sds->min_nr_running) ||
3462 (sgs.sum_nr_running == sds->min_nr_running &&
3463 group_first_cpu(group) >
3464 group_first_cpu(sds->group_min))) {
3465 sds->group_min = group;
3466 sds->min_nr_running = sgs.sum_nr_running;
3467 sds->min_load_per_task = sgs.sum_weighted_load /
3468 sgs.sum_nr_running;
3469 }
3470
3471 /*
3472 * Calculate the group which is almost near its
3473 * capacity but still has some space to pick up some load
3474 * from other group and save more power
3475 */
3476 if (sgs.sum_nr_running > sgs.group_capacity - 1)
3477 goto group_next;
3478
3479 if (sgs.sum_nr_running > sds->leader_nr_running ||
3480 (sgs.sum_nr_running == sds->leader_nr_running &&
3481 group_first_cpu(group) <
3482 group_first_cpu(sds->group_leader))) {
3483 sds->group_leader = group;
3484 sds->leader_nr_running = sgs.sum_nr_running;
3485 }
3486group_next:
3487#endif
3488 group = group->next; 3567 group = group->next;
3489 } while (group != sd->groups); 3568 } while (group != sd->groups);
3490 3569
@@ -3655,21 +3734,12 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3655 return sds.busiest; 3734 return sds.busiest;
3656 3735
3657out_balanced: 3736out_balanced:
3658#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 3737 /*
3659 if (!sds.power_savings_balance) 3738 * There is no obvious imbalance. But check if we can do some balancing
3660 goto ret; 3739 * to save power.
3661 3740 */
3662 if (sds.this != sds.group_leader || sds.group_leader == sds.group_min) 3741 if (check_power_save_busiest_group(&sds, this_cpu, imbalance))
3663 goto ret; 3742 return sds.busiest;
3664
3665 *imbalance = sds.min_load_per_task;
3666 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3667 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3668 group_first_cpu(sds.group_leader);
3669 }
3670 return sds.group_min;
3671
3672#endif
3673ret: 3743ret:
3674 *imbalance = 0; 3744 *imbalance = 0;
3675 return NULL; 3745 return NULL;