aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Gautham R Shenoy <ego@in.ibm.com> 2009-03-25 05:13:56 -0400
committer Ingo Molnar <mingo@elte.hu> 2009-03-25 05:30:46 -0400
commit 222d656dea57e4e084fbd1e9383e6fed2ca9fa61 (patch)
tree eb5f5be4312e050c28ba9b80dc9d0e113e711190
parent 1f8c553d0f11d85f7993fe21015695d266771c00 (diff)
sched: Define structure to store the sched_domain statistics for fbg()
Impact: cleanup

Currently we use a lot of local variables in find_busiest_group() to capture
the various statistics related to the sched_domain. Group them together into
a single data structure.

This will help us to offload the job of updating the sched_domain statistics
to a helper function.

Credit: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Balbir Singh" <balbir@in.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: "Dhaval Giani" <dhaval@linux.vnet.ibm.com>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
LKML-Reference: <20090325091356.13992.25970.stgit@sofia.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- kernel/sched.c 207
1 files changed, 121 insertions, 86 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1893d5562f5f..8198dbe8e4aa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3190,6 +3190,37 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
3190 return 0; 3190 return 0;
3191} 3191}
3192/********** Helpers for find_busiest_group ************************/ 3192/********** Helpers for find_busiest_group ************************/
3193/**
3194 * sd_lb_stats - Structure to store the statistics of a sched_domain
3195 * during load balancing.
3196 */
3197struct sd_lb_stats {
3198 struct sched_group *busiest; /* Busiest group in this sd */
3199 struct sched_group *this; /* Local group in this sd */
3200 unsigned long total_load; /* Total load of all groups in sd */
3201 unsigned long total_pwr; /* Total power of all groups in sd */
3202 unsigned long avg_load; /* Average load across all groups in sd */
3203
3204 /** Statistics of this group */
3205 unsigned long this_load;
3206 unsigned long this_load_per_task;
3207 unsigned long this_nr_running;
3208
3209 /* Statistics of the busiest group */
3210 unsigned long max_load;
3211 unsigned long busiest_load_per_task;
3212 unsigned long busiest_nr_running;
3213
3214 int group_imb; /* Is there imbalance in this sd */
3215#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
3216 int power_savings_balance; /* Is powersave balance needed for this sd */
3217 struct sched_group *group_min; /* Least loaded group in sd */
3218 struct sched_group *group_leader; /* Group which relieves group_min */
3219 unsigned long min_load_per_task; /* load_per_task in group_min */
3220 unsigned long leader_nr_running; /* Nr running of group_leader */
3221 unsigned long min_nr_running; /* Nr running of group_min */
3222#endif
3223};
3193 3224
3194/** 3225/**
3195 * sg_lb_stats - stats of a sched_group required for load_balancing 3226 * sg_lb_stats - stats of a sched_group required for load_balancing
@@ -3346,23 +3377,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3346 unsigned long *imbalance, enum cpu_idle_type idle, 3377 unsigned long *imbalance, enum cpu_idle_type idle,
3347 int *sd_idle, const struct cpumask *cpus, int *balance) 3378 int *sd_idle, const struct cpumask *cpus, int *balance)
3348{ 3379{
3349 struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; 3380 struct sd_lb_stats sds;
3350 unsigned long max_load, avg_load, total_load, this_load, total_pwr; 3381 struct sched_group *group = sd->groups;
3351 unsigned long max_pull; 3382 unsigned long max_pull;
3352 unsigned long busiest_load_per_task, busiest_nr_running; 3383 int load_idx;
3353 unsigned long this_load_per_task, this_nr_running; 3384
3354 int load_idx, group_imb = 0; 3385 memset(&sds, 0, sizeof(sds));
3355#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 3386#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
3356 int power_savings_balance = 1; 3387 sds.power_savings_balance = 1;
3357 unsigned long leader_nr_running = 0, min_load_per_task = 0; 3388 sds.min_nr_running = ULONG_MAX;
3358 unsigned long min_nr_running = ULONG_MAX;
3359 struct sched_group *group_min = NULL, *group_leader = NULL;
3360#endif 3389#endif
3361
3362 max_load = this_load = total_load = total_pwr = 0;
3363 busiest_load_per_task = busiest_nr_running = 0;
3364 this_load_per_task = this_nr_running = 0;
3365
3366 load_idx = get_sd_load_idx(sd, idle); 3390 load_idx = get_sd_load_idx(sd, idle);
3367 3391
3368 do { 3392 do {
@@ -3378,22 +3402,22 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3378 if (balance && !(*balance)) 3402 if (balance && !(*balance))
3379 goto ret; 3403 goto ret;
3380 3404
3381 total_load += sgs.group_load; 3405 sds.total_load += sgs.group_load;
3382 total_pwr += group->__cpu_power; 3406 sds.total_pwr += group->__cpu_power;
3383 3407
3384 if (local_group) { 3408 if (local_group) {
3385 this_load = sgs.avg_load; 3409 sds.this_load = sgs.avg_load;
3386 this = group; 3410 sds.this = group;
3387 this_nr_running = sgs.sum_nr_running; 3411 sds.this_nr_running = sgs.sum_nr_running;
3388 this_load_per_task = sgs.sum_weighted_load; 3412 sds.this_load_per_task = sgs.sum_weighted_load;
3389 } else if (sgs.avg_load > max_load && 3413 } else if (sgs.avg_load > sds.max_load &&
3390 (sgs.sum_nr_running > sgs.group_capacity || 3414 (sgs.sum_nr_running > sgs.group_capacity ||
3391 sgs.group_imb)) { 3415 sgs.group_imb)) {
3392 max_load = sgs.avg_load; 3416 sds.max_load = sgs.avg_load;
3393 busiest = group; 3417 sds.busiest = group;
3394 busiest_nr_running = sgs.sum_nr_running; 3418 sds.busiest_nr_running = sgs.sum_nr_running;
3395 busiest_load_per_task = sgs.sum_weighted_load; 3419 sds.busiest_load_per_task = sgs.sum_weighted_load;
3396 group_imb = sgs.group_imb; 3420 sds.group_imb = sgs.group_imb;
3397 } 3421 }
3398 3422
3399#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 3423#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
@@ -3409,15 +3433,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3409 * If the local group is idle or completely loaded 3433 * If the local group is idle or completely loaded
3410 * no need to do power savings balance at this domain 3434 * no need to do power savings balance at this domain
3411 */ 3435 */
3412 if (local_group && (this_nr_running >= sgs.group_capacity || 3436 if (local_group &&
3413 !this_nr_running)) 3437 (sds.this_nr_running >= sgs.group_capacity ||
3414 power_savings_balance = 0; 3438 !sds.this_nr_running))
3439 sds.power_savings_balance = 0;
3415 3440
3416 /* 3441 /*
3417 * If a group is already running at full capacity or idle, 3442 * If a group is already running at full capacity or idle,
3418 * don't include that group in power savings calculations 3443 * don't include that group in power savings calculations
3419 */ 3444 */
3420 if (!power_savings_balance || 3445 if (!sds.power_savings_balance ||
3421 sgs.sum_nr_running >= sgs.group_capacity || 3446 sgs.sum_nr_running >= sgs.group_capacity ||
3422 !sgs.sum_nr_running) 3447 !sgs.sum_nr_running)
3423 goto group_next; 3448 goto group_next;
@@ -3427,12 +3452,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3427 * This is the group from where we need to pick up the load 3452 * This is the group from where we need to pick up the load
3428 * for saving power 3453 * for saving power
3429 */ 3454 */
3430 if ((sgs.sum_nr_running < min_nr_running) || 3455 if ((sgs.sum_nr_running < sds.min_nr_running) ||
3431 (sgs.sum_nr_running == min_nr_running && 3456 (sgs.sum_nr_running == sds.min_nr_running &&
3432 group_first_cpu(group) > group_first_cpu(group_min))) { 3457 group_first_cpu(group) >
3433 group_min = group; 3458 group_first_cpu(sds.group_min))) {
3434 min_nr_running = sgs.sum_nr_running; 3459 sds.group_min = group;
3435 min_load_per_task = sgs.sum_weighted_load / 3460 sds.min_nr_running = sgs.sum_nr_running;
3461 sds.min_load_per_task = sgs.sum_weighted_load /
3436 sgs.sum_nr_running; 3462 sgs.sum_nr_running;
3437 } 3463 }
3438 3464
@@ -3444,29 +3470,32 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
3444 if (sgs.sum_nr_running > sgs.group_capacity - 1) 3470 if (sgs.sum_nr_running > sgs.group_capacity - 1)
3445 goto group_next; 3471 goto group_next;
3446 3472
3447 if (sgs.sum_nr_running > leader_nr_running || 3473 if (sgs.sum_nr_running > sds.leader_nr_running ||
3448 (sgs.sum_nr_running == leader_nr_running && 3474 (sgs.sum_nr_running == sds.leader_nr_running &&
3449 group_first_cpu(group) < group_first_cpu(group_leader))) { 3475 group_first_cpu(group) <
3450 group_leader = group; 3476 group_first_cpu(sds.group_leader))) {
3451 leader_nr_running = sgs.sum_nr_running; 3477 sds.group_leader = group;
3478 sds.leader_nr_running = sgs.sum_nr_running;
3452 } 3479 }
3453group_next: 3480group_next:
3454#endif 3481#endif
3455 group = group->next; 3482 group = group->next;
3456 } while (group != sd->groups); 3483 } while (group != sd->groups);
3457 3484
3458 if (!busiest || this_load >= max_load || busiest_nr_running == 0) 3485 if (!sds.busiest || sds.this_load >= sds.max_load
3486 || sds.busiest_nr_running == 0)
3459 goto out_balanced; 3487 goto out_balanced;
3460 3488
3461 avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; 3489 sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr;
3462 3490
3463 if (this_load >= avg_load || 3491 if (sds.this_load >= sds.avg_load ||
3464 100*max_load <= sd->imbalance_pct*this_load) 3492 100*sds.max_load <= sd->imbalance_pct * sds.this_load)
3465 goto out_balanced; 3493 goto out_balanced;
3466 3494
3467 busiest_load_per_task /= busiest_nr_running; 3495 sds.busiest_load_per_task /= sds.busiest_nr_running;
3468 if (group_imb) 3496 if (sds.group_imb)
3469 busiest_load_per_task = min(busiest_load_per_task, avg_load); 3497 sds.busiest_load_per_task =
3498 min(sds.busiest_load_per_task, sds.avg_load);
3470 3499
3471 /* 3500 /*
3472 * We're trying to get all the cpus to the average_load, so we don't 3501 * We're trying to get all the cpus to the average_load, so we don't
@@ -3479,7 +3508,7 @@ group_next:
3479 * by pulling tasks to us. Be careful of negative numbers as they'll 3508 * by pulling tasks to us. Be careful of negative numbers as they'll
3480 * appear as very large values with unsigned longs. 3509 * appear as very large values with unsigned longs.
3481 */ 3510 */
3482 if (max_load <= busiest_load_per_task) 3511 if (sds.max_load <= sds.busiest_load_per_task)
3483 goto out_balanced; 3512 goto out_balanced;
3484 3513
3485 /* 3514 /*
@@ -3487,17 +3516,18 @@ group_next:
3487 * max load less than avg load(as we skip the groups at or below 3516 * max load less than avg load(as we skip the groups at or below
3488 * its cpu_power, while calculating max_load..) 3517 * its cpu_power, while calculating max_load..)
3489 */ 3518 */
3490 if (max_load < avg_load) { 3519 if (sds.max_load < sds.avg_load) {
3491 *imbalance = 0; 3520 *imbalance = 0;
3492 goto small_imbalance; 3521 goto small_imbalance;
3493 } 3522 }
3494 3523
3495 /* Don't want to pull so many tasks that a group would go idle */ 3524 /* Don't want to pull so many tasks that a group would go idle */
3496 max_pull = min(max_load - avg_load, max_load - busiest_load_per_task); 3525 max_pull = min(sds.max_load - sds.avg_load,
3526 sds.max_load - sds.busiest_load_per_task);
3497 3527
3498 /* How much load to actually move to equalise the imbalance */ 3528 /* How much load to actually move to equalise the imbalance */
3499 *imbalance = min(max_pull * busiest->__cpu_power, 3529 *imbalance = min(max_pull * sds.busiest->__cpu_power,
3500 (avg_load - this_load) * this->__cpu_power) 3530 (sds.avg_load - sds.this_load) * sds.this->__cpu_power)
3501 / SCHED_LOAD_SCALE; 3531 / SCHED_LOAD_SCALE;
3502 3532
3503 /* 3533 /*
@@ -3506,24 +3536,27 @@ group_next:
3506 * a think about bumping its value to force at least one task to be 3536 * a think about bumping its value to force at least one task to be
3507 * moved 3537 * moved
3508 */ 3538 */
3509 if (*imbalance < busiest_load_per_task) { 3539 if (*imbalance < sds.busiest_load_per_task) {
3510 unsigned long tmp, pwr_now, pwr_move; 3540 unsigned long tmp, pwr_now, pwr_move;
3511 unsigned int imbn; 3541 unsigned int imbn;
3512 3542
3513small_imbalance: 3543small_imbalance:
3514 pwr_move = pwr_now = 0; 3544 pwr_move = pwr_now = 0;
3515 imbn = 2; 3545 imbn = 2;
3516 if (this_nr_running) { 3546 if (sds.this_nr_running) {
3517 this_load_per_task /= this_nr_running; 3547 sds.this_load_per_task /= sds.this_nr_running;
3518 if (busiest_load_per_task > this_load_per_task) 3548 if (sds.busiest_load_per_task >
3549 sds.this_load_per_task)
3519 imbn = 1; 3550 imbn = 1;
3520 } else 3551 } else
3521 this_load_per_task = cpu_avg_load_per_task(this_cpu); 3552 sds.this_load_per_task =
3522 3553 cpu_avg_load_per_task(this_cpu);
3523 if (max_load - this_load + busiest_load_per_task >= 3554
3524 busiest_load_per_task * imbn) { 3555 if (sds.max_load - sds.this_load +
3525 *imbalance = busiest_load_per_task; 3556 sds.busiest_load_per_task >=
3526 return busiest; 3557 sds.busiest_load_per_task * imbn) {
3558 *imbalance = sds.busiest_load_per_task;
3559 return sds.busiest;
3527 } 3560 }
3528 3561
3529 /* 3562 /*
@@ -3532,52 +3565,54 @@ small_imbalance:
3532 * moving them. 3565 * moving them.
3533 */ 3566 */
3534 3567
3535 pwr_now += busiest->__cpu_power * 3568 pwr_now += sds.busiest->__cpu_power *
3536 min(busiest_load_per_task, max_load); 3569 min(sds.busiest_load_per_task, sds.max_load);
3537 pwr_now += this->__cpu_power * 3570 pwr_now += sds.this->__cpu_power *
3538 min(this_load_per_task, this_load); 3571 min(sds.this_load_per_task, sds.this_load);
3539 pwr_now /= SCHED_LOAD_SCALE; 3572 pwr_now /= SCHED_LOAD_SCALE;
3540 3573
3541 /* Amount of load we'd subtract */ 3574 /* Amount of load we'd subtract */
3542 tmp = sg_div_cpu_power(busiest, 3575 tmp = sg_div_cpu_power(sds.busiest,
3543 busiest_load_per_task * SCHED_LOAD_SCALE); 3576 sds.busiest_load_per_task * SCHED_LOAD_SCALE);
3544 if (max_load > tmp) 3577 if (sds.max_load > tmp)
3545 pwr_move += busiest->__cpu_power * 3578 pwr_move += sds.busiest->__cpu_power *
3546 min(busiest_load_per_task, max_load - tmp); 3579 min(sds.busiest_load_per_task,
3580 sds.max_load - tmp);
3547 3581
3548 /* Amount of load we'd add */ 3582 /* Amount of load we'd add */
3549 if (max_load * busiest->__cpu_power < 3583 if (sds.max_load * sds.busiest->__cpu_power <
3550 busiest_load_per_task * SCHED_LOAD_SCALE) 3584 sds.busiest_load_per_task * SCHED_LOAD_SCALE)
3551 tmp = sg_div_cpu_power(this, 3585 tmp = sg_div_cpu_power(sds.this,
3552 max_load * busiest->__cpu_power); 3586 sds.max_load * sds.busiest->__cpu_power);
3553 else 3587 else
3554 tmp = sg_div_cpu_power(this, 3588 tmp = sg_div_cpu_power(sds.this,
3555 busiest_load_per_task * SCHED_LOAD_SCALE); 3589 sds.busiest_load_per_task * SCHED_LOAD_SCALE);
3556 pwr_move += this->__cpu_power * 3590 pwr_move += sds.this->__cpu_power *
3557 min(this_load_per_task, this_load + tmp); 3591 min(sds.this_load_per_task,
3592 sds.this_load + tmp);
3558 pwr_move /= SCHED_LOAD_SCALE; 3593 pwr_move /= SCHED_LOAD_SCALE;
3559 3594
3560 /* Move if we gain throughput */ 3595 /* Move if we gain throughput */
3561 if (pwr_move > pwr_now) 3596 if (pwr_move > pwr_now)
3562 *imbalance = busiest_load_per_task; 3597 *imbalance = sds.busiest_load_per_task;
3563 } 3598 }
3564 3599
3565 return busiest; 3600 return sds.busiest;
3566 3601
3567out_balanced: 3602out_balanced:
3568#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) 3603#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
3569 if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) 3604 if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE))
3570 goto ret; 3605 goto ret;
3571 3606
3572 if (this != group_leader || group_leader == group_min) 3607 if (sds.this != sds.group_leader || sds.group_leader == sds.group_min)
3573 goto ret; 3608 goto ret;
3574 3609
3575 *imbalance = min_load_per_task; 3610 *imbalance = sds.min_load_per_task;
3576 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { 3611 if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
3577 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = 3612 cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
3578 group_first_cpu(group_leader); 3613 group_first_cpu(sds.group_leader);
3579 } 3614 }
3580 return group_min; 3615 return sds.group_min;
3581 3616
3582#endif 3617#endif
3583ret: 3618ret: