diff options
author | Gautham R Shenoy <ego@in.ibm.com> | 2009-03-25 05:13:56 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-03-25 05:30:46 -0400 |
commit | 222d656dea57e4e084fbd1e9383e6fed2ca9fa61 (patch) | |
tree | eb5f5be4312e050c28ba9b80dc9d0e113e711190 /kernel | |
parent | 1f8c553d0f11d85f7993fe21015695d266771c00 (diff) |
sched: Define structure to store the sched_domain statistics for fbg()
Impact: cleanup
Currently we use a lot of local variables in find_busiest_group()
to capture the various statistics related to the sched_domain.
Group them together into a single data structure.
This will help us to offload the job of updating the sched_domain
statistics to a helper function.
Credit: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: "Balbir Singh" <balbir@in.ibm.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: "Dhaval Giani" <dhaval@linux.vnet.ibm.com>
Cc: Bharata B Rao <bharata@linux.vnet.ibm.com>
LKML-Reference: <20090325091356.13992.25970.stgit@sofia.in.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 207 |
1 file changed, 121 insertions, 86 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 1893d5562f5f..8198dbe8e4aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -3190,6 +3190,37 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
3190 | return 0; | 3190 | return 0; |
3191 | } | 3191 | } |
3192 | /********** Helpers for find_busiest_group ************************/ | 3192 | /********** Helpers for find_busiest_group ************************/ |
3193 | /** | ||
3194 | * sd_lb_stats - Structure to store the statistics of a sched_domain | ||
3195 | * during load balancing. | ||
3196 | */ | ||
3197 | struct sd_lb_stats { | ||
3198 | struct sched_group *busiest; /* Busiest group in this sd */ | ||
3199 | struct sched_group *this; /* Local group in this sd */ | ||
3200 | unsigned long total_load; /* Total load of all groups in sd */ | ||
3201 | unsigned long total_pwr; /* Total power of all groups in sd */ | ||
3202 | unsigned long avg_load; /* Average load across all groups in sd */ | ||
3203 | |||
3204 | /** Statistics of this group */ | ||
3205 | unsigned long this_load; | ||
3206 | unsigned long this_load_per_task; | ||
3207 | unsigned long this_nr_running; | ||
3208 | |||
3209 | /* Statistics of the busiest group */ | ||
3210 | unsigned long max_load; | ||
3211 | unsigned long busiest_load_per_task; | ||
3212 | unsigned long busiest_nr_running; | ||
3213 | |||
3214 | int group_imb; /* Is there imbalance in this sd */ | ||
3215 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | ||
3216 | int power_savings_balance; /* Is powersave balance needed for this sd */ | ||
3217 | struct sched_group *group_min; /* Least loaded group in sd */ | ||
3218 | struct sched_group *group_leader; /* Group which relieves group_min */ | ||
3219 | unsigned long min_load_per_task; /* load_per_task in group_min */ | ||
3220 | unsigned long leader_nr_running; /* Nr running of group_leader */ | ||
3221 | unsigned long min_nr_running; /* Nr running of group_min */ | ||
3222 | #endif | ||
3223 | }; | ||
3193 | 3224 | ||
3194 | /** | 3225 | /** |
3195 | * sg_lb_stats - stats of a sched_group required for load_balancing | 3226 | * sg_lb_stats - stats of a sched_group required for load_balancing |
@@ -3346,23 +3377,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3346 | unsigned long *imbalance, enum cpu_idle_type idle, | 3377 | unsigned long *imbalance, enum cpu_idle_type idle, |
3347 | int *sd_idle, const struct cpumask *cpus, int *balance) | 3378 | int *sd_idle, const struct cpumask *cpus, int *balance) |
3348 | { | 3379 | { |
3349 | struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; | 3380 | struct sd_lb_stats sds; |
3350 | unsigned long max_load, avg_load, total_load, this_load, total_pwr; | 3381 | struct sched_group *group = sd->groups; |
3351 | unsigned long max_pull; | 3382 | unsigned long max_pull; |
3352 | unsigned long busiest_load_per_task, busiest_nr_running; | 3383 | int load_idx; |
3353 | unsigned long this_load_per_task, this_nr_running; | 3384 | |
3354 | int load_idx, group_imb = 0; | 3385 | memset(&sds, 0, sizeof(sds)); |
3355 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 3386 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
3356 | int power_savings_balance = 1; | 3387 | sds.power_savings_balance = 1; |
3357 | unsigned long leader_nr_running = 0, min_load_per_task = 0; | 3388 | sds.min_nr_running = ULONG_MAX; |
3358 | unsigned long min_nr_running = ULONG_MAX; | ||
3359 | struct sched_group *group_min = NULL, *group_leader = NULL; | ||
3360 | #endif | 3389 | #endif |
3361 | |||
3362 | max_load = this_load = total_load = total_pwr = 0; | ||
3363 | busiest_load_per_task = busiest_nr_running = 0; | ||
3364 | this_load_per_task = this_nr_running = 0; | ||
3365 | |||
3366 | load_idx = get_sd_load_idx(sd, idle); | 3390 | load_idx = get_sd_load_idx(sd, idle); |
3367 | 3391 | ||
3368 | do { | 3392 | do { |
@@ -3378,22 +3402,22 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3378 | if (balance && !(*balance)) | 3402 | if (balance && !(*balance)) |
3379 | goto ret; | 3403 | goto ret; |
3380 | 3404 | ||
3381 | total_load += sgs.group_load; | 3405 | sds.total_load += sgs.group_load; |
3382 | total_pwr += group->__cpu_power; | 3406 | sds.total_pwr += group->__cpu_power; |
3383 | 3407 | ||
3384 | if (local_group) { | 3408 | if (local_group) { |
3385 | this_load = sgs.avg_load; | 3409 | sds.this_load = sgs.avg_load; |
3386 | this = group; | 3410 | sds.this = group; |
3387 | this_nr_running = sgs.sum_nr_running; | 3411 | sds.this_nr_running = sgs.sum_nr_running; |
3388 | this_load_per_task = sgs.sum_weighted_load; | 3412 | sds.this_load_per_task = sgs.sum_weighted_load; |
3389 | } else if (sgs.avg_load > max_load && | 3413 | } else if (sgs.avg_load > sds.max_load && |
3390 | (sgs.sum_nr_running > sgs.group_capacity || | 3414 | (sgs.sum_nr_running > sgs.group_capacity || |
3391 | sgs.group_imb)) { | 3415 | sgs.group_imb)) { |
3392 | max_load = sgs.avg_load; | 3416 | sds.max_load = sgs.avg_load; |
3393 | busiest = group; | 3417 | sds.busiest = group; |
3394 | busiest_nr_running = sgs.sum_nr_running; | 3418 | sds.busiest_nr_running = sgs.sum_nr_running; |
3395 | busiest_load_per_task = sgs.sum_weighted_load; | 3419 | sds.busiest_load_per_task = sgs.sum_weighted_load; |
3396 | group_imb = sgs.group_imb; | 3420 | sds.group_imb = sgs.group_imb; |
3397 | } | 3421 | } |
3398 | 3422 | ||
3399 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 3423 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
@@ -3409,15 +3433,16 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3409 | * If the local group is idle or completely loaded | 3433 | * If the local group is idle or completely loaded |
3410 | * no need to do power savings balance at this domain | 3434 | * no need to do power savings balance at this domain |
3411 | */ | 3435 | */ |
3412 | if (local_group && (this_nr_running >= sgs.group_capacity || | 3436 | if (local_group && |
3413 | !this_nr_running)) | 3437 | (sds.this_nr_running >= sgs.group_capacity || |
3414 | power_savings_balance = 0; | 3438 | !sds.this_nr_running)) |
3439 | sds.power_savings_balance = 0; | ||
3415 | 3440 | ||
3416 | /* | 3441 | /* |
3417 | * If a group is already running at full capacity or idle, | 3442 | * If a group is already running at full capacity or idle, |
3418 | * don't include that group in power savings calculations | 3443 | * don't include that group in power savings calculations |
3419 | */ | 3444 | */ |
3420 | if (!power_savings_balance || | 3445 | if (!sds.power_savings_balance || |
3421 | sgs.sum_nr_running >= sgs.group_capacity || | 3446 | sgs.sum_nr_running >= sgs.group_capacity || |
3422 | !sgs.sum_nr_running) | 3447 | !sgs.sum_nr_running) |
3423 | goto group_next; | 3448 | goto group_next; |
@@ -3427,12 +3452,13 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3427 | * This is the group from where we need to pick up the load | 3452 | * This is the group from where we need to pick up the load |
3428 | * for saving power | 3453 | * for saving power |
3429 | */ | 3454 | */ |
3430 | if ((sgs.sum_nr_running < min_nr_running) || | 3455 | if ((sgs.sum_nr_running < sds.min_nr_running) || |
3431 | (sgs.sum_nr_running == min_nr_running && | 3456 | (sgs.sum_nr_running == sds.min_nr_running && |
3432 | group_first_cpu(group) > group_first_cpu(group_min))) { | 3457 | group_first_cpu(group) > |
3433 | group_min = group; | 3458 | group_first_cpu(sds.group_min))) { |
3434 | min_nr_running = sgs.sum_nr_running; | 3459 | sds.group_min = group; |
3435 | min_load_per_task = sgs.sum_weighted_load / | 3460 | sds.min_nr_running = sgs.sum_nr_running; |
3461 | sds.min_load_per_task = sgs.sum_weighted_load / | ||
3436 | sgs.sum_nr_running; | 3462 | sgs.sum_nr_running; |
3437 | } | 3463 | } |
3438 | 3464 | ||
@@ -3444,29 +3470,32 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3444 | if (sgs.sum_nr_running > sgs.group_capacity - 1) | 3470 | if (sgs.sum_nr_running > sgs.group_capacity - 1) |
3445 | goto group_next; | 3471 | goto group_next; |
3446 | 3472 | ||
3447 | if (sgs.sum_nr_running > leader_nr_running || | 3473 | if (sgs.sum_nr_running > sds.leader_nr_running || |
3448 | (sgs.sum_nr_running == leader_nr_running && | 3474 | (sgs.sum_nr_running == sds.leader_nr_running && |
3449 | group_first_cpu(group) < group_first_cpu(group_leader))) { | 3475 | group_first_cpu(group) < |
3450 | group_leader = group; | 3476 | group_first_cpu(sds.group_leader))) { |
3451 | leader_nr_running = sgs.sum_nr_running; | 3477 | sds.group_leader = group; |
3478 | sds.leader_nr_running = sgs.sum_nr_running; | ||
3452 | } | 3479 | } |
3453 | group_next: | 3480 | group_next: |
3454 | #endif | 3481 | #endif |
3455 | group = group->next; | 3482 | group = group->next; |
3456 | } while (group != sd->groups); | 3483 | } while (group != sd->groups); |
3457 | 3484 | ||
3458 | if (!busiest || this_load >= max_load || busiest_nr_running == 0) | 3485 | if (!sds.busiest || sds.this_load >= sds.max_load |
3486 | || sds.busiest_nr_running == 0) | ||
3459 | goto out_balanced; | 3487 | goto out_balanced; |
3460 | 3488 | ||
3461 | avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; | 3489 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; |
3462 | 3490 | ||
3463 | if (this_load >= avg_load || | 3491 | if (sds.this_load >= sds.avg_load || |
3464 | 100*max_load <= sd->imbalance_pct*this_load) | 3492 | 100*sds.max_load <= sd->imbalance_pct * sds.this_load) |
3465 | goto out_balanced; | 3493 | goto out_balanced; |
3466 | 3494 | ||
3467 | busiest_load_per_task /= busiest_nr_running; | 3495 | sds.busiest_load_per_task /= sds.busiest_nr_running; |
3468 | if (group_imb) | 3496 | if (sds.group_imb) |
3469 | busiest_load_per_task = min(busiest_load_per_task, avg_load); | 3497 | sds.busiest_load_per_task = |
3498 | min(sds.busiest_load_per_task, sds.avg_load); | ||
3470 | 3499 | ||
3471 | /* | 3500 | /* |
3472 | * We're trying to get all the cpus to the average_load, so we don't | 3501 | * We're trying to get all the cpus to the average_load, so we don't |
@@ -3479,7 +3508,7 @@ group_next: | |||
3479 | * by pulling tasks to us. Be careful of negative numbers as they'll | 3508 | * by pulling tasks to us. Be careful of negative numbers as they'll |
3480 | * appear as very large values with unsigned longs. | 3509 | * appear as very large values with unsigned longs. |
3481 | */ | 3510 | */ |
3482 | if (max_load <= busiest_load_per_task) | 3511 | if (sds.max_load <= sds.busiest_load_per_task) |
3483 | goto out_balanced; | 3512 | goto out_balanced; |
3484 | 3513 | ||
3485 | /* | 3514 | /* |
@@ -3487,17 +3516,18 @@ group_next: | |||
3487 | * max load less than avg load(as we skip the groups at or below | 3516 | * max load less than avg load(as we skip the groups at or below |
3488 | * its cpu_power, while calculating max_load..) | 3517 | * its cpu_power, while calculating max_load..) |
3489 | */ | 3518 | */ |
3490 | if (max_load < avg_load) { | 3519 | if (sds.max_load < sds.avg_load) { |
3491 | *imbalance = 0; | 3520 | *imbalance = 0; |
3492 | goto small_imbalance; | 3521 | goto small_imbalance; |
3493 | } | 3522 | } |
3494 | 3523 | ||
3495 | /* Don't want to pull so many tasks that a group would go idle */ | 3524 | /* Don't want to pull so many tasks that a group would go idle */ |
3496 | max_pull = min(max_load - avg_load, max_load - busiest_load_per_task); | 3525 | max_pull = min(sds.max_load - sds.avg_load, |
3526 | sds.max_load - sds.busiest_load_per_task); | ||
3497 | 3527 | ||
3498 | /* How much load to actually move to equalise the imbalance */ | 3528 | /* How much load to actually move to equalise the imbalance */ |
3499 | *imbalance = min(max_pull * busiest->__cpu_power, | 3529 | *imbalance = min(max_pull * sds.busiest->__cpu_power, |
3500 | (avg_load - this_load) * this->__cpu_power) | 3530 | (sds.avg_load - sds.this_load) * sds.this->__cpu_power) |
3501 | / SCHED_LOAD_SCALE; | 3531 | / SCHED_LOAD_SCALE; |
3502 | 3532 | ||
3503 | /* | 3533 | /* |
@@ -3506,24 +3536,27 @@ group_next: | |||
3506 | * a think about bumping its value to force at least one task to be | 3536 | * a think about bumping its value to force at least one task to be |
3507 | * moved | 3537 | * moved |
3508 | */ | 3538 | */ |
3509 | if (*imbalance < busiest_load_per_task) { | 3539 | if (*imbalance < sds.busiest_load_per_task) { |
3510 | unsigned long tmp, pwr_now, pwr_move; | 3540 | unsigned long tmp, pwr_now, pwr_move; |
3511 | unsigned int imbn; | 3541 | unsigned int imbn; |
3512 | 3542 | ||
3513 | small_imbalance: | 3543 | small_imbalance: |
3514 | pwr_move = pwr_now = 0; | 3544 | pwr_move = pwr_now = 0; |
3515 | imbn = 2; | 3545 | imbn = 2; |
3516 | if (this_nr_running) { | 3546 | if (sds.this_nr_running) { |
3517 | this_load_per_task /= this_nr_running; | 3547 | sds.this_load_per_task /= sds.this_nr_running; |
3518 | if (busiest_load_per_task > this_load_per_task) | 3548 | if (sds.busiest_load_per_task > |
3549 | sds.this_load_per_task) | ||
3519 | imbn = 1; | 3550 | imbn = 1; |
3520 | } else | 3551 | } else |
3521 | this_load_per_task = cpu_avg_load_per_task(this_cpu); | 3552 | sds.this_load_per_task = |
3522 | 3553 | cpu_avg_load_per_task(this_cpu); | |
3523 | if (max_load - this_load + busiest_load_per_task >= | 3554 | |
3524 | busiest_load_per_task * imbn) { | 3555 | if (sds.max_load - sds.this_load + |
3525 | *imbalance = busiest_load_per_task; | 3556 | sds.busiest_load_per_task >= |
3526 | return busiest; | 3557 | sds.busiest_load_per_task * imbn) { |
3558 | *imbalance = sds.busiest_load_per_task; | ||
3559 | return sds.busiest; | ||
3527 | } | 3560 | } |
3528 | 3561 | ||
3529 | /* | 3562 | /* |
@@ -3532,52 +3565,54 @@ small_imbalance: | |||
3532 | * moving them. | 3565 | * moving them. |
3533 | */ | 3566 | */ |
3534 | 3567 | ||
3535 | pwr_now += busiest->__cpu_power * | 3568 | pwr_now += sds.busiest->__cpu_power * |
3536 | min(busiest_load_per_task, max_load); | 3569 | min(sds.busiest_load_per_task, sds.max_load); |
3537 | pwr_now += this->__cpu_power * | 3570 | pwr_now += sds.this->__cpu_power * |
3538 | min(this_load_per_task, this_load); | 3571 | min(sds.this_load_per_task, sds.this_load); |
3539 | pwr_now /= SCHED_LOAD_SCALE; | 3572 | pwr_now /= SCHED_LOAD_SCALE; |
3540 | 3573 | ||
3541 | /* Amount of load we'd subtract */ | 3574 | /* Amount of load we'd subtract */ |
3542 | tmp = sg_div_cpu_power(busiest, | 3575 | tmp = sg_div_cpu_power(sds.busiest, |
3543 | busiest_load_per_task * SCHED_LOAD_SCALE); | 3576 | sds.busiest_load_per_task * SCHED_LOAD_SCALE); |
3544 | if (max_load > tmp) | 3577 | if (sds.max_load > tmp) |
3545 | pwr_move += busiest->__cpu_power * | 3578 | pwr_move += sds.busiest->__cpu_power * |
3546 | min(busiest_load_per_task, max_load - tmp); | 3579 | min(sds.busiest_load_per_task, |
3580 | sds.max_load - tmp); | ||
3547 | 3581 | ||
3548 | /* Amount of load we'd add */ | 3582 | /* Amount of load we'd add */ |
3549 | if (max_load * busiest->__cpu_power < | 3583 | if (sds.max_load * sds.busiest->__cpu_power < |
3550 | busiest_load_per_task * SCHED_LOAD_SCALE) | 3584 | sds.busiest_load_per_task * SCHED_LOAD_SCALE) |
3551 | tmp = sg_div_cpu_power(this, | 3585 | tmp = sg_div_cpu_power(sds.this, |
3552 | max_load * busiest->__cpu_power); | 3586 | sds.max_load * sds.busiest->__cpu_power); |
3553 | else | 3587 | else |
3554 | tmp = sg_div_cpu_power(this, | 3588 | tmp = sg_div_cpu_power(sds.this, |
3555 | busiest_load_per_task * SCHED_LOAD_SCALE); | 3589 | sds.busiest_load_per_task * SCHED_LOAD_SCALE); |
3556 | pwr_move += this->__cpu_power * | 3590 | pwr_move += sds.this->__cpu_power * |
3557 | min(this_load_per_task, this_load + tmp); | 3591 | min(sds.this_load_per_task, |
3592 | sds.this_load + tmp); | ||
3558 | pwr_move /= SCHED_LOAD_SCALE; | 3593 | pwr_move /= SCHED_LOAD_SCALE; |
3559 | 3594 | ||
3560 | /* Move if we gain throughput */ | 3595 | /* Move if we gain throughput */ |
3561 | if (pwr_move > pwr_now) | 3596 | if (pwr_move > pwr_now) |
3562 | *imbalance = busiest_load_per_task; | 3597 | *imbalance = sds.busiest_load_per_task; |
3563 | } | 3598 | } |
3564 | 3599 | ||
3565 | return busiest; | 3600 | return sds.busiest; |
3566 | 3601 | ||
3567 | out_balanced: | 3602 | out_balanced: |
3568 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 3603 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
3569 | if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) | 3604 | if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) |
3570 | goto ret; | 3605 | goto ret; |
3571 | 3606 | ||
3572 | if (this != group_leader || group_leader == group_min) | 3607 | if (sds.this != sds.group_leader || sds.group_leader == sds.group_min) |
3573 | goto ret; | 3608 | goto ret; |
3574 | 3609 | ||
3575 | *imbalance = min_load_per_task; | 3610 | *imbalance = sds.min_load_per_task; |
3576 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { | 3611 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { |
3577 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = | 3612 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = |
3578 | group_first_cpu(group_leader); | 3613 | group_first_cpu(sds.group_leader); |
3579 | } | 3614 | } |
3580 | return group_min; | 3615 | return sds.group_min; |
3581 | 3616 | ||
3582 | #endif | 3617 | #endif |
3583 | ret: | 3618 | ret: |