about summary refs log tree commit diff stats
path: root/kernel/sched.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 145
1 files changed, 82 insertions, 63 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 0feeacb91497..0a5e814cc618 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2541,8 +2541,14 @@ static int load_balance(int this_cpu, struct rq *this_rq,
2541 struct rq *busiest; 2541 struct rq *busiest;
2542 cpumask_t cpus = CPU_MASK_ALL; 2542 cpumask_t cpus = CPU_MASK_ALL;
2543 2543
2544 /*
2545 * When power savings policy is enabled for the parent domain, idle
2546 * sibling can pick up load irrespective of busy siblings. In this case,
2547 * let the state of idle sibling percolate up as IDLE, instead of
2548 * portraying it as NOT_IDLE.
2549 */
2544 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && 2550 if (idle != NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER &&
2545 !sched_smt_power_savings) 2551 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2546 sd_idle = 1; 2552 sd_idle = 1;
2547 2553
2548 schedstat_inc(sd, lb_cnt[idle]); 2554 schedstat_inc(sd, lb_cnt[idle]);
@@ -2638,7 +2644,7 @@ redo:
2638 } 2644 }
2639 2645
2640 if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2646 if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2641 !sched_smt_power_savings) 2647 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2642 return -1; 2648 return -1;
2643 return nr_moved; 2649 return nr_moved;
2644 2650
@@ -2654,7 +2660,7 @@ out_one_pinned:
2654 sd->balance_interval *= 2; 2660 sd->balance_interval *= 2;
2655 2661
2656 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2662 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2657 !sched_smt_power_savings) 2663 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2658 return -1; 2664 return -1;
2659 return 0; 2665 return 0;
2660} 2666}
@@ -2676,7 +2682,14 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2676 int sd_idle = 0; 2682 int sd_idle = 0;
2677 cpumask_t cpus = CPU_MASK_ALL; 2683 cpumask_t cpus = CPU_MASK_ALL;
2678 2684
2679 if (sd->flags & SD_SHARE_CPUPOWER && !sched_smt_power_savings) 2685 /*
2686 * When power savings policy is enabled for the parent domain, idle
2687 * sibling can pick up load irrespective of busy siblings. In this case,
2688 * let the state of idle sibling percolate up as IDLE, instead of
2689 * portraying it as NOT_IDLE.
2690 */
2691 if (sd->flags & SD_SHARE_CPUPOWER &&
2692 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2680 sd_idle = 1; 2693 sd_idle = 1;
2681 2694
2682 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]); 2695 schedstat_inc(sd, lb_cnt[NEWLY_IDLE]);
@@ -2717,7 +2730,8 @@ redo:
2717 2730
2718 if (!nr_moved) { 2731 if (!nr_moved) {
2719 schedstat_inc(sd, lb_failed[NEWLY_IDLE]); 2732 schedstat_inc(sd, lb_failed[NEWLY_IDLE]);
2720 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER) 2733 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2734 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2721 return -1; 2735 return -1;
2722 } else 2736 } else
2723 sd->nr_balance_failed = 0; 2737 sd->nr_balance_failed = 0;
@@ -2727,7 +2741,7 @@ redo:
2727out_balanced: 2741out_balanced:
2728 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]); 2742 schedstat_inc(sd, lb_balanced[NEWLY_IDLE]);
2729 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2743 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2730 !sched_smt_power_savings) 2744 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2731 return -1; 2745 return -1;
2732 sd->nr_balance_failed = 0; 2746 sd->nr_balance_failed = 0;
2733 2747
@@ -5400,7 +5414,9 @@ static int sd_degenerate(struct sched_domain *sd)
5400 if (sd->flags & (SD_LOAD_BALANCE | 5414 if (sd->flags & (SD_LOAD_BALANCE |
5401 SD_BALANCE_NEWIDLE | 5415 SD_BALANCE_NEWIDLE |
5402 SD_BALANCE_FORK | 5416 SD_BALANCE_FORK |
5403 SD_BALANCE_EXEC)) { 5417 SD_BALANCE_EXEC |
5418 SD_SHARE_CPUPOWER |
5419 SD_SHARE_PKG_RESOURCES)) {
5404 if (sd->groups != sd->groups->next) 5420 if (sd->groups != sd->groups->next)
5405 return 0; 5421 return 0;
5406 } 5422 }
@@ -5434,7 +5450,9 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
5434 pflags &= ~(SD_LOAD_BALANCE | 5450 pflags &= ~(SD_LOAD_BALANCE |
5435 SD_BALANCE_NEWIDLE | 5451 SD_BALANCE_NEWIDLE |
5436 SD_BALANCE_FORK | 5452 SD_BALANCE_FORK |
5437 SD_BALANCE_EXEC); 5453 SD_BALANCE_EXEC |
5454 SD_SHARE_CPUPOWER |
5455 SD_SHARE_PKG_RESOURCES);
5438 } 5456 }
5439 if (~cflags & pflags) 5457 if (~cflags & pflags)
5440 return 0; 5458 return 0;
@@ -6241,12 +6259,65 @@ static void free_sched_groups(const cpumask_t *cpu_map)
6241#endif 6259#endif
6242 6260
6243/* 6261/*
6262 * Initialize sched groups cpu_power.
6263 *
6264 * cpu_power indicates the capacity of sched group, which is used while
6265 * distributing the load between different sched groups in a sched domain.
6266 * Typically cpu_power for all the groups in a sched domain will be same unless
6267 * there are asymmetries in the topology. If there are asymmetries, group
6268 * having more cpu_power will pickup more load compared to the group having
6269 * less cpu_power.
6270 *
6271 * cpu_power will be a multiple of SCHED_LOAD_SCALE. This multiple represents
6272 * the maximum number of tasks a group can handle in the presence of other idle
6273 * or lightly loaded groups in the same sched domain.
6274 */
6275static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6276{
6277 struct sched_domain *child;
6278 struct sched_group *group;
6279
6280 WARN_ON(!sd || !sd->groups);
6281
6282 if (cpu != first_cpu(sd->groups->cpumask))
6283 return;
6284
6285 child = sd->child;
6286
6287 /*
6288 * For perf policy, if the groups in child domain share resources
6289 * (for example cores sharing some portions of the cache hierarchy
6290 * or SMT), then set this domain groups cpu_power such that each group
6291 * can handle only one task, when there are other idle groups in the
6292 * same sched domain.
6293 */
6294 if (!child || (!(sd->flags & SD_POWERSAVINGS_BALANCE) &&
6295 (child->flags &
6296 (SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES)))) {
6297 sd->groups->cpu_power = SCHED_LOAD_SCALE;
6298 return;
6299 }
6300
6301 sd->groups->cpu_power = 0;
6302
6303 /*
6304 * add cpu_power of each child group to this groups cpu_power
6305 */
6306 group = child->groups;
6307 do {
6308 sd->groups->cpu_power += group->cpu_power;
6309 group = group->next;
6310 } while (group != child->groups);
6311}
6312
6313/*
6244 * Build sched domains for a given set of cpus and attach the sched domains 6314 * Build sched domains for a given set of cpus and attach the sched domains
6245 * to the individual cpus 6315 * to the individual cpus
6246 */ 6316 */
6247static int build_sched_domains(const cpumask_t *cpu_map) 6317static int build_sched_domains(const cpumask_t *cpu_map)
6248{ 6318{
6249 int i; 6319 int i;
6320 struct sched_domain *sd;
6250#ifdef CONFIG_NUMA 6321#ifdef CONFIG_NUMA
6251 struct sched_group **sched_group_nodes = NULL; 6322 struct sched_group **sched_group_nodes = NULL;
6252 struct sched_group *sched_group_allnodes = NULL; 6323 struct sched_group *sched_group_allnodes = NULL;
@@ -6456,72 +6527,20 @@ static int build_sched_domains(const cpumask_t *cpu_map)
6456 /* Calculate CPU power for physical packages and nodes */ 6527 /* Calculate CPU power for physical packages and nodes */
6457#ifdef CONFIG_SCHED_SMT 6528#ifdef CONFIG_SCHED_SMT
6458 for_each_cpu_mask(i, *cpu_map) { 6529 for_each_cpu_mask(i, *cpu_map) {
6459 struct sched_domain *sd;
6460 sd = &per_cpu(cpu_domains, i); 6530 sd = &per_cpu(cpu_domains, i);
6461 sd->groups->cpu_power = SCHED_LOAD_SCALE; 6531 init_sched_groups_power(i, sd);
6462 } 6532 }
6463#endif 6533#endif
6464#ifdef CONFIG_SCHED_MC 6534#ifdef CONFIG_SCHED_MC
6465 for_each_cpu_mask(i, *cpu_map) { 6535 for_each_cpu_mask(i, *cpu_map) {
6466 int power;
6467 struct sched_domain *sd;
6468 sd = &per_cpu(core_domains, i); 6536 sd = &per_cpu(core_domains, i);
6469 if (sched_smt_power_savings) 6537 init_sched_groups_power(i, sd);
6470 power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
6471 else
6472 power = SCHED_LOAD_SCALE + (cpus_weight(sd->groups->cpumask)-1)
6473 * SCHED_LOAD_SCALE / 10;
6474 sd->groups->cpu_power = power;
6475 } 6538 }
6476#endif 6539#endif
6477 6540
6478 for_each_cpu_mask(i, *cpu_map) { 6541 for_each_cpu_mask(i, *cpu_map) {
6479 struct sched_domain *sd;
6480#ifdef CONFIG_SCHED_MC
6481 sd = &per_cpu(phys_domains, i);
6482 if (i != first_cpu(sd->groups->cpumask))
6483 continue;
6484
6485 sd->groups->cpu_power = 0;
6486 if (sched_mc_power_savings || sched_smt_power_savings) {
6487 int j;
6488
6489 for_each_cpu_mask(j, sd->groups->cpumask) {
6490 struct sched_domain *sd1;
6491 sd1 = &per_cpu(core_domains, j);
6492 /*
6493 * for each core we will add once
6494 * to the group in physical domain
6495 */
6496 if (j != first_cpu(sd1->groups->cpumask))
6497 continue;
6498
6499 if (sched_smt_power_savings)
6500 sd->groups->cpu_power += sd1->groups->cpu_power;
6501 else
6502 sd->groups->cpu_power += SCHED_LOAD_SCALE;
6503 }
6504 } else
6505 /*
6506 * This has to be < 2 * SCHED_LOAD_SCALE
6507 * Lets keep it SCHED_LOAD_SCALE, so that
6508 * while calculating NUMA group's cpu_power
6509 * we can simply do
6510 * numa_group->cpu_power += phys_group->cpu_power;
6511 *
6512 * See "only add power once for each physical pkg"
6513 * comment below
6514 */
6515 sd->groups->cpu_power = SCHED_LOAD_SCALE;
6516#else
6517 int power;
6518 sd = &per_cpu(phys_domains, i); 6542 sd = &per_cpu(phys_domains, i);
6519 if (sched_smt_power_savings) 6543 init_sched_groups_power(i, sd);
6520 power = SCHED_LOAD_SCALE * cpus_weight(sd->groups->cpumask);
6521 else
6522 power = SCHED_LOAD_SCALE;
6523 sd->groups->cpu_power = power;
6524#endif
6525 } 6544 }
6526 6545
6527#ifdef CONFIG_NUMA 6546#ifdef CONFIG_NUMA