author     Nicolas Pitre <nicolas.pitre@linaro.org>   2014-05-26 18:19:37 -0400
committer  Ingo Molnar <mingo@kernel.org>             2014-06-05 05:52:26 -0400
commit     63b2ca30bdb3dbf60bc7ac5f46713c0d32308261 (patch)
tree       eda6000f59abf091fb53812505d5d0ffb67d4a8f /kernel
parent     0fedc6c8e34f4ce0b37b1f25c3619b4a8faa244c (diff)
sched: Let 'struct sched_group_power' care about CPU capacity
It is better not to think about compute capacity as being equivalent
to "CPU power". The upcoming "power aware" scheduler work may create
confusion with the notion of energy consumption if "power" is used too
liberally.
Since struct sched_group_power is really about the compute capacity of sched
groups, let's rename it to struct sched_group_capacity. Similarly, sgp
becomes sgc. Related variables and functions dealing with groups are also
adjusted accordingly.
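In essence this is a mechanical rename. A condensed sketch of the resulting
declarations, pieced together from the kernel/sched/sched.h hunk further down
(fields and comments not touched by the rename are elided):

	/* kernel/sched/sched.h, after this patch (condensed) */
	struct sched_group_capacity {			/* was: struct sched_group_power */
		atomic_t ref;
		/*
		 * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
		 * for a single CPU.
		 */
		unsigned int capacity, capacity_orig;	/* was: power, power_orig */
		unsigned long next_update;
		int imbalance;	/* XXX unrelated to capacity but shared group state */
		/* ... nr_busy_cpus, group balance cpumask ... */
	};

	struct sched_group {
		atomic_t ref;
		unsigned int group_weight;
		struct sched_group_capacity *sgc;	/* was: struct sched_group_power *sgp */
		/* ... the CPUs this group covers ... */
	};

	extern void update_group_capacity(struct sched_domain *sd, int cpu);	/* was: update_group_power() */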
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Morten Rasmussen <morten.rasmussen@arm.com>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: linaro-kernel@lists.linaro.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/n/tip-5yeix833vvgf2uyj5o36hpu9@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/sched/core.c  |  81
-rw-r--r--   kernel/sched/fair.c  | 131
-rw-r--r--   kernel/sched/sched.h |  16
3 files changed, 114 insertions, 114 deletions
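Every call site follows the same pattern; one representative line from the
kernel/sched/fair.c hunk below shows that only the identifiers change while
the arithmetic stays the same:

	/* Adjust by relative CPU capacity of the group */
	avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity;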
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index afcc84234a3e..2e1fb0902200 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5221,14 +5221,13 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
5221 | } | 5221 | } |
5222 | 5222 | ||
5223 | /* | 5223 | /* |
5224 | * Even though we initialize ->power to something semi-sane, | 5224 | * Even though we initialize ->capacity to something semi-sane, |
5225 | * we leave power_orig unset. This allows us to detect if | 5225 | * we leave capacity_orig unset. This allows us to detect if |
5226 | * domain iteration is still funny without causing /0 traps. | 5226 | * domain iteration is still funny without causing /0 traps. |
5227 | */ | 5227 | */ |
5228 | if (!group->sgp->power_orig) { | 5228 | if (!group->sgc->capacity_orig) { |
5229 | printk(KERN_CONT "\n"); | 5229 | printk(KERN_CONT "\n"); |
5230 | printk(KERN_ERR "ERROR: domain->cpu_power not " | 5230 | printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n"); |
5231 | "set\n"); | ||
5232 | break; | 5231 | break; |
5233 | } | 5232 | } |
5234 | 5233 | ||
@@ -5250,9 +5249,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
5250 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 5249 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
5251 | 5250 | ||
5252 | printk(KERN_CONT " %s", str); | 5251 | printk(KERN_CONT " %s", str); |
5253 | if (group->sgp->power != SCHED_POWER_SCALE) { | 5252 | if (group->sgc->capacity != SCHED_POWER_SCALE) { |
5254 | printk(KERN_CONT " (cpu_power = %d)", | 5253 | printk(KERN_CONT " (cpu_capacity = %d)", |
5255 | group->sgp->power); | 5254 | group->sgc->capacity); |
5256 | } | 5255 | } |
5257 | 5256 | ||
5258 | group = group->next; | 5257 | group = group->next; |
@@ -5466,7 +5465,7 @@ static struct root_domain *alloc_rootdomain(void) | |||
5466 | return rd; | 5465 | return rd; |
5467 | } | 5466 | } |
5468 | 5467 | ||
5469 | static void free_sched_groups(struct sched_group *sg, int free_sgp) | 5468 | static void free_sched_groups(struct sched_group *sg, int free_sgc) |
5470 | { | 5469 | { |
5471 | struct sched_group *tmp, *first; | 5470 | struct sched_group *tmp, *first; |
5472 | 5471 | ||
@@ -5477,8 +5476,8 @@ static void free_sched_groups(struct sched_group *sg, int free_sgp) | |||
5477 | do { | 5476 | do { |
5478 | tmp = sg->next; | 5477 | tmp = sg->next; |
5479 | 5478 | ||
5480 | if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) | 5479 | if (free_sgc && atomic_dec_and_test(&sg->sgc->ref)) |
5481 | kfree(sg->sgp); | 5480 | kfree(sg->sgc); |
5482 | 5481 | ||
5483 | kfree(sg); | 5482 | kfree(sg); |
5484 | sg = tmp; | 5483 | sg = tmp; |
@@ -5496,7 +5495,7 @@ static void free_sched_domain(struct rcu_head *rcu) | |||
5496 | if (sd->flags & SD_OVERLAP) { | 5495 | if (sd->flags & SD_OVERLAP) { |
5497 | free_sched_groups(sd->groups, 1); | 5496 | free_sched_groups(sd->groups, 1); |
5498 | } else if (atomic_dec_and_test(&sd->groups->ref)) { | 5497 | } else if (atomic_dec_and_test(&sd->groups->ref)) { |
5499 | kfree(sd->groups->sgp); | 5498 | kfree(sd->groups->sgc); |
5500 | kfree(sd->groups); | 5499 | kfree(sd->groups); |
5501 | } | 5500 | } |
5502 | kfree(sd); | 5501 | kfree(sd); |
@@ -5707,17 +5706,17 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) | |||
5707 | 5706 | ||
5708 | cpumask_or(covered, covered, sg_span); | 5707 | cpumask_or(covered, covered, sg_span); |
5709 | 5708 | ||
5710 | sg->sgp = *per_cpu_ptr(sdd->sgp, i); | 5709 | sg->sgc = *per_cpu_ptr(sdd->sgc, i); |
5711 | if (atomic_inc_return(&sg->sgp->ref) == 1) | 5710 | if (atomic_inc_return(&sg->sgc->ref) == 1) |
5712 | build_group_mask(sd, sg); | 5711 | build_group_mask(sd, sg); |
5713 | 5712 | ||
5714 | /* | 5713 | /* |
5715 | * Initialize sgp->power such that even if we mess up the | 5714 | * Initialize sgc->capacity such that even if we mess up the |
5716 | * domains and no possible iteration will get us here, we won't | 5715 | * domains and no possible iteration will get us here, we won't |
5717 | * die on a /0 trap. | 5716 | * die on a /0 trap. |
5718 | */ | 5717 | */ |
5719 | sg->sgp->power = SCHED_POWER_SCALE * cpumask_weight(sg_span); | 5718 | sg->sgc->capacity = SCHED_POWER_SCALE * cpumask_weight(sg_span); |
5720 | sg->sgp->power_orig = sg->sgp->power; | 5719 | sg->sgc->capacity_orig = sg->sgc->capacity; |
5721 | 5720 | ||
5722 | /* | 5721 | /* |
5723 | * Make sure the first group of this domain contains the | 5722 | * Make sure the first group of this domain contains the |
@@ -5755,8 +5754,8 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) | |||
5755 | 5754 | ||
5756 | if (sg) { | 5755 | if (sg) { |
5757 | *sg = *per_cpu_ptr(sdd->sg, cpu); | 5756 | *sg = *per_cpu_ptr(sdd->sg, cpu); |
5758 | (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); | 5757 | (*sg)->sgc = *per_cpu_ptr(sdd->sgc, cpu); |
5759 | atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ | 5758 | atomic_set(&(*sg)->sgc->ref, 1); /* for claim_allocations */ |
5760 | } | 5759 | } |
5761 | 5760 | ||
5762 | return cpu; | 5761 | return cpu; |
@@ -5819,16 +5818,16 @@ build_sched_groups(struct sched_domain *sd, int cpu) | |||
5819 | } | 5818 | } |
5820 | 5819 | ||
5821 | /* | 5820 | /* |
5822 | * Initialize sched groups cpu_power. | 5821 | * Initialize sched groups cpu_capacity. |
5823 | * | 5822 | * |
5824 | * cpu_power indicates the capacity of sched group, which is used while | 5823 | * cpu_capacity indicates the capacity of sched group, which is used while |
5825 | * distributing the load between different sched groups in a sched domain. | 5824 | * distributing the load between different sched groups in a sched domain. |
5826 | * Typically cpu_power for all the groups in a sched domain will be same unless | 5825 | * Typically cpu_capacity for all the groups in a sched domain will be same |
5827 | * there are asymmetries in the topology. If there are asymmetries, group | 5826 | * unless there are asymmetries in the topology. If there are asymmetries, |
5828 | * having more cpu_power will pickup more load compared to the group having | 5827 | * group having more cpu_capacity will pickup more load compared to the |
5829 | * less cpu_power. | 5828 | * group having less cpu_capacity. |
5830 | */ | 5829 | */ |
5831 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) | 5830 | static void init_sched_groups_capacity(int cpu, struct sched_domain *sd) |
5832 | { | 5831 | { |
5833 | struct sched_group *sg = sd->groups; | 5832 | struct sched_group *sg = sd->groups; |
5834 | 5833 | ||
@@ -5842,8 +5841,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
5842 | if (cpu != group_balance_cpu(sg)) | 5841 | if (cpu != group_balance_cpu(sg)) |
5843 | return; | 5842 | return; |
5844 | 5843 | ||
5845 | update_group_power(sd, cpu); | 5844 | update_group_capacity(sd, cpu); |
5846 | atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight); | 5845 | atomic_set(&sg->sgc->nr_busy_cpus, sg->group_weight); |
5847 | } | 5846 | } |
5848 | 5847 | ||
5849 | /* | 5848 | /* |
@@ -5934,8 +5933,8 @@ static void claim_allocations(int cpu, struct sched_domain *sd) | |||
5934 | if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) | 5933 | if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) |
5935 | *per_cpu_ptr(sdd->sg, cpu) = NULL; | 5934 | *per_cpu_ptr(sdd->sg, cpu) = NULL; |
5936 | 5935 | ||
5937 | if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) | 5936 | if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref)) |
5938 | *per_cpu_ptr(sdd->sgp, cpu) = NULL; | 5937 | *per_cpu_ptr(sdd->sgc, cpu) = NULL; |
5939 | } | 5938 | } |
5940 | 5939 | ||
5941 | #ifdef CONFIG_NUMA | 5940 | #ifdef CONFIG_NUMA |
@@ -6337,14 +6336,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
6337 | if (!sdd->sg) | 6336 | if (!sdd->sg) |
6338 | return -ENOMEM; | 6337 | return -ENOMEM; |
6339 | 6338 | ||
6340 | sdd->sgp = alloc_percpu(struct sched_group_power *); | 6339 | sdd->sgc = alloc_percpu(struct sched_group_capacity *); |
6341 | if (!sdd->sgp) | 6340 | if (!sdd->sgc) |
6342 | return -ENOMEM; | 6341 | return -ENOMEM; |
6343 | 6342 | ||
6344 | for_each_cpu(j, cpu_map) { | 6343 | for_each_cpu(j, cpu_map) { |
6345 | struct sched_domain *sd; | 6344 | struct sched_domain *sd; |
6346 | struct sched_group *sg; | 6345 | struct sched_group *sg; |
6347 | struct sched_group_power *sgp; | 6346 | struct sched_group_capacity *sgc; |
6348 | 6347 | ||
6349 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), | 6348 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), |
6350 | GFP_KERNEL, cpu_to_node(j)); | 6349 | GFP_KERNEL, cpu_to_node(j)); |
@@ -6362,12 +6361,12 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
6362 | 6361 | ||
6363 | *per_cpu_ptr(sdd->sg, j) = sg; | 6362 | *per_cpu_ptr(sdd->sg, j) = sg; |
6364 | 6363 | ||
6365 | sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), | 6364 | sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(), |
6366 | GFP_KERNEL, cpu_to_node(j)); | 6365 | GFP_KERNEL, cpu_to_node(j)); |
6367 | if (!sgp) | 6366 | if (!sgc) |
6368 | return -ENOMEM; | 6367 | return -ENOMEM; |
6369 | 6368 | ||
6370 | *per_cpu_ptr(sdd->sgp, j) = sgp; | 6369 | *per_cpu_ptr(sdd->sgc, j) = sgc; |
6371 | } | 6370 | } |
6372 | } | 6371 | } |
6373 | 6372 | ||
@@ -6394,15 +6393,15 @@ static void __sdt_free(const struct cpumask *cpu_map) | |||
6394 | 6393 | ||
6395 | if (sdd->sg) | 6394 | if (sdd->sg) |
6396 | kfree(*per_cpu_ptr(sdd->sg, j)); | 6395 | kfree(*per_cpu_ptr(sdd->sg, j)); |
6397 | if (sdd->sgp) | 6396 | if (sdd->sgc) |
6398 | kfree(*per_cpu_ptr(sdd->sgp, j)); | 6397 | kfree(*per_cpu_ptr(sdd->sgc, j)); |
6399 | } | 6398 | } |
6400 | free_percpu(sdd->sd); | 6399 | free_percpu(sdd->sd); |
6401 | sdd->sd = NULL; | 6400 | sdd->sd = NULL; |
6402 | free_percpu(sdd->sg); | 6401 | free_percpu(sdd->sg); |
6403 | sdd->sg = NULL; | 6402 | sdd->sg = NULL; |
6404 | free_percpu(sdd->sgp); | 6403 | free_percpu(sdd->sgc); |
6405 | sdd->sgp = NULL; | 6404 | sdd->sgc = NULL; |
6406 | } | 6405 | } |
6407 | } | 6406 | } |
6408 | 6407 | ||
@@ -6479,7 +6478,7 @@ static int build_sched_domains(const struct cpumask *cpu_map, | |||
6479 | 6478 | ||
6480 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { | 6479 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { |
6481 | claim_allocations(i, sd); | 6480 | claim_allocations(i, sd); |
6482 | init_sched_groups_power(i, sd); | 6481 | init_sched_groups_capacity(i, sd); |
6483 | } | 6482 | } |
6484 | } | 6483 | } |
6485 | 6484 | ||
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e401e446e87c..36bd4d23fca8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4369,8 +4369,8 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, | |||
4369 | avg_load += load; | 4369 | avg_load += load; |
4370 | } | 4370 | } |
4371 | 4371 | ||
4372 | /* Adjust by relative CPU power of the group */ | 4372 | /* Adjust by relative CPU capacity of the group */ |
4373 | avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgp->power; | 4373 | avg_load = (avg_load * SCHED_POWER_SCALE) / group->sgc->capacity; |
4374 | 4374 | ||
4375 | if (local_group) { | 4375 | if (local_group) { |
4376 | this_load = avg_load; | 4376 | this_load = avg_load; |
@@ -5532,7 +5532,7 @@ struct sg_lb_stats { | |||
5532 | unsigned long group_load; /* Total load over the CPUs of the group */ | 5532 | unsigned long group_load; /* Total load over the CPUs of the group */ |
5533 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ | 5533 | unsigned long sum_weighted_load; /* Weighted load of group's tasks */ |
5534 | unsigned long load_per_task; | 5534 | unsigned long load_per_task; |
5535 | unsigned long group_power; | 5535 | unsigned long group_capacity; |
5536 | unsigned int sum_nr_running; /* Nr tasks running in the group */ | 5536 | unsigned int sum_nr_running; /* Nr tasks running in the group */ |
5537 | unsigned int group_capacity_factor; | 5537 | unsigned int group_capacity_factor; |
5538 | unsigned int idle_cpus; | 5538 | unsigned int idle_cpus; |
@@ -5553,7 +5553,7 @@ struct sd_lb_stats { | |||
5553 | struct sched_group *busiest; /* Busiest group in this sd */ | 5553 | struct sched_group *busiest; /* Busiest group in this sd */ |
5554 | struct sched_group *local; /* Local group in this sd */ | 5554 | struct sched_group *local; /* Local group in this sd */ |
5555 | unsigned long total_load; /* Total load of all groups in sd */ | 5555 | unsigned long total_load; /* Total load of all groups in sd */ |
5556 | unsigned long total_pwr; /* Total power of all groups in sd */ | 5556 | unsigned long total_capacity; /* Total capacity of all groups in sd */ |
5557 | unsigned long avg_load; /* Average load across all groups in sd */ | 5557 | unsigned long avg_load; /* Average load across all groups in sd */ |
5558 | 5558 | ||
5559 | struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ | 5559 | struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */ |
@@ -5572,7 +5572,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds) | |||
5572 | .busiest = NULL, | 5572 | .busiest = NULL, |
5573 | .local = NULL, | 5573 | .local = NULL, |
5574 | .total_load = 0UL, | 5574 | .total_load = 0UL, |
5575 | .total_pwr = 0UL, | 5575 | .total_capacity = 0UL, |
5576 | .busiest_stat = { | 5576 | .busiest_stat = { |
5577 | .avg_load = 0UL, | 5577 | .avg_load = 0UL, |
5578 | }, | 5578 | }, |
@@ -5681,7 +5681,7 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
5681 | power >>= SCHED_POWER_SHIFT; | 5681 | power >>= SCHED_POWER_SHIFT; |
5682 | } | 5682 | } |
5683 | 5683 | ||
5684 | sdg->sgp->power_orig = power; | 5684 | sdg->sgc->capacity_orig = power; |
5685 | 5685 | ||
5686 | if (sched_feat(ARCH_POWER)) | 5686 | if (sched_feat(ARCH_POWER)) |
5687 | power *= arch_scale_freq_power(sd, cpu); | 5687 | power *= arch_scale_freq_power(sd, cpu); |
@@ -5697,26 +5697,26 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
5697 | power = 1; | 5697 | power = 1; |
5698 | 5698 | ||
5699 | cpu_rq(cpu)->cpu_power = power; | 5699 | cpu_rq(cpu)->cpu_power = power; |
5700 | sdg->sgp->power = power; | 5700 | sdg->sgc->capacity = power; |
5701 | } | 5701 | } |
5702 | 5702 | ||
5703 | void update_group_power(struct sched_domain *sd, int cpu) | 5703 | void update_group_capacity(struct sched_domain *sd, int cpu) |
5704 | { | 5704 | { |
5705 | struct sched_domain *child = sd->child; | 5705 | struct sched_domain *child = sd->child; |
5706 | struct sched_group *group, *sdg = sd->groups; | 5706 | struct sched_group *group, *sdg = sd->groups; |
5707 | unsigned long power, power_orig; | 5707 | unsigned long capacity, capacity_orig; |
5708 | unsigned long interval; | 5708 | unsigned long interval; |
5709 | 5709 | ||
5710 | interval = msecs_to_jiffies(sd->balance_interval); | 5710 | interval = msecs_to_jiffies(sd->balance_interval); |
5711 | interval = clamp(interval, 1UL, max_load_balance_interval); | 5711 | interval = clamp(interval, 1UL, max_load_balance_interval); |
5712 | sdg->sgp->next_update = jiffies + interval; | 5712 | sdg->sgc->next_update = jiffies + interval; |
5713 | 5713 | ||
5714 | if (!child) { | 5714 | if (!child) { |
5715 | update_cpu_power(sd, cpu); | 5715 | update_cpu_power(sd, cpu); |
5716 | return; | 5716 | return; |
5717 | } | 5717 | } |
5718 | 5718 | ||
5719 | power_orig = power = 0; | 5719 | capacity_orig = capacity = 0; |
5720 | 5720 | ||
5721 | if (child->flags & SD_OVERLAP) { | 5721 | if (child->flags & SD_OVERLAP) { |
5722 | /* | 5722 | /* |
@@ -5725,31 +5725,31 @@ void update_group_power(struct sched_domain *sd, int cpu) | |||
5725 | */ | 5725 | */ |
5726 | 5726 | ||
5727 | for_each_cpu(cpu, sched_group_cpus(sdg)) { | 5727 | for_each_cpu(cpu, sched_group_cpus(sdg)) { |
5728 | struct sched_group_power *sgp; | 5728 | struct sched_group_capacity *sgc; |
5729 | struct rq *rq = cpu_rq(cpu); | 5729 | struct rq *rq = cpu_rq(cpu); |
5730 | 5730 | ||
5731 | /* | 5731 | /* |
5732 | * build_sched_domains() -> init_sched_groups_power() | 5732 | * build_sched_domains() -> init_sched_groups_capacity() |
5733 | * gets here before we've attached the domains to the | 5733 | * gets here before we've attached the domains to the |
5734 | * runqueues. | 5734 | * runqueues. |
5735 | * | 5735 | * |
5736 | * Use power_of(), which is set irrespective of domains | 5736 | * Use power_of(), which is set irrespective of domains |
5737 | * in update_cpu_power(). | 5737 | * in update_cpu_power(). |
5738 | * | 5738 | * |
5739 | * This avoids power/power_orig from being 0 and | 5739 | * This avoids capacity/capacity_orig from being 0 and |
5740 | * causing divide-by-zero issues on boot. | 5740 | * causing divide-by-zero issues on boot. |
5741 | * | 5741 | * |
5742 | * Runtime updates will correct power_orig. | 5742 | * Runtime updates will correct capacity_orig. |
5743 | */ | 5743 | */ |
5744 | if (unlikely(!rq->sd)) { | 5744 | if (unlikely(!rq->sd)) { |
5745 | power_orig += power_of(cpu); | 5745 | capacity_orig += power_of(cpu); |
5746 | power += power_of(cpu); | 5746 | capacity += power_of(cpu); |
5747 | continue; | 5747 | continue; |
5748 | } | 5748 | } |
5749 | 5749 | ||
5750 | sgp = rq->sd->groups->sgp; | 5750 | sgc = rq->sd->groups->sgc; |
5751 | power_orig += sgp->power_orig; | 5751 | capacity_orig += sgc->capacity_orig; |
5752 | power += sgp->power; | 5752 | capacity += sgc->capacity; |
5753 | } | 5753 | } |
5754 | } else { | 5754 | } else { |
5755 | /* | 5755 | /* |
@@ -5759,14 +5759,14 @@ void update_group_power(struct sched_domain *sd, int cpu) | |||
5759 | 5759 | ||
5760 | group = child->groups; | 5760 | group = child->groups; |
5761 | do { | 5761 | do { |
5762 | power_orig += group->sgp->power_orig; | 5762 | capacity_orig += group->sgc->capacity_orig; |
5763 | power += group->sgp->power; | 5763 | capacity += group->sgc->capacity; |
5764 | group = group->next; | 5764 | group = group->next; |
5765 | } while (group != child->groups); | 5765 | } while (group != child->groups); |
5766 | } | 5766 | } |
5767 | 5767 | ||
5768 | sdg->sgp->power_orig = power_orig; | 5768 | sdg->sgc->capacity_orig = capacity_orig; |
5769 | sdg->sgp->power = power; | 5769 | sdg->sgc->capacity = capacity; |
5770 | } | 5770 | } |
5771 | 5771 | ||
5772 | /* | 5772 | /* |
@@ -5786,9 +5786,9 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
5786 | return 0; | 5786 | return 0; |
5787 | 5787 | ||
5788 | /* | 5788 | /* |
5789 | * If ~90% of the cpu_power is still there, we're good. | 5789 | * If ~90% of the cpu_capacity is still there, we're good. |
5790 | */ | 5790 | */ |
5791 | if (group->sgp->power * 32 > group->sgp->power_orig * 29) | 5791 | if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29) |
5792 | return 1; | 5792 | return 1; |
5793 | 5793 | ||
5794 | return 0; | 5794 | return 0; |
@@ -5825,7 +5825,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
5825 | 5825 | ||
5826 | static inline int sg_imbalanced(struct sched_group *group) | 5826 | static inline int sg_imbalanced(struct sched_group *group) |
5827 | { | 5827 | { |
5828 | return group->sgp->imbalance; | 5828 | return group->sgc->imbalance; |
5829 | } | 5829 | } |
5830 | 5830 | ||
5831 | /* | 5831 | /* |
@@ -5833,22 +5833,23 @@ static inline int sg_imbalanced(struct sched_group *group) | |||
5833 | * | 5833 | * |
5834 | * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by | 5834 | * Avoid the issue where N*frac(smt_power) >= 1 creates 'phantom' cores by |
5835 | * first dividing out the smt factor and computing the actual number of cores | 5835 | * first dividing out the smt factor and computing the actual number of cores |
5836 | * and limit power unit capacity with that. | 5836 | * and limit unit capacity with that. |
5837 | */ | 5837 | */ |
5838 | static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group) | 5838 | static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group) |
5839 | { | 5839 | { |
5840 | unsigned int capacity_factor, smt, cpus; | 5840 | unsigned int capacity_factor, smt, cpus; |
5841 | unsigned int power, power_orig; | 5841 | unsigned int capacity, capacity_orig; |
5842 | 5842 | ||
5843 | power = group->sgp->power; | 5843 | capacity = group->sgc->capacity; |
5844 | power_orig = group->sgp->power_orig; | 5844 | capacity_orig = group->sgc->capacity_orig; |
5845 | cpus = group->group_weight; | 5845 | cpus = group->group_weight; |
5846 | 5846 | ||
5847 | /* smt := ceil(cpus / power), assumes: 1 < smt_power < 2 */ | 5847 | /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */ |
5848 | smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, power_orig); | 5848 | smt = DIV_ROUND_UP(SCHED_POWER_SCALE * cpus, capacity_orig); |
5849 | capacity_factor = cpus / smt; /* cores */ | 5849 | capacity_factor = cpus / smt; /* cores */ |
5850 | 5850 | ||
5851 | capacity_factor = min_t(unsigned, capacity_factor, DIV_ROUND_CLOSEST(power, SCHED_POWER_SCALE)); | 5851 | capacity_factor = min_t(unsigned, |
5852 | capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_POWER_SCALE)); | ||
5852 | if (!capacity_factor) | 5853 | if (!capacity_factor) |
5853 | capacity_factor = fix_small_capacity(env->sd, group); | 5854 | capacity_factor = fix_small_capacity(env->sd, group); |
5854 | 5855 | ||
@@ -5892,9 +5893,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, | |||
5892 | sgs->idle_cpus++; | 5893 | sgs->idle_cpus++; |
5893 | } | 5894 | } |
5894 | 5895 | ||
5895 | /* Adjust by relative CPU power of the group */ | 5896 | /* Adjust by relative CPU capacity of the group */ |
5896 | sgs->group_power = group->sgp->power; | 5897 | sgs->group_capacity = group->sgc->capacity; |
5897 | sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_power; | 5898 | sgs->avg_load = (sgs->group_load*SCHED_POWER_SCALE) / sgs->group_capacity; |
5898 | 5899 | ||
5899 | if (sgs->sum_nr_running) | 5900 | if (sgs->sum_nr_running) |
5900 | sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; | 5901 | sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; |
@@ -6009,8 +6010,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd | |||
6009 | sgs = &sds->local_stat; | 6010 | sgs = &sds->local_stat; |
6010 | 6011 | ||
6011 | if (env->idle != CPU_NEWLY_IDLE || | 6012 | if (env->idle != CPU_NEWLY_IDLE || |
6012 | time_after_eq(jiffies, sg->sgp->next_update)) | 6013 | time_after_eq(jiffies, sg->sgc->next_update)) |
6013 | update_group_power(env->sd, env->dst_cpu); | 6014 | update_group_capacity(env->sd, env->dst_cpu); |
6014 | } | 6015 | } |
6015 | 6016 | ||
6016 | update_sg_lb_stats(env, sg, load_idx, local_group, sgs); | 6017 | update_sg_lb_stats(env, sg, load_idx, local_group, sgs); |
@@ -6040,7 +6041,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd | |||
6040 | next_group: | 6041 | next_group: |
6041 | /* Now, start updating sd_lb_stats */ | 6042 | /* Now, start updating sd_lb_stats */ |
6042 | sds->total_load += sgs->group_load; | 6043 | sds->total_load += sgs->group_load; |
6043 | sds->total_pwr += sgs->group_power; | 6044 | sds->total_capacity += sgs->group_capacity; |
6044 | 6045 | ||
6045 | sg = sg->next; | 6046 | sg = sg->next; |
6046 | } while (sg != env->sd->groups); | 6047 | } while (sg != env->sd->groups); |
@@ -6087,7 +6088,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) | |||
6087 | return 0; | 6088 | return 0; |
6088 | 6089 | ||
6089 | env->imbalance = DIV_ROUND_CLOSEST( | 6090 | env->imbalance = DIV_ROUND_CLOSEST( |
6090 | sds->busiest_stat.avg_load * sds->busiest_stat.group_power, | 6091 | sds->busiest_stat.avg_load * sds->busiest_stat.group_capacity, |
6091 | SCHED_POWER_SCALE); | 6092 | SCHED_POWER_SCALE); |
6092 | 6093 | ||
6093 | return 1; | 6094 | return 1; |
@@ -6103,7 +6104,7 @@ static int check_asym_packing(struct lb_env *env, struct sd_lb_stats *sds) | |||
6103 | static inline | 6104 | static inline |
6104 | void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) | 6105 | void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) |
6105 | { | 6106 | { |
6106 | unsigned long tmp, pwr_now = 0, pwr_move = 0; | 6107 | unsigned long tmp, capa_now = 0, capa_move = 0; |
6107 | unsigned int imbn = 2; | 6108 | unsigned int imbn = 2; |
6108 | unsigned long scaled_busy_load_per_task; | 6109 | unsigned long scaled_busy_load_per_task; |
6109 | struct sg_lb_stats *local, *busiest; | 6110 | struct sg_lb_stats *local, *busiest; |
@@ -6118,7 +6119,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) | |||
6118 | 6119 | ||
6119 | scaled_busy_load_per_task = | 6120 | scaled_busy_load_per_task = |
6120 | (busiest->load_per_task * SCHED_POWER_SCALE) / | 6121 | (busiest->load_per_task * SCHED_POWER_SCALE) / |
6121 | busiest->group_power; | 6122 | busiest->group_capacity; |
6122 | 6123 | ||
6123 | if (busiest->avg_load + scaled_busy_load_per_task >= | 6124 | if (busiest->avg_load + scaled_busy_load_per_task >= |
6124 | local->avg_load + (scaled_busy_load_per_task * imbn)) { | 6125 | local->avg_load + (scaled_busy_load_per_task * imbn)) { |
@@ -6132,34 +6133,34 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) | |||
6132 | * moving them. | 6133 | * moving them. |
6133 | */ | 6134 | */ |
6134 | 6135 | ||
6135 | pwr_now += busiest->group_power * | 6136 | capa_now += busiest->group_capacity * |
6136 | min(busiest->load_per_task, busiest->avg_load); | 6137 | min(busiest->load_per_task, busiest->avg_load); |
6137 | pwr_now += local->group_power * | 6138 | capa_now += local->group_capacity * |
6138 | min(local->load_per_task, local->avg_load); | 6139 | min(local->load_per_task, local->avg_load); |
6139 | pwr_now /= SCHED_POWER_SCALE; | 6140 | capa_now /= SCHED_POWER_SCALE; |
6140 | 6141 | ||
6141 | /* Amount of load we'd subtract */ | 6142 | /* Amount of load we'd subtract */ |
6142 | if (busiest->avg_load > scaled_busy_load_per_task) { | 6143 | if (busiest->avg_load > scaled_busy_load_per_task) { |
6143 | pwr_move += busiest->group_power * | 6144 | capa_move += busiest->group_capacity * |
6144 | min(busiest->load_per_task, | 6145 | min(busiest->load_per_task, |
6145 | busiest->avg_load - scaled_busy_load_per_task); | 6146 | busiest->avg_load - scaled_busy_load_per_task); |
6146 | } | 6147 | } |
6147 | 6148 | ||
6148 | /* Amount of load we'd add */ | 6149 | /* Amount of load we'd add */ |
6149 | if (busiest->avg_load * busiest->group_power < | 6150 | if (busiest->avg_load * busiest->group_capacity < |
6150 | busiest->load_per_task * SCHED_POWER_SCALE) { | 6151 | busiest->load_per_task * SCHED_POWER_SCALE) { |
6151 | tmp = (busiest->avg_load * busiest->group_power) / | 6152 | tmp = (busiest->avg_load * busiest->group_capacity) / |
6152 | local->group_power; | 6153 | local->group_capacity; |
6153 | } else { | 6154 | } else { |
6154 | tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / | 6155 | tmp = (busiest->load_per_task * SCHED_POWER_SCALE) / |
6155 | local->group_power; | 6156 | local->group_capacity; |
6156 | } | 6157 | } |
6157 | pwr_move += local->group_power * | 6158 | capa_move += local->group_capacity * |
6158 | min(local->load_per_task, local->avg_load + tmp); | 6159 | min(local->load_per_task, local->avg_load + tmp); |
6159 | pwr_move /= SCHED_POWER_SCALE; | 6160 | capa_move /= SCHED_POWER_SCALE; |
6160 | 6161 | ||
6161 | /* Move if we gain throughput */ | 6162 | /* Move if we gain throughput */ |
6162 | if (pwr_move > pwr_now) | 6163 | if (capa_move > capa_now) |
6163 | env->imbalance = busiest->load_per_task; | 6164 | env->imbalance = busiest->load_per_task; |
6164 | } | 6165 | } |
6165 | 6166 | ||
@@ -6207,7 +6208,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s | |||
6207 | (busiest->sum_nr_running - busiest->group_capacity_factor); | 6208 | (busiest->sum_nr_running - busiest->group_capacity_factor); |
6208 | 6209 | ||
6209 | load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); | 6210 | load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_POWER_SCALE); |
6210 | load_above_capacity /= busiest->group_power; | 6211 | load_above_capacity /= busiest->group_capacity; |
6211 | } | 6212 | } |
6212 | 6213 | ||
6213 | /* | 6214 | /* |
@@ -6222,8 +6223,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s | |||
6222 | 6223 | ||
6223 | /* How much load to actually move to equalise the imbalance */ | 6224 | /* How much load to actually move to equalise the imbalance */ |
6224 | env->imbalance = min( | 6225 | env->imbalance = min( |
6225 | max_pull * busiest->group_power, | 6226 | max_pull * busiest->group_capacity, |
6226 | (sds->avg_load - local->avg_load) * local->group_power | 6227 | (sds->avg_load - local->avg_load) * local->group_capacity |
6227 | ) / SCHED_POWER_SCALE; | 6228 | ) / SCHED_POWER_SCALE; |
6228 | 6229 | ||
6229 | /* | 6230 | /* |
@@ -6278,7 +6279,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env) | |||
6278 | if (!sds.busiest || busiest->sum_nr_running == 0) | 6279 | if (!sds.busiest || busiest->sum_nr_running == 0) |
6279 | goto out_balanced; | 6280 | goto out_balanced; |
6280 | 6281 | ||
6281 | sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_pwr; | 6282 | sds.avg_load = (SCHED_POWER_SCALE * sds.total_load) / sds.total_capacity; |
6282 | 6283 | ||
6283 | /* | 6284 | /* |
6284 | * If the busiest group is imbalanced the below checks don't | 6285 | * If the busiest group is imbalanced the below checks don't |
@@ -6611,7 +6612,7 @@ more_balance: | |||
6611 | * We failed to reach balance because of affinity. | 6612 | * We failed to reach balance because of affinity. |
6612 | */ | 6613 | */ |
6613 | if (sd_parent) { | 6614 | if (sd_parent) { |
6614 | int *group_imbalance = &sd_parent->groups->sgp->imbalance; | 6615 | int *group_imbalance = &sd_parent->groups->sgc->imbalance; |
6615 | 6616 | ||
6616 | if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) { | 6617 | if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0) { |
6617 | *group_imbalance = 1; | 6618 | *group_imbalance = 1; |
@@ -6998,7 +6999,7 @@ static inline void set_cpu_sd_state_busy(void) | |||
6998 | goto unlock; | 6999 | goto unlock; |
6999 | sd->nohz_idle = 0; | 7000 | sd->nohz_idle = 0; |
7000 | 7001 | ||
7001 | atomic_inc(&sd->groups->sgp->nr_busy_cpus); | 7002 | atomic_inc(&sd->groups->sgc->nr_busy_cpus); |
7002 | unlock: | 7003 | unlock: |
7003 | rcu_read_unlock(); | 7004 | rcu_read_unlock(); |
7004 | } | 7005 | } |
@@ -7015,7 +7016,7 @@ void set_cpu_sd_state_idle(void) | |||
7015 | goto unlock; | 7016 | goto unlock; |
7016 | sd->nohz_idle = 1; | 7017 | sd->nohz_idle = 1; |
7017 | 7018 | ||
7018 | atomic_dec(&sd->groups->sgp->nr_busy_cpus); | 7019 | atomic_dec(&sd->groups->sgc->nr_busy_cpus); |
7019 | unlock: | 7020 | unlock: |
7020 | rcu_read_unlock(); | 7021 | rcu_read_unlock(); |
7021 | } | 7022 | } |
@@ -7219,7 +7220,7 @@ end: | |||
7219 | * of an idle cpu is the system. | 7220 | * of an idle cpu is the system. |
7220 | * - This rq has more than one task. | 7221 | * - This rq has more than one task. |
7221 | * - At any scheduler domain level, this cpu's scheduler group has multiple | 7222 | * - At any scheduler domain level, this cpu's scheduler group has multiple |
7222 | * busy cpu's exceeding the group's power. | 7223 | * busy cpu's exceeding the group's capacity. |
7223 | * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler | 7224 | * - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler |
7224 | * domain span are idle. | 7225 | * domain span are idle. |
7225 | */ | 7226 | */ |
@@ -7227,7 +7228,7 @@ static inline int nohz_kick_needed(struct rq *rq) | |||
7227 | { | 7228 | { |
7228 | unsigned long now = jiffies; | 7229 | unsigned long now = jiffies; |
7229 | struct sched_domain *sd; | 7230 | struct sched_domain *sd; |
7230 | struct sched_group_power *sgp; | 7231 | struct sched_group_capacity *sgc; |
7231 | int nr_busy, cpu = rq->cpu; | 7232 | int nr_busy, cpu = rq->cpu; |
7232 | 7233 | ||
7233 | if (unlikely(rq->idle_balance)) | 7234 | if (unlikely(rq->idle_balance)) |
@@ -7257,8 +7258,8 @@ static inline int nohz_kick_needed(struct rq *rq) | |||
7257 | sd = rcu_dereference(per_cpu(sd_busy, cpu)); | 7258 | sd = rcu_dereference(per_cpu(sd_busy, cpu)); |
7258 | 7259 | ||
7259 | if (sd) { | 7260 | if (sd) { |
7260 | sgp = sd->groups->sgp; | 7261 | sgc = sd->groups->sgc; |
7261 | nr_busy = atomic_read(&sgp->nr_busy_cpus); | 7262 | nr_busy = atomic_read(&sgc->nr_busy_cpus); |
7262 | 7263 | ||
7263 | if (nr_busy > 1) | 7264 | if (nr_busy > 1) |
7264 | goto need_kick_unlock; | 7265 | goto need_kick_unlock; |
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 600e2291a75c..a5b957d53c92 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -728,15 +728,15 @@ DECLARE_PER_CPU(struct sched_domain *, sd_numa); | |||
728 | DECLARE_PER_CPU(struct sched_domain *, sd_busy); | 728 | DECLARE_PER_CPU(struct sched_domain *, sd_busy); |
729 | DECLARE_PER_CPU(struct sched_domain *, sd_asym); | 729 | DECLARE_PER_CPU(struct sched_domain *, sd_asym); |
730 | 730 | ||
731 | struct sched_group_power { | 731 | struct sched_group_capacity { |
732 | atomic_t ref; | 732 | atomic_t ref; |
733 | /* | 733 | /* |
734 | * CPU power of this group, SCHED_LOAD_SCALE being max power for a | 734 | * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity |
735 | * single CPU. | 735 | * for a single CPU. |
736 | */ | 736 | */ |
737 | unsigned int power, power_orig; | 737 | unsigned int capacity, capacity_orig; |
738 | unsigned long next_update; | 738 | unsigned long next_update; |
739 | int imbalance; /* XXX unrelated to power but shared group state */ | 739 | int imbalance; /* XXX unrelated to capacity but shared group state */ |
740 | /* | 740 | /* |
741 | * Number of busy cpus in this group. | 741 | * Number of busy cpus in this group. |
742 | */ | 742 | */ |
@@ -750,7 +750,7 @@ struct sched_group { | |||
750 | atomic_t ref; | 750 | atomic_t ref; |
751 | 751 | ||
752 | unsigned int group_weight; | 752 | unsigned int group_weight; |
753 | struct sched_group_power *sgp; | 753 | struct sched_group_capacity *sgc; |
754 | 754 | ||
755 | /* | 755 | /* |
756 | * The CPUs this group covers. | 756 | * The CPUs this group covers. |
@@ -773,7 +773,7 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) | |||
773 | */ | 773 | */ |
774 | static inline struct cpumask *sched_group_mask(struct sched_group *sg) | 774 | static inline struct cpumask *sched_group_mask(struct sched_group *sg) |
775 | { | 775 | { |
776 | return to_cpumask(sg->sgp->cpumask); | 776 | return to_cpumask(sg->sgc->cpumask); |
777 | } | 777 | } |
778 | 778 | ||
779 | /** | 779 | /** |
@@ -1167,7 +1167,7 @@ extern const struct sched_class idle_sched_class; | |||
1167 | 1167 | ||
1168 | #ifdef CONFIG_SMP | 1168 | #ifdef CONFIG_SMP |
1169 | 1169 | ||
1170 | extern void update_group_power(struct sched_domain *sd, int cpu); | 1170 | extern void update_group_capacity(struct sched_domain *sd, int cpu); |
1171 | 1171 | ||
1172 | extern void trigger_load_balance(struct rq *rq); | 1172 | extern void trigger_load_balance(struct rq *rq); |
1173 | 1173 | ||