author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2012-05-31 08:47:33 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-06-06 10:52:26 -0400
commit		c1174876874dcf8986806e4dad3d7d07af20b439 (patch)
tree		7e2ea14ba9421bddd63e1810716f1929c753e28b /kernel
parent		7f1b43936f0ecad14770634c021cf4a929aec74d (diff)
sched: Fix domain iteration
Weird topologies can lead to asymmetric domain setups. This needs
further consideration since these setups are typically non-minimal
too.

For now, make it work by adding an extra mask selecting which CPUs
are allowed to iterate up.

The topology that triggered it is the one from David Rientjes:

	10 20 20 30
	20 10 20 20
	20 20 10 20
	30 20 20 10

resulting in boxes that wouldn't even boot.

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
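As a rough illustration of the fix (a userspace sketch under stated assumptions, not kernel code: struct cpumask is modelled as a plain 64-bit word, and the names toy_group, mask_first and toy_group_balance_cpu are invented for this example), the snippet below shows how a per-group iteration mask restricts which CPU counts as the canonical balance cpu. In the patch itself, group_balance_cpu() is cpumask_first_and() over the group span and sched_group_mask(), so a CPU that build_group_mask() left out can never claim the role.

/* Sketch only: plain bitmasks stand in for struct cpumask. */
#include <stdint.h>
#include <stdio.h>

struct toy_group {
	uint64_t cpus;		/* models sched_group_cpus(sg) */
	uint64_t balance_mask;	/* models sched_group_mask(sg) */
};

/* First set bit of mask, or 64 (a stand-in for nr_cpu_ids) if empty. */
static int mask_first(uint64_t mask)
{
	return mask ? __builtin_ctzll(mask) : 64;
}

/* Models group_balance_cpu(): first cpu in both the span and the mask. */
static int toy_group_balance_cpu(const struct toy_group *sg)
{
	return mask_first(sg->cpus & sg->balance_mask);
}

int main(void)
{
	/* The group spans cpus 0-3, but only cpus 2-3 may iterate upwards. */
	struct toy_group sg = { .cpus = 0xf, .balance_mask = 0xc };

	/* Prints 2; picking the plain first cpu of the span would give 0. */
	printf("balance cpu: %d\n", toy_group_balance_cpu(&sg));
	return 0;
}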
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched/core.c	64
-rw-r--r--	kernel/sched/fair.c	5
-rw-r--r--	kernel/sched/sched.h	2
3 files changed, 61 insertions, 10 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6546083af3e0..781acb91a50a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5994,6 +5994,44 @@ struct sched_domain_topology_level {
 	struct sd_data data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+	const struct cpumask *span = sched_domain_span(sd);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+			continue;
+
+		cpumask_set_cpu(i, sched_group_mask(sg));
+	}
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
+		child = *per_cpu_ptr(sdd->sd, i);
+
+		/* See the comment near build_group_mask(). */
+		if (!cpumask_test_cpu(i, sched_domain_span(child)))
+			continue;
+
 		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 			goto fail;
 
 		sg_span = sched_group_cpus(sg);
-
-		child = *per_cpu_ptr(sdd->sd, i);
 		if (child->child) {
 			child = child->child;
 			cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		cpumask_or(covered, covered, sg_span);
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		atomic_inc(&sg->sgp->ref);
+		if (atomic_inc_return(&sg->sgp->ref) == 1)
+			build_group_mask(sd, sg);
+
 
+		/*
+		 * Make sure the first group of this domain contains the
+		 * canonical balance cpu. Otherwise the sched_domain iteration
+		 * breaks. See update_sg_lb_stats().
+		 */
 		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-		    cpumask_first(sg_span) == cpu) {
-			WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+		    group_balance_cpu(sg) == cpu)
 			groups = sg;
-		}
 
 		if (!first)
 			first = sg;
@@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		sg = sg->next;
 	} while (sg != sd->groups);
 
-	if (cpu != group_first_cpu(sg))
+	if (cpu != group_balance_cpu(sg))
 		return;
 
 	update_group_power(sd, cpu);
@@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 		*per_cpu_ptr(sdd->sg, j) = sg;
 
-		sgp = kzalloc_node(sizeof(struct sched_group_power),
+		sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(j));
 		if (!sgp)
 			return -ENOMEM;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2a2d236f27b..54cbaa4e7b37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	int i;
 
 	if (local_group)
-		balance_cpu = group_first_cpu(group);
+		balance_cpu = group_balance_cpu(group);
 
 	/* Tally up the load of all CPUs in the group */
 	max_cpu_load = 0;
@@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
-			if (idle_cpu(i) && !first_idle_cpu) {
+			if (idle_cpu(i) && !first_idle_cpu &&
+					cpumask_test_cpu(i, sched_group_mask(group))) {
 				first_idle_cpu = 1;
 				balance_cpu = i;
 			}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ba9dccfd24ce..6d52cea7f33d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_id);
 
+extern int group_balance_cpu(struct sched_group *sg);
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
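For the update_sg_lb_stats() hunk in fair.c above, a similarly hedged userspace sketch (plain 64-bit bitmasks; the helper mask_test and the example masks are made up) of the changed balance_cpu selection: an idle CPU of the local group only takes over as balance_cpu when it is also set in the group's iteration mask, which is the extra sched_group_mask() test the patch adds.

/* Sketch only: models the first_idle_cpu check with plain bitmasks. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool mask_test(uint64_t mask, int cpu)
{
	return (mask >> cpu) & 1;
}

int main(void)
{
	uint64_t group_cpus = 0xf;	/* cpus 0-3 form the local group     */
	uint64_t group_mask = 0xc;	/* only cpus 2-3 may iterate upwards */
	uint64_t idle_cpus  = 0x6;	/* cpus 1 and 2 happen to be idle    */
	int balance_cpu = 2;		/* the group's canonical balance cpu */
	bool first_idle_cpu = false;

	for (int i = 0; i < 64; i++) {
		if (!mask_test(group_cpus, i))
			continue;
		/* Without the mask test, excluded idle cpu 1 would win here. */
		if (mask_test(idle_cpus, i) && !first_idle_cpu &&
		    mask_test(group_mask, i)) {
			first_idle_cpu = true;
			balance_cpu = i;
		}
	}

	printf("balance cpu: %d\n", balance_cpu);	/* prints 2 */
	return 0;
}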