aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Zijlstra <peterz@infradead.org>2012-05-23 12:00:43 -0400
committerIngo Molnar <mingo@kernel.org>2012-05-30 08:02:24 -0400
commit74a5ce20e6eeeb3751340b390e7ac1d1d07bbf55 (patch)
treeebbef56666aa11303eafbfb6adbfce56e7a4c605
parent2ea45800d8e1c3c51c45a233d6bd6289a297a386 (diff)
sched: Fix SD_OVERLAP
SD_OVERLAP exists to allow overlapping groups; overlapping groups appear in NUMA topologies that aren't fully connected. The typical result of not fully connected NUMA is that each cpu (or rather node) will have different spans for a particular distance. However, due to how sched domains are traversed -- only the first cpu in the mask goes one level up -- the next level only cares about the spans of the cpus that went up. Due to this, two things were observed to be broken: - build_overlap_sched_groups() -- since it's possible the cpu we're building the groups for exists in multiple (or all) groups, the selection criteria of the first group didn't ensure there was a cpu for which it was true that cpumask_first(span) == cpu. Thus load-balancing would terminate. - update_group_power() -- assumed that the cpu span of the first group of the domain was covered by all groups of the child domain. The above explains why this isn't true, so deal with it. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: David Rientjes <rientjes@google.com> Link: http://lkml.kernel.org/r/1337788843.9783.14.camel@laptop Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--kernel/sched/core.c7
-rw-r--r--kernel/sched/fair.c25
2 files changed, 25 insertions, 7 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 55733616baaa..3a69374fb427 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6030,11 +6030,14 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
6030 6030
6031 cpumask_or(covered, covered, sg_span); 6031 cpumask_or(covered, covered, sg_span);
6032 6032
6033 sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); 6033 sg->sgp = *per_cpu_ptr(sdd->sgp, i);
6034 atomic_inc(&sg->sgp->ref); 6034 atomic_inc(&sg->sgp->ref);
6035 6035
6036 if (cpumask_test_cpu(cpu, sg_span)) 6036 if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
6037 cpumask_first(sg_span) == cpu) {
6038 WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
6037 groups = sg; 6039 groups = sg;
6040 }
6038 6041
6039 if (!first) 6042 if (!first)
6040 first = sg; 6043 first = sg;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 940e6d17cf96..f0380d4987b3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3574,11 +3574,26 @@ void update_group_power(struct sched_domain *sd, int cpu)
3574 3574
3575 power = 0; 3575 power = 0;
3576 3576
3577 group = child->groups; 3577 if (child->flags & SD_OVERLAP) {
3578 do { 3578 /*
3579 power += group->sgp->power; 3579 * SD_OVERLAP domains cannot assume that child groups
3580 group = group->next; 3580 * span the current group.
3581 } while (group != child->groups); 3581 */
3582
3583 for_each_cpu(cpu, sched_group_cpus(sdg))
3584 power += power_of(cpu);
3585 } else {
3586 /*
3587 * !SD_OVERLAP domains can assume that child groups
3588 * span the current group.
3589 */
3590
3591 group = child->groups;
3592 do {
3593 power += group->sgp->power;
3594 group = group->next;
3595 } while (group != child->groups);
3596 }
3582 3597
3583 sdg->sgp->power = power; 3598 sdg->sgp->power = power;
3584} 3599}