author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-04-07 08:09:50 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-11 06:58:19 -0400
commit		dce840a08702bd13a9a186e07e63d1ef82256b5e (patch)
tree		168bb98aed7f5761ebe31aa92c34959e9d0f238a /kernel/sched.c
parent		a9c9a9b6bff27ac9c746344a9c1a19bf3327002c (diff)
sched: Dynamically allocate sched_domain/sched_group data-structures
Instead of relying on static allocations for the sched_domain and sched_group trees, dynamically allocate and RCU-free them.

Allocating this dynamically also allows for some build_sched_groups() simplification, since we can now (as with the other simplifications) rely on the sched_domain tree instead of hard-coded knowledge.

One tricky thing to note is that detach_destroy_domains() needs to hold rcu_read_lock() over the entire tear-down; per-cpu is not sufficient since that can lead to partial sched_group existence (this could possibly be solved by doing the tear-down backwards, but the current approach is much more robust).

A consequence of the above is that we can no longer print the sched_domain debug stuff from cpu_attach_domain(), since that might now run with preemption disabled (due to classic RCU etc.) and sched_domain_debug() does some GFP_KERNEL allocations.

Another thing to note is that we now fully rely on normal RCU and not RCU-sched. This is because, with the new and exciting RCU flavours we grew over the years, BH doesn't necessarily hold off RCU-sched grace periods (-rt is known to break this). This would in fact already cause us grief, since we do sched_domain/sched_group iterations from softirq context.

This patch is somewhat larger than I would like it to be, but I didn't find any means of shrinking/splitting it.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110407122942.245307941@chello.nl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
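The central pattern the patch moves to is deferring the free of an object through an RCU callback embedded in the object itself (as free_rootdomain() and free_sched_domain() do below), instead of blocking in synchronize_sched(). A minimal sketch of that pattern follows; the my_obj type and function names are illustrative only and are not part of this patch or of kernel/sched.c:

	#include <linux/slab.h>
	#include <linux/rcupdate.h>

	struct my_obj {
		int data;
		struct rcu_head rcu;	/* embedded so call_rcu() has something to queue */
	};

	static void my_obj_free_rcu(struct rcu_head *rcu)
	{
		/* Recover the enclosing object from the embedded rcu_head. */
		struct my_obj *obj = container_of(rcu, struct my_obj, rcu);

		kfree(obj);
	}

	static void my_obj_release(struct my_obj *obj)
	{
		/*
		 * Readers traverse the structure under rcu_read_lock(); the
		 * actual kfree() only happens after a grace period, without
		 * the releasing context having to block for it.
		 */
		call_rcu(&obj->rcu, my_obj_free_rcu);
	}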
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	479
1 file changed, 189 insertions, 290 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cca59ec4a4..65204845063 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -417,6 +417,7 @@ struct rt_rq {
  */
 struct root_domain {
 	atomic_t refcount;
+	struct rcu_head rcu;
 	cpumask_var_t span;
 	cpumask_var_t online;
 
@@ -571,7 +572,7 @@ static inline int cpu_of(struct rq *rq)
 
 #define rcu_dereference_check_sched_domain(p) \
 	rcu_dereference_check((p), \
-			      rcu_read_lock_sched_held() || \
+			      rcu_read_lock_held() || \
 			      lockdep_is_held(&sched_domains_mutex))
 
 /*
@@ -6572,12 +6573,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	return 1;
 }
 
-static void free_rootdomain(struct root_domain *rd)
+static void free_rootdomain(struct rcu_head *rcu)
 {
-	synchronize_sched();
+	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
 
 	cpupri_cleanup(&rd->cpupri);
-
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
 	free_cpumask_var(rd->span);
@@ -6618,7 +6618,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	if (old_rd)
-		free_rootdomain(old_rd);
+		call_rcu_sched(&old_rd->rcu, free_rootdomain);
 }
 
 static int init_rootdomain(struct root_domain *rd)
@@ -6669,6 +6669,25 @@ static struct root_domain *alloc_rootdomain(void)
 	return rd;
 }
 
+static void free_sched_domain(struct rcu_head *rcu)
+{
+	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
+	if (atomic_dec_and_test(&sd->groups->ref))
+		kfree(sd->groups);
+	kfree(sd);
+}
+
+static void destroy_sched_domain(struct sched_domain *sd, int cpu)
+{
+	call_rcu(&sd->rcu, free_sched_domain);
+}
+
+static void destroy_sched_domains(struct sched_domain *sd, int cpu)
+{
+	for (; sd; sd = sd->parent)
+		destroy_sched_domain(sd, cpu);
+}
+
 /*
  * Attach the domain 'sd' to 'cpu' as its base domain. Callers must
  * hold the hotplug lock.
@@ -6689,20 +6708,25 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 			tmp->parent = parent->parent;
 			if (parent->parent)
 				parent->parent->child = tmp;
+			destroy_sched_domain(parent, cpu);
 		} else
 			tmp = tmp->parent;
 	}
 
 	if (sd && sd_degenerate(sd)) {
+		tmp = sd;
 		sd = sd->parent;
+		destroy_sched_domain(tmp, cpu);
 		if (sd)
 			sd->child = NULL;
 	}
 
-	sched_domain_debug(sd, cpu);
+	/* sched_domain_debug(sd, cpu); */
 
 	rq_attach_root(rq, rd);
+	tmp = rq->sd;
 	rcu_assign_pointer(rq->sd, sd);
+	destroy_sched_domains(tmp, cpu);
 }
 
 /* cpus with isolated domains */
@@ -6718,56 +6742,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-/*
- * init_sched_build_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
- * init_sched_build_groups will build a circular linked list of the groups
- * covered by the given span, and will set each group's ->cpumask correctly,
- * and ->cpu_power to 0.
- */
-static void
-init_sched_build_groups(const struct cpumask *span,
-			const struct cpumask *cpu_map,
-			int (*group_fn)(int cpu, const struct cpumask *cpu_map,
-					struct sched_group **sg,
-					struct cpumask *tmpmask),
-			struct cpumask *covered, struct cpumask *tmpmask)
-{
-	struct sched_group *first = NULL, *last = NULL;
-	int i;
-
-	cpumask_clear(covered);
-
-	for_each_cpu(i, span) {
-		struct sched_group *sg;
-		int group = group_fn(i, cpu_map, &sg, tmpmask);
-		int j;
-
-		if (cpumask_test_cpu(i, covered))
-			continue;
-
-		cpumask_clear(sched_group_cpus(sg));
-		sg->cpu_power = 0;
-
-		for_each_cpu(j, span) {
-			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
-				continue;
-
-			cpumask_set_cpu(j, covered);
-			cpumask_set_cpu(j, sched_group_cpus(sg));
-		}
-		if (!first)
-			first = sg;
-		if (last)
-			last->next = sg;
-		last = sg;
-	}
-	last->next = first;
-}
-
 #define SD_NODES_PER_DOMAIN 16
 
 #ifdef CONFIG_NUMA
@@ -6858,154 +6832,96 @@ struct static_sched_domain {
 	DECLARE_BITMAP(span, CONFIG_NR_CPUS);
 };
 
+struct sd_data {
+	struct sched_domain **__percpu sd;
+	struct sched_group **__percpu sg;
+};
+
 struct s_data {
 #ifdef CONFIG_NUMA
 	int sd_allnodes;
 #endif
 	cpumask_var_t nodemask;
 	cpumask_var_t send_covered;
-	cpumask_var_t tmpmask;
 	struct sched_domain ** __percpu sd;
+	struct sd_data sdd[SD_LV_MAX];
 	struct root_domain *rd;
 };
 
 enum s_alloc {
 	sa_rootdomain,
 	sa_sd,
-	sa_tmpmask,
+	sa_sd_storage,
 	sa_send_covered,
 	sa_nodemask,
 	sa_none,
 };
 
 /*
- * SMT sched-domains:
+ * Assumes the sched_domain tree is fully constructed
  */
-#ifdef CONFIG_SCHED_SMT
-static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_groups);
-
-static int
-cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
-		 struct sched_group **sg, struct cpumask *unused)
+static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
-	if (sg)
-		*sg = &per_cpu(sched_groups, cpu).sg;
-	return cpu;
-}
-#endif /* CONFIG_SCHED_SMT */
+	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+	struct sched_domain *child = sd->child;
 
-/*
- * multi-core sched-domains:
- */
-#ifdef CONFIG_SCHED_MC
-static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
+	if (child)
+		cpu = cpumask_first(sched_domain_span(child));
 
-static int
-cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
-{
-	int group;
-#ifdef CONFIG_SCHED_SMT
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#else
-	group = cpu;
-#endif
 	if (sg)
-		*sg = &per_cpu(sched_group_core, group).sg;
-	return group;
+		*sg = *per_cpu_ptr(sdd->sg, cpu);
+
+	return cpu;
 }
-#endif /* CONFIG_SCHED_MC */
 
 /*
- * book sched-domains:
+ * build_sched_groups takes the cpumask we wish to span, and a pointer
+ * to a function which identifies what group(along with sched group) a CPU
+ * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
+ * (due to the fact that we keep track of groups covered with a struct cpumask).
+ *
+ * build_sched_groups will build a circular linked list of the groups
+ * covered by the given span, and will set each group's ->cpumask correctly,
+ * and ->cpu_power to 0.
  */
-#ifdef CONFIG_SCHED_BOOK
-static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
-
-static int
-cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
-{
-	int group = cpu;
-#ifdef CONFIG_SCHED_MC
-	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#endif
-	if (sg)
-		*sg = &per_cpu(sched_group_book, group).sg;
-	return group;
-}
-#endif /* CONFIG_SCHED_BOOK */
-
-static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
-
-static int
-cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
-		  struct sched_group **sg, struct cpumask *mask)
+static void
+build_sched_groups(struct sched_domain *sd, struct cpumask *covered)
 {
-	int group;
-#ifdef CONFIG_SCHED_BOOK
-	cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_MC)
-	cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#elif defined(CONFIG_SCHED_SMT)
-	cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
-	group = cpumask_first(mask);
-#else
-	group = cpu;
-#endif
-	if (sg)
-		*sg = &per_cpu(sched_group_phys, group).sg;
-	return group;
-}
-
-#ifdef CONFIG_NUMA
-static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_node);
+	struct sched_group *first = NULL, *last = NULL;
+	struct sd_data *sdd = sd->private;
+	const struct cpumask *span = sched_domain_span(sd);
+	int i;
 
-static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map,
-			     struct sched_group **sg,
-			     struct cpumask *nodemask)
-{
-	int group;
+	cpumask_clear(covered);
 
-	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-	group = cpumask_first(nodemask);
+	for_each_cpu(i, span) {
+		struct sched_group *sg;
+		int group = get_group(i, sdd, &sg);
+		int j;
 
-	if (sg)
-		*sg = &per_cpu(sched_group_node, group).sg;
-	return group;
-}
+		if (cpumask_test_cpu(i, covered))
+			continue;
 
-static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
+		cpumask_clear(sched_group_cpus(sg));
+		sg->cpu_power = 0;
 
-static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
-				 struct sched_group **sg,
-				 struct cpumask *nodemask)
-{
-	int group;
+		for_each_cpu(j, span) {
+			if (get_group(j, sdd, NULL) != group)
+				continue;
 
-	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-	group = cpumask_first(nodemask);
+			cpumask_set_cpu(j, covered);
+			cpumask_set_cpu(j, sched_group_cpus(sg));
+		}
 
-	if (sg)
-		*sg = &per_cpu(sched_group_allnodes, group).sg;
-	return group;
+		if (!first)
+			first = sg;
+		if (last)
+			last->next = sg;
+		last = sg;
+	}
+	last->next = first;
 }
 
-#endif /* CONFIG_NUMA */
-
 /*
  * Initialize sched groups cpu_power.
  *
@@ -7039,15 +6955,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 # define SD_INIT_NAME(sd, type)	do { } while (0)
 #endif
 
-#define SD_INIT(sd, type)	sd_init_##type(sd)
-
-#define SD_INIT_FUNC(type)	\
-static noinline void sd_init_##type(struct sched_domain *sd)	\
-{	\
-	memset(sd, 0, sizeof(*sd));	\
-	*sd = SD_##type##_INIT;	\
-	sd->level = SD_LV_##type;	\
-	SD_INIT_NAME(sd, type);	\
+#define SD_INIT_FUNC(type)						\
+static noinline struct sched_domain *sd_init_##type(struct s_data *d, int cpu)	\
+{									\
+	struct sched_domain *sd = *per_cpu_ptr(d->sdd[SD_LV_##type].sd, cpu); \
+	*sd = SD_##type##_INIT;						\
+	sd->level = SD_LV_##type;					\
+	SD_INIT_NAME(sd, type);						\
+	sd->private = &d->sdd[SD_LV_##type];				\
+	return sd;							\
 }
 
 SD_INIT_FUNC(CPU)
@@ -7103,13 +7019,22 @@ static void set_domain_attribute(struct sched_domain *sd,
 static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 				 const struct cpumask *cpu_map)
 {
+	int i, j;
+
 	switch (what) {
 	case sa_rootdomain:
-		free_rootdomain(d->rd); /* fall through */
+		free_rootdomain(&d->rd->rcu); /* fall through */
 	case sa_sd:
 		free_percpu(d->sd); /* fall through */
-	case sa_tmpmask:
-		free_cpumask_var(d->tmpmask); /* fall through */
+	case sa_sd_storage:
+		for (i = 0; i < SD_LV_MAX; i++) {
+			for_each_cpu(j, cpu_map) {
+				kfree(*per_cpu_ptr(d->sdd[i].sd, j));
+				kfree(*per_cpu_ptr(d->sdd[i].sg, j));
+			}
+			free_percpu(d->sdd[i].sd);
+			free_percpu(d->sdd[i].sg);
+		} /* fall through */
 	case sa_send_covered:
 		free_cpumask_var(d->send_covered); /* fall through */
 	case sa_nodemask:
@@ -7122,25 +7047,70 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 						   const struct cpumask *cpu_map)
 {
+	int i, j;
+
+	memset(d, 0, sizeof(*d));
+
 	if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
 		return sa_none;
 	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
 		return sa_nodemask;
-	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
-		return sa_send_covered;
-	d->sd = alloc_percpu(struct sched_domain *);
-	if (!d->sd) {
-		printk(KERN_WARNING "Cannot alloc per-cpu pointers\n");
-		return sa_tmpmask;
+	for (i = 0; i < SD_LV_MAX; i++) {
+		d->sdd[i].sd = alloc_percpu(struct sched_domain *);
+		if (!d->sdd[i].sd)
+			return sa_sd_storage;
+
+		d->sdd[i].sg = alloc_percpu(struct sched_group *);
+		if (!d->sdd[i].sg)
+			return sa_sd_storage;
+
+		for_each_cpu(j, cpu_map) {
+			struct sched_domain *sd;
+			struct sched_group *sg;
+
+			sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sd)
+				return sa_sd_storage;
+
+			*per_cpu_ptr(d->sdd[i].sd, j) = sd;
+
+			sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+					GFP_KERNEL, cpu_to_node(j));
+			if (!sg)
+				return sa_sd_storage;
+
+			*per_cpu_ptr(d->sdd[i].sg, j) = sg;
+		}
 	}
+	d->sd = alloc_percpu(struct sched_domain *);
+	if (!d->sd)
+		return sa_sd_storage;
+
 	d->rd = alloc_rootdomain();
-	if (!d->rd) {
-		printk(KERN_WARNING "Cannot alloc root domain\n");
+	if (!d->rd)
 		return sa_sd;
-	}
 	return sa_rootdomain;
 }
 
+/*
+ * NULL the sd_data elements we've used to build the sched_domain and
+ * sched_group structure so that the subsequent __free_domain_allocs()
+ * will not free the data we're using.
+ */
+static void claim_allocations(int cpu, struct sched_domain *sd)
+{
+	struct sd_data *sdd = sd->private;
+	struct sched_group *sg = sd->groups;
+
+	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
+	*per_cpu_ptr(sdd->sd, cpu) = NULL;
+
+	if (cpu == cpumask_first(sched_group_cpus(sg))) {
+		WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
+		*per_cpu_ptr(sdd->sg, cpu) = NULL;
+	}
+}
+
 static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i)
 {
@@ -7151,24 +7121,20 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
 	d->sd_allnodes = 0;
 	if (cpumask_weight(cpu_map) >
 			SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) {
-		sd = &per_cpu(allnodes_domains, i).sd;
-		SD_INIT(sd, ALLNODES);
+		sd = sd_init_ALLNODES(d, i);
 		set_domain_attribute(sd, attr);
 		cpumask_copy(sched_domain_span(sd), cpu_map);
-		cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask);
 		d->sd_allnodes = 1;
 	}
 	parent = sd;
 
-	sd = &per_cpu(node_domains, i).sd;
-	SD_INIT(sd, NODE);
+	sd = sd_init_NODE(d, i);
 	set_domain_attribute(sd, attr);
 	sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd));
 	sd->parent = parent;
 	if (parent)
 		parent->child = sd;
 	cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
-	cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
@@ -7178,14 +7144,12 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
 			struct sched_domain *parent, int i)
 {
 	struct sched_domain *sd;
-	sd = &per_cpu(phys_domains, i).sd;
-	SD_INIT(sd, CPU);
+	sd = sd_init_CPU(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_copy(sched_domain_span(sd), d->nodemask);
 	sd->parent = parent;
 	if (parent)
 		parent->child = sd;
-	cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask);
 	return sd;
 }
 
@@ -7195,13 +7159,11 @@ static struct sched_domain *__build_book_sched_domain(struct s_data *d,
 {
 	struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_BOOK
-	sd = &per_cpu(book_domains, i).sd;
-	SD_INIT(sd, BOOK);
+	sd = sd_init_BOOK(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
 	sd->parent = parent;
 	parent->child = sd;
-	cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
@@ -7212,13 +7174,11 @@ static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
 {
 	struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_MC
-	sd = &per_cpu(core_domains, i).sd;
-	SD_INIT(sd, MC);
+	sd = sd_init_MC(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i));
 	sd->parent = parent;
 	parent->child = sd;
-	cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
@@ -7229,92 +7189,32 @@ static struct sched_domain *__build_smt_sched_domain(struct s_data *d,
 {
 	struct sched_domain *sd = parent;
 #ifdef CONFIG_SCHED_SMT
-	sd = &per_cpu(cpu_domains, i).sd;
-	SD_INIT(sd, SIBLING);
+	sd = sd_init_SIBLING(d, i);
 	set_domain_attribute(sd, attr);
 	cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i));
 	sd->parent = parent;
 	parent->child = sd;
-	cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
 	return sd;
 }
 
-static void build_sched_groups(struct s_data *d, struct sched_domain *sd,
-			       const struct cpumask *cpu_map, int cpu)
-{
-	switch (sd->level) {
-#ifdef CONFIG_SCHED_SMT
-	case SD_LV_SIBLING: /* set up CPU (sibling) groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_cpu_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-#ifdef CONFIG_SCHED_MC
-	case SD_LV_MC: /* set up multi-core groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_core_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	case SD_LV_BOOK: /* set up book groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_book_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-	case SD_LV_CPU: /* set up physical groups */
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_phys_group,
-						d->send_covered, d->tmpmask);
-		break;
-#ifdef CONFIG_NUMA
-	case SD_LV_NODE:
-		if (cpu == cpumask_first(sched_domain_span(sd)))
-			init_sched_build_groups(sched_domain_span(sd), cpu_map,
-						&cpu_to_node_group,
-						d->send_covered, d->tmpmask);
-
-	case SD_LV_ALLNODES:
-		if (cpu == cpumask_first(cpu_map))
-			init_sched_build_groups(cpu_map, cpu_map,
-						&cpu_to_allnodes_group,
-						d->send_covered, d->tmpmask);
-		break;
-#endif
-	default:
-		break;
-	}
-}
-
 /*
  * Build sched domains for a given set of cpus and attach the sched domains
  * to the individual cpus
  */
-static int __build_sched_domains(const struct cpumask *cpu_map,
+static int build_sched_domains(const struct cpumask *cpu_map,
 				 struct sched_domain_attr *attr)
 {
 	enum s_alloc alloc_state = sa_none;
+	struct sched_domain *sd;
 	struct s_data d;
-	struct sched_domain *sd, *tmp;
 	int i;
-#ifdef CONFIG_NUMA
-	d.sd_allnodes = 0;
-#endif
 
 	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
 	if (alloc_state != sa_rootdomain)
 		goto error;
 
-	/*
-	 * Set up domains for cpus specified by the cpu_map.
-	 */
+	/* Set up domains for cpus specified by the cpu_map. */
 	for_each_cpu(i, cpu_map) {
 		cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)),
 			    cpu_map);
@@ -7326,10 +7226,19 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
 
 		*per_cpu_ptr(d.sd, i) = sd;
+	}
+
+	/* Build the groups for the domains */
+	for_each_cpu(i, cpu_map) {
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			sd->span_weight = cpumask_weight(sched_domain_span(sd));
+			get_group(i, sd->private, &sd->groups);
+			atomic_inc(&sd->groups->ref);
 
-		for (tmp = sd; tmp; tmp = tmp->parent) {
-			tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
-			build_sched_groups(&d, tmp, cpu_map, i);
+			if (i != cpumask_first(sched_domain_span(sd)))
+				continue;
+
+			build_sched_groups(sd, d.send_covered);
 		}
 	}
 
@@ -7338,18 +7247,21 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
 		if (!cpumask_test_cpu(i, cpu_map))
 			continue;
 
-		sd = *per_cpu_ptr(d.sd, i);
-		for (; sd; sd = sd->parent)
+		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
+			claim_allocations(i, sd);
 			init_sched_groups_power(i, sd);
+		}
 	}
 
 	/* Attach the domains */
+	rcu_read_lock();
 	for_each_cpu(i, cpu_map) {
 		sd = *per_cpu_ptr(d.sd, i);
 		cpu_attach_domain(sd, d.rd, i);
 	}
+	rcu_read_unlock();
 
-	__free_domain_allocs(&d, sa_tmpmask, cpu_map);
+	__free_domain_allocs(&d, sa_sd, cpu_map);
 	return 0;
 
 error:
@@ -7357,11 +7269,6 @@ error:
 	return -ENOMEM;
 }
 
-static int build_sched_domains(const struct cpumask *cpu_map)
-{
-	return __build_sched_domains(cpu_map, NULL);
-}
-
 static cpumask_var_t *doms_cur;	/* current sched domains */
 static int ndoms_cur;		/* number of sched domains in 'doms_cur' */
 static struct sched_domain_attr *dattr_cur;
@@ -7425,31 +7332,24 @@ static int init_sched_domains(const struct cpumask *cpu_map)
 	doms_cur = &fallback_doms;
 	cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map);
 	dattr_cur = NULL;
-	err = build_sched_domains(doms_cur[0]);
+	err = build_sched_domains(doms_cur[0], NULL);
 	register_sched_domain_sysctl();
 
 	return err;
 }
 
-static void destroy_sched_domains(const struct cpumask *cpu_map,
-				  struct cpumask *tmpmask)
-{
-}
-
 /*
  * Detach sched domains from a group of cpus specified in cpu_map
  * These cpus will now be attached to the NULL domain
  */
 static void detach_destroy_domains(const struct cpumask *cpu_map)
 {
-	/* Save because hotplug lock held. */
-	static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS);
 	int i;
 
+	rcu_read_lock();
 	for_each_cpu(i, cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
-	synchronize_sched();
-	destroy_sched_domains(cpu_map, to_cpumask(tmpmask));
+	rcu_read_unlock();
 }
 
 /* handle null as "default" */
@@ -7538,8 +7438,7 @@ match1:
 		goto match2;
 	}
 	/* no match - add a new doms_new */
-	__build_sched_domains(doms_new[i],
-			      dattr_new ? dattr_new + i : NULL);
+	build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
 match2:
 		;
 	}