sched: Change NODE sched_domain group creation

The NODE sched_domain is 'special' in that it allocates sched_groups per CPU, instead of sharing the sched_groups between all CPUs.

While this might have some benefits on large NUMA and avoid remote memory accesses when iterating the sched_groups, this does break current code that assumes sched_groups are shared between all sched_domains (since the dynamic cpu_power patches).

So refactor the NODE groups to behave like all other groups.

(The ALLNODES domain again shared its groups across the CPUs for some reason).

If someone does measure a performance decrease due to this change we need to revisit this and come up with another way to have both dynamic cpu_power and NUMA work nicely together.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/20110407122941.978111700@chello.nl
Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2011-04-07 08:09:45 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-04-11 06:58:17 -0400
commit: cd4ea6ae3982f6861da3b510e69cbc194f331d83 (patch)
tree: 7ca7e19bf9be55102768f7c1ab2bd76643e6524b /kernel/sched.c
parent: a06dadbec5c5df0bf3a35f33616f67d10ca9ba28 (diff)
1 files changed, 32 insertions, 197 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index e3818f1b98fe..72d561fa67b7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6861,29 +6861,18 @@ struct static_sched_domain {
 struct s_data {
 #ifdef CONFIG_NUMA
        int                     sd_allnodes;
-        cpumask_var_t           domainspan;
-        cpumask_var_t           covered;
-        cpumask_var_t           notcovered;
 #endif
        cpumask_var_t           nodemask;
        cpumask_var_t           send_covered;
        cpumask_var_t           tmpmask;
-        struct sched_group      **sched_group_nodes;
        struct root_domain      *rd;
 };
 enum s_alloc {
-        sa_sched_groups = 0,
        sa_rootdomain,
        sa_tmpmask,
        sa_send_covered,
        sa_nodemask,
-        sa_sched_group_nodes,
-#ifdef CONFIG_NUMA
-        sa_notcovered,
-        sa_covered,
-        sa_domainspan,
-#endif
        sa_none,
 };
@@ -6979,18 +6968,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
 }
 #ifdef CONFIG_NUMA
-/*
- * The init_sched_build_groups can't handle what we want to do with node
- * groups, so roll our own. Now each node has its own list of groups which
- * gets dynamically allocated.
- */
 static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
-static struct sched_group ***sched_group_nodes_bycpu;
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_node);
-static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
-static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
+static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map,
                                 struct sched_group **sg,
                                 struct cpumask *nodemask)
 {
@@ -7000,142 +6981,27 @@ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
        group = cpumask_first(nodemask);
        if (sg)
-                *sg = &per_cpu(sched_group_allnodes, group).sg;
+                *sg = &per_cpu(sched_group_node, group).sg;
        return group;
 }
-static void init_numa_sched_groups_power(struct sched_group *group_head)
+static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
-{
+static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
-        struct sched_group *sg = group_head;
-        int j;
-        if (!sg)
-                return;
-        do {
-                for_each_cpu(j, sched_group_cpus(sg)) {
-                        struct sched_domain *sd;
-                        sd = &per_cpu(phys_domains, j).sd;
-                        if (j != group_first_cpu(sd->groups)) {
-                                /*
-                                 * Only add "power" once for each
-                                 * physical package.
-                                 */
-                                continue;
-                        }
-                        sg->cpu_power += sd->groups->cpu_power;
-                }
-                sg = sg->next;
-        } while (sg != group_head);
-}
-static int build_numa_sched_groups(struct s_data *d,
+static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
-                                   const struct cpumask *cpu_map, int num)
+                                 struct sched_group **sg,
+                                 struct cpumask *nodemask)
 {
-        struct sched_domain *sd;
+        int group;
-        struct sched_group *sg, *prev;
-        int n, j;
-        cpumask_clear(d->covered);
-        cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map);
-        if (cpumask_empty(d->nodemask)) {
-                d->sched_group_nodes[num] = NULL;
-                goto out;
-        }
-        sched_domain_node_span(num, d->domainspan);
-        cpumask_and(d->domainspan, d->domainspan, cpu_map);
-        sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
-                          GFP_KERNEL, num);
-        if (!sg) {
-                printk(KERN_WARNING "Can not alloc domain group for node %d\n",
-                       num);
-                return -ENOMEM;
-        }
-        d->sched_group_nodes[num] = sg;
-        for_each_cpu(j, d->nodemask) {
-                sd = &per_cpu(node_domains, j).sd;
-                sd->groups = sg;
-        }
-        sg->cpu_power = 0;
+        cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
-        cpumask_copy(sched_group_cpus(sg), d->nodemask);
+        group = cpumask_first(nodemask);
-        sg->next = sg;
-        cpumask_or(d->covered, d->covered, d->nodemask);
-        prev = sg;
+        if (sg)
-        for (j = 0; j < nr_node_ids; j++) {
+                *sg = &per_cpu(sched_group_allnodes, group).sg;
-                n = (num + j) % nr_node_ids;
+        return group;
-                cpumask_complement(d->notcovered, d->covered);
-                cpumask_and(d->tmpmask, d->notcovered, cpu_map);
-                cpumask_and(d->tmpmask, d->tmpmask, d->domainspan);
-                if (cpumask_empty(d->tmpmask))
-                        break;
-                cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n));
-                if (cpumask_empty(d->tmpmask))
-                        continue;
-                sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
-                                  GFP_KERNEL, num);
-                if (!sg) {
-                        printk(KERN_WARNING
-                               "Can not alloc domain group for node %d\n", j);
-                        return -ENOMEM;
-                }
-                sg->cpu_power = 0;
-                cpumask_copy(sched_group_cpus(sg), d->tmpmask);
-                sg->next = prev->next;
-                cpumask_or(d->covered, d->covered, d->tmpmask);
-                prev->next = sg;
-                prev = sg;
-        }
-out:
-        return 0;
 }
-#endif /* CONFIG_NUMA */
-#ifdef CONFIG_NUMA
-/* Free memory allocated for various sched_group structures */
-static void free_sched_groups(const struct cpumask *cpu_map,
-                              struct cpumask *nodemask)
-{
-        int cpu, i;
-        for_each_cpu(cpu, cpu_map) {
-                struct sched_group **sched_group_nodes
-                        = sched_group_nodes_bycpu[cpu];
-                if (!sched_group_nodes)
-                        continue;
-                for (i = 0; i < nr_node_ids; i++) {
-                        struct sched_group *oldsg, *sg = sched_group_nodes[i];
-                        cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
-                        if (cpumask_empty(nodemask))
-                                continue;
-                        if (sg == NULL)
-                                continue;
-                        sg = sg->next;
-next_sg:
-                        oldsg = sg;
-                        sg = sg->next;
-                        kfree(oldsg);
-                        if (oldsg != sched_group_nodes[i])
-                                goto next_sg;
-                }
-                kfree(sched_group_nodes);
-                sched_group_nodes_bycpu[cpu] = NULL;
-        }
-}
-#else /* !CONFIG_NUMA */
-static void free_sched_groups(const struct cpumask *cpu_map,
-                              struct cpumask *nodemask)
-{
-}
 #endif /* CONFIG_NUMA */
 /*
@@ -7236,9 +7102,6 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
                                 const struct cpumask *cpu_map)
 {
        switch (what) {
-        case sa_sched_groups:
-                free_sched_groups(cpu_map, d->tmpmask); /* fall through */
-                d->sched_group_nodes = NULL;
        case sa_rootdomain:
                free_rootdomain(d->rd); /* fall through */
        case sa_tmpmask:
@@ -7247,16 +7110,6 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
                free_cpumask_var(d->send_covered); /* fall through */
        case sa_nodemask:
                free_cpumask_var(d->nodemask); /* fall through */
-        case sa_sched_group_nodes:
-#ifdef CONFIG_NUMA
-                kfree(d->sched_group_nodes); /* fall through */
-        case sa_notcovered:
-                free_cpumask_var(d->notcovered); /* fall through */
-        case sa_covered:
-                free_cpumask_var(d->covered); /* fall through */
-        case sa_domainspan:
-                free_cpumask_var(d->domainspan); /* fall through */
-#endif
        case sa_none:
                break;
        }
@@ -7265,24 +7118,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
 static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
                                                   const struct cpumask *cpu_map)
 {
-#ifdef CONFIG_NUMA
-        if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
-                return sa_none;
-        if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
-                return sa_domainspan;
-        if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
-                return sa_covered;
-        /* Allocate the per-node list of sched groups */
-        d->sched_group_nodes = kcalloc(nr_node_ids,
-                                      sizeof(struct sched_group *), GFP_KERNEL);
-        if (!d->sched_group_nodes) {
-                printk(KERN_WARNING "Can not alloc sched group node list\n");
-                return sa_notcovered;
-        }
-        sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
-#endif
        if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
-                return sa_sched_group_nodes;
+                return sa_none;
        if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
                return sa_nodemask;
        if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
@@ -7322,6 +7159,7 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
        if (parent)
                parent->child = sd;
        cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
+        cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask);
 #endif
        return sd;
 }
@@ -7434,6 +7272,13 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
                                                d->send_covered, d->tmpmask);
                break;
 #ifdef CONFIG_NUMA
+        case SD_LV_NODE:
+                sd = &per_cpu(node_domains, cpu).sd;
+                if (cpu == cpumask_first(sched_domain_span(sd)))
+                        init_sched_build_groups(sched_domain_span(sd), cpu_map,
+                                                &cpu_to_node_group,
+                                                d->send_covered, d->tmpmask);
        case SD_LV_ALLNODES:
                init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
                                        d->send_covered, d->tmpmask);
@@ -7462,7 +7307,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
        alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
        if (alloc_state != sa_rootdomain)
                goto error;
-        alloc_state = sa_sched_groups;
        /*
         * Set up domains for cpus specified by the cpu_map.
@@ -7486,16 +7330,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
                build_sched_groups(&d, SD_LV_MC, cpu_map, i);
                build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
+                build_sched_groups(&d, SD_LV_NODE, cpu_map, i);
        }
 #ifdef CONFIG_NUMA
        /* Set up node groups */
        if (d.sd_allnodes)
                build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
-        for (i = 0; i < nr_node_ids; i++)
-                if (build_numa_sched_groups(&d, cpu_map, i))
-                        goto error;
 #endif
        /* Calculate CPU power for physical packages and nodes */
@@ -7524,15 +7365,16 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
        }
 #ifdef CONFIG_NUMA
-        for (i = 0; i < nr_node_ids; i++)
+        for_each_cpu(i, cpu_map) {
-                init_numa_sched_groups_power(d.sched_group_nodes[i]);
+                sd = &per_cpu(node_domains, i).sd;
+                init_sched_groups_power(i, sd);
+        }
        if (d.sd_allnodes) {
-                struct sched_group *sg;
+                for_each_cpu(i, cpu_map) {
+                        sd = &per_cpu(allnodes_domains, i).sd;
-                cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,
+                        init_sched_groups_power(i, sd);
-                                                                d.tmpmask);
+                }
-                init_numa_sched_groups_power(sg);
        }
 #endif
@@ -7550,7 +7392,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
                cpu_attach_domain(sd, d.rd, i);
        }
-        d.sched_group_nodes = NULL; /* don't free this we still need it */
        __free_domain_allocs(&d, sa_tmpmask, cpu_map);
        return 0;
@@ -7636,7 +7477,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
 static void destroy_sched_domains(const struct cpumask *cpu_map,
                                       struct cpumask *tmpmask)
 {
-        free_sched_groups(cpu_map, tmpmask);
 }
 /*
@@ -7913,11 +7753,6 @@ void __init sched_init_smp(void)
        alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
        alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
-#if defined(CONFIG_NUMA)
-        sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
-                                                                GFP_KERNEL);
-        BUG_ON(sched_group_nodes_bycpu == NULL);
-#endif
        get_online_cpus();
        mutex_lock(&sched_domains_mutex);
        init_sched_domains(cpu_active_mask);
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-04-07 08:09:45 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-11 06:58:17 -0400
commit	cd4ea6ae3982f6861da3b510e69cbc194f331d83 (patch)
tree	7ca7e19bf9be55102768f7c1ab2bd76643e6524b /kernel/sched.c
parent	a06dadbec5c5df0bf3a35f33616f67d10ca9ba28 (diff)

diff --git a/kernel/sched.c b/kernel/sched.c
index e3818f1b98fe..72d561fa67b7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6861,29 +6861,18 @@ struct static_sched_domain {
6861	struct s_data {	6861	struct s_data {
6862	#ifdef CONFIG_NUMA	6862	#ifdef CONFIG_NUMA
6863	int sd_allnodes;	6863	int sd_allnodes;
6864	cpumask_var_t domainspan;
6865	cpumask_var_t covered;
6866	cpumask_var_t notcovered;
6867	#endif	6864	#endif
6868	cpumask_var_t nodemask;	6865	cpumask_var_t nodemask;
6869	cpumask_var_t send_covered;	6866	cpumask_var_t send_covered;
6870	cpumask_var_t tmpmask;	6867	cpumask_var_t tmpmask;
6871	struct sched_group **sched_group_nodes;
6872	struct root_domain *rd;	6868	struct root_domain *rd;
6873	};	6869	};
6874		6870
6875	enum s_alloc {	6871	enum s_alloc {
6876	sa_sched_groups = 0,
6877	sa_rootdomain,	6872	sa_rootdomain,
6878	sa_tmpmask,	6873	sa_tmpmask,
6879	sa_send_covered,	6874	sa_send_covered,
6880	sa_nodemask,	6875	sa_nodemask,
6881	sa_sched_group_nodes,
6882	#ifdef CONFIG_NUMA
6883	sa_notcovered,
6884	sa_covered,
6885	sa_domainspan,
6886	#endif
6887	sa_none,	6876	sa_none,
6888	};	6877	};
6889		6878
@@ -6979,18 +6968,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
6979	}	6968	}
6980		6969
6981	#ifdef CONFIG_NUMA	6970	#ifdef CONFIG_NUMA
6982	/*
6983	* The init_sched_build_groups can't handle what we want to do with node
6984	* groups, so roll our own. Now each node has its own list of groups which
6985	* gets dynamically allocated.
6986	*/
6987	static DEFINE_PER_CPU(struct static_sched_domain, node_domains);	6971	static DEFINE_PER_CPU(struct static_sched_domain, node_domains);
6988	static struct sched_group ***sched_group_nodes_bycpu;	6972	static DEFINE_PER_CPU(struct static_sched_group, sched_group_node);
6989
6990	static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
6991	static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
6992		6973
6993	static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,	6974	static int cpu_to_node_group(int cpu, const struct cpumask *cpu_map,
6994	struct sched_group **sg,	6975	struct sched_group **sg,
6995	struct cpumask *nodemask)	6976	struct cpumask *nodemask)
6996	{	6977	{
@@ -7000,142 +6981,27 @@ static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
7000	group = cpumask_first(nodemask);	6981	group = cpumask_first(nodemask);
7001		6982
7002	if (sg)	6983	if (sg)
7003	*sg = &per_cpu(sched_group_allnodes, group).sg;	6984	*sg = &per_cpu(sched_group_node, group).sg;
7004	return group;	6985	return group;
7005	}	6986	}
7006		6987
7007	static void init_numa_sched_groups_power(struct sched_group *group_head)	6988	static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains);
7008	{	6989	static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes);
7009	struct sched_group *sg = group_head;
7010	int j;
7011
7012	if (!sg)
7013	return;
7014	do {
7015	for_each_cpu(j, sched_group_cpus(sg)) {
7016	struct sched_domain *sd;
7017
7018	sd = &per_cpu(phys_domains, j).sd;
7019	if (j != group_first_cpu(sd->groups)) {
7020	/*
7021	* Only add "power" once for each
7022	* physical package.
7023	*/
7024	continue;
7025	}
7026
7027	sg->cpu_power += sd->groups->cpu_power;
7028	}
7029	sg = sg->next;
7030	} while (sg != group_head);
7031	}
7032		6990
7033	static int build_numa_sched_groups(struct s_data *d,	6991	static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map,
7034	const struct cpumask *cpu_map, int num)	6992	struct sched_group **sg,
		6993	struct cpumask *nodemask)
7035	{	6994	{
7036	struct sched_domain *sd;	6995	int group;
7037	struct sched_group *sg, *prev;
7038	int n, j;
7039
7040	cpumask_clear(d->covered);
7041	cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map);
7042	if (cpumask_empty(d->nodemask)) {
7043	d->sched_group_nodes[num] = NULL;
7044	goto out;
7045	}
7046
7047	sched_domain_node_span(num, d->domainspan);
7048	cpumask_and(d->domainspan, d->domainspan, cpu_map);
7049
7050	sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
7051	GFP_KERNEL, num);
7052	if (!sg) {
7053	printk(KERN_WARNING "Can not alloc domain group for node %d\n",
7054	num);
7055	return -ENOMEM;
7056	}
7057	d->sched_group_nodes[num] = sg;
7058
7059	for_each_cpu(j, d->nodemask) {
7060	sd = &per_cpu(node_domains, j).sd;
7061	sd->groups = sg;
7062	}
7063		6996
7064	sg->cpu_power = 0;	6997	cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map);
7065	cpumask_copy(sched_group_cpus(sg), d->nodemask);	6998	group = cpumask_first(nodemask);
7066	sg->next = sg;
7067	cpumask_or(d->covered, d->covered, d->nodemask);
7068		6999
7069	prev = sg;	7000	if (sg)
7070	for (j = 0; j < nr_node_ids; j++) {	7001	*sg = &per_cpu(sched_group_allnodes, group).sg;
7071	n = (num + j) % nr_node_ids;	7002	return group;
7072	cpumask_complement(d->notcovered, d->covered);
7073	cpumask_and(d->tmpmask, d->notcovered, cpu_map);
7074	cpumask_and(d->tmpmask, d->tmpmask, d->domainspan);
7075	if (cpumask_empty(d->tmpmask))
7076	break;
7077	cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n));
7078	if (cpumask_empty(d->tmpmask))
7079	continue;
7080	sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(),
7081	GFP_KERNEL, num);
7082	if (!sg) {
7083	printk(KERN_WARNING
7084	"Can not alloc domain group for node %d\n", j);
7085	return -ENOMEM;
7086	}
7087	sg->cpu_power = 0;
7088	cpumask_copy(sched_group_cpus(sg), d->tmpmask);
7089	sg->next = prev->next;
7090	cpumask_or(d->covered, d->covered, d->tmpmask);
7091	prev->next = sg;
7092	prev = sg;
7093	}
7094	out:
7095	return 0;
7096	}	7003	}
7097	#endif /* CONFIG_NUMA */
7098
7099	#ifdef CONFIG_NUMA
7100	/* Free memory allocated for various sched_group structures */
7101	static void free_sched_groups(const struct cpumask *cpu_map,
7102	struct cpumask *nodemask)
7103	{
7104	int cpu, i;
7105		7004
7106	for_each_cpu(cpu, cpu_map) {
7107	struct sched_group **sched_group_nodes
7108	= sched_group_nodes_bycpu[cpu];
7109
7110	if (!sched_group_nodes)
7111	continue;
7112
7113	for (i = 0; i < nr_node_ids; i++) {
7114	struct sched_group *oldsg, *sg = sched_group_nodes[i];
7115
7116	cpumask_and(nodemask, cpumask_of_node(i), cpu_map);
7117	if (cpumask_empty(nodemask))
7118	continue;
7119
7120	if (sg == NULL)
7121	continue;
7122	sg = sg->next;
7123	next_sg:
7124	oldsg = sg;
7125	sg = sg->next;
7126	kfree(oldsg);
7127	if (oldsg != sched_group_nodes[i])
7128	goto next_sg;
7129	}
7130	kfree(sched_group_nodes);
7131	sched_group_nodes_bycpu[cpu] = NULL;
7132	}
7133	}
7134	#else /* !CONFIG_NUMA */
7135	static void free_sched_groups(const struct cpumask *cpu_map,
7136	struct cpumask *nodemask)
7137	{
7138	}
7139	#endif /* CONFIG_NUMA */	7005	#endif /* CONFIG_NUMA */
7140		7006
7141	/*	7007	/*
@@ -7236,9 +7102,6 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
7236	const struct cpumask *cpu_map)	7102	const struct cpumask *cpu_map)
7237	{	7103	{
7238	switch (what) {	7104	switch (what) {
7239	case sa_sched_groups:
7240	free_sched_groups(cpu_map, d->tmpmask); /* fall through */
7241	d->sched_group_nodes = NULL;
7242	case sa_rootdomain:	7105	case sa_rootdomain:
7243	free_rootdomain(d->rd); /* fall through */	7106	free_rootdomain(d->rd); /* fall through */
7244	case sa_tmpmask:	7107	case sa_tmpmask:
@@ -7247,16 +7110,6 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
7247	free_cpumask_var(d->send_covered); /* fall through */	7110	free_cpumask_var(d->send_covered); /* fall through */
7248	case sa_nodemask:	7111	case sa_nodemask:
7249	free_cpumask_var(d->nodemask); /* fall through */	7112	free_cpumask_var(d->nodemask); /* fall through */
7250	case sa_sched_group_nodes:
7251	#ifdef CONFIG_NUMA
7252	kfree(d->sched_group_nodes); /* fall through */
7253	case sa_notcovered:
7254	free_cpumask_var(d->notcovered); /* fall through */
7255	case sa_covered:
7256	free_cpumask_var(d->covered); /* fall through */
7257	case sa_domainspan:
7258	free_cpumask_var(d->domainspan); /* fall through */
7259	#endif
7260	case sa_none:	7113	case sa_none:
7261	break;	7114	break;
7262	}	7115	}
@@ -7265,24 +7118,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
7265	static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,	7118	static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
7266	const struct cpumask *cpu_map)	7119	const struct cpumask *cpu_map)
7267	{	7120	{
7268	#ifdef CONFIG_NUMA
7269	if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL))
7270	return sa_none;
7271	if (!alloc_cpumask_var(&d->covered, GFP_KERNEL))
7272	return sa_domainspan;
7273	if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL))
7274	return sa_covered;
7275	/* Allocate the per-node list of sched groups */
7276	d->sched_group_nodes = kcalloc(nr_node_ids,
7277	sizeof(struct sched_group *), GFP_KERNEL);
7278	if (!d->sched_group_nodes) {
7279	printk(KERN_WARNING "Can not alloc sched group node list\n");
7280	return sa_notcovered;
7281	}
7282	sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes;
7283	#endif
7284	if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))	7121	if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL))
7285	return sa_sched_group_nodes;	7122	return sa_none;
7286	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))	7123	if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
7287	return sa_nodemask;	7124	return sa_nodemask;
7288	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))	7125	if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
@@ -7322,6 +7159,7 @@ static struct sched_domain *__build_numa_sched_domains(struct s_data *d,
7322	if (parent)	7159	if (parent)
7323	parent->child = sd;	7160	parent->child = sd;
7324	cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);	7161	cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map);
		7162	cpu_to_node_group(i, cpu_map, &sd->groups, d->tmpmask);
7325	#endif	7163	#endif
7326	return sd;	7164	return sd;
7327	}	7165	}
@@ -7434,6 +7272,13 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
7434	d->send_covered, d->tmpmask);	7272	d->send_covered, d->tmpmask);
7435	break;	7273	break;
7436	#ifdef CONFIG_NUMA	7274	#ifdef CONFIG_NUMA
		7275	case SD_LV_NODE:
		7276	sd = &per_cpu(node_domains, cpu).sd;
		7277	if (cpu == cpumask_first(sched_domain_span(sd)))
		7278	init_sched_build_groups(sched_domain_span(sd), cpu_map,
		7279	&cpu_to_node_group,
		7280	d->send_covered, d->tmpmask);
		7281
7437	case SD_LV_ALLNODES:	7282	case SD_LV_ALLNODES:
7438	init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,	7283	init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group,
7439	d->send_covered, d->tmpmask);	7284	d->send_covered, d->tmpmask);
@@ -7462,7 +7307,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7462	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);	7307	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
7463	if (alloc_state != sa_rootdomain)	7308	if (alloc_state != sa_rootdomain)
7464	goto error;	7309	goto error;
7465	alloc_state = sa_sched_groups;
7466		7310
7467	/*	7311	/*
7468	* Set up domains for cpus specified by the cpu_map.	7312	* Set up domains for cpus specified by the cpu_map.
@@ -7486,16 +7330,13 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7486	build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);	7330	build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
7487	build_sched_groups(&d, SD_LV_MC, cpu_map, i);	7331	build_sched_groups(&d, SD_LV_MC, cpu_map, i);
7488	build_sched_groups(&d, SD_LV_CPU, cpu_map, i);	7332	build_sched_groups(&d, SD_LV_CPU, cpu_map, i);
		7333	build_sched_groups(&d, SD_LV_NODE, cpu_map, i);
7489	}	7334	}
7490		7335
7491	#ifdef CONFIG_NUMA	7336	#ifdef CONFIG_NUMA
7492	/* Set up node groups */	7337	/* Set up node groups */
7493	if (d.sd_allnodes)	7338	if (d.sd_allnodes)
7494	build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);	7339	build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0);
7495
7496	for (i = 0; i < nr_node_ids; i++)
7497	if (build_numa_sched_groups(&d, cpu_map, i))
7498	goto error;
7499	#endif	7340	#endif
7500		7341
7501	/* Calculate CPU power for physical packages and nodes */	7342	/* Calculate CPU power for physical packages and nodes */
@@ -7524,15 +7365,16 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7524	}	7365	}
7525		7366
7526	#ifdef CONFIG_NUMA	7367	#ifdef CONFIG_NUMA
7527	for (i = 0; i < nr_node_ids; i++)	7368	for_each_cpu(i, cpu_map) {
7528	init_numa_sched_groups_power(d.sched_group_nodes[i]);	7369	sd = &per_cpu(node_domains, i).sd;
		7370	init_sched_groups_power(i, sd);
		7371	}
7529		7372
7530	if (d.sd_allnodes) {	7373	if (d.sd_allnodes) {
7531	struct sched_group *sg;	7374	for_each_cpu(i, cpu_map) {
7532		7375	sd = &per_cpu(allnodes_domains, i).sd;
7533	cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg,	7376	init_sched_groups_power(i, sd);
7534	d.tmpmask);	7377	}
7535	init_numa_sched_groups_power(sg);
7536	}	7378	}
7537	#endif	7379	#endif
7538		7380
@@ -7550,7 +7392,6 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7550	cpu_attach_domain(sd, d.rd, i);	7392	cpu_attach_domain(sd, d.rd, i);
7551	}	7393	}
7552		7394
7553	d.sched_group_nodes = NULL; /* don't free this we still need it */
7554	__free_domain_allocs(&d, sa_tmpmask, cpu_map);	7395	__free_domain_allocs(&d, sa_tmpmask, cpu_map);
7555	return 0;	7396	return 0;
7556		7397
@@ -7636,7 +7477,6 @@ static int init_sched_domains(const struct cpumask *cpu_map)
7636	static void destroy_sched_domains(const struct cpumask *cpu_map,	7477	static void destroy_sched_domains(const struct cpumask *cpu_map,
7637	struct cpumask *tmpmask)	7478	struct cpumask *tmpmask)
7638	{	7479	{
7639	free_sched_groups(cpu_map, tmpmask);
7640	}	7480	}
7641		7481
7642	/*	7482	/*
@@ -7913,11 +7753,6 @@ void __init sched_init_smp(void)
7913	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);	7753	alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL);
7914	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);	7754	alloc_cpumask_var(&fallback_doms, GFP_KERNEL);
7915		7755
7916	#if defined(CONFIG_NUMA)
7917	sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **),
7918	GFP_KERNEL);
7919	BUG_ON(sched_group_nodes_bycpu == NULL);
7920	#endif
7921	get_online_cpus();	7756	get_online_cpus();
7922	mutex_lock(&sched_domains_mutex);	7757	mutex_lock(&sched_domains_mutex);
7923	init_sched_domains(cpu_active_mask);	7758	init_sched_domains(cpu_active_mask);