sched: Allow for overlapping sched_domain spans

Allow for sched_domain spans that overlap by giving such domains their own sched_group list instead of sharing the sched_groups amongst each other.

This is needed for machines with more than 16 nodes, because sched_domain_node_span() will generate a node mask from the 16 nearest nodes without regard to whether these masks overlap.

Currently sched_domains have a sched_group that maps to their child sched_domain span, and since there is no overlap we share the sched_group between the sched_domains of the various CPUs. If, however, there is overlap, we would need to link the sched_group list in different ways for each CPU, and hence sharing isn't possible.

In order to solve this, allocate private sched_groups for each CPU's sched_domain, but have the sched_groups share a sched_group_power structure such that we can uniquely track the power.

Reported-and-tested-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/n/tip-08bxqw9wis3qti9u5inifh3y@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2011-07-15 04:35:52 -0400
committer: Ingo Molnar <mingo@elte.hu> 2011-07-20 12:32:41 -0400
commit: e3589f6c81e4764d32a25d2a2a0afe54fa344f5c (patch)
tree: 414bf6bdbad3f04f629fa2a72254ea85acf723f4
parent: 9c3f75cbd144014bea6af866a154cc2e73ab2287 (diff)
3 files changed, 132 insertions, 29 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2e5b3c8e2d3e..bde99d5358dc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -844,6 +844,7 @@ enum cpu_idle_type {
 #define SD_SERIALIZE            0x0400  /* Only a single load balancing instance */
 #define SD_ASYM_PACKING         0x0800  /* Place busy groups earlier in the domain */
 #define SD_PREFER_SIBLING       0x1000  /* Prefer to place tasks in a sibling domain */
+#define SD_OVERLAP              0x2000  /* sched_domains of this level overlap */
 enum powersavings_balance_level {
        POWERSAVINGS_BALANCE_NONE = 0,  /* No power saving load balance */
@@ -894,6 +895,7 @@ static inline int sd_power_saving_flags(void)
 }
 struct sched_group_power {
+        atomic_t ref;
        /*
         * CPU power of this group, SCHED_LOAD_SCALE being max power for a
         * single CPU.
diff --git a/kernel/sched.c b/kernel/sched.c
index 36c10d25d4cd..921adf6f6fad 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6774,10 +6774,36 @@ static struct root_domain *alloc_rootdomain(void)
        return rd;
 }
+static void free_sched_groups(struct sched_group *sg, int free_sgp)
+{      /*
+        * Free every sched_group on the circular ->next list that starts at
+        * sg; a NULL sg is a no-op.  When free_sgp is non-zero, each group's
+        * sgp->ref is dropped and the sched_group_power is kfree()d once the
+        * count reaches zero.  When free_sgp is zero, sgp is neither touched
+        * nor freed.
+        */
+        struct sched_group *tmp, *first;
+        if (!sg)
+                return;
+        first = sg;
+        do {
+                tmp = sg->next;         /* fetch the successor before sg is freed */
+                if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
+                        kfree(sg->sgp);
+                kfree(sg);
+                sg = tmp;
+        } while (sg != first);          /* stop once the walk is back at the starting group */
+}
 static void free_sched_domain(struct rcu_head *rcu)
 {
        struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
-        if (atomic_dec_and_test(&sd->groups->ref)) {
+        /*
+         * If its an overlapping domain it has private groups, iterate and
+         * nuke them all.
+         */
+        if (sd->flags & SD_OVERLAP) {
+                free_sched_groups(sd->groups, 1);
+        } else if (atomic_dec_and_test(&sd->groups->ref)) {
                kfree(sd->groups->sgp);
                kfree(sd->groups);
        }
@@ -6967,15 +6993,73 @@ struct sched_domain_topology_level;
 typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+#define SDTL_OVERLAP    0x01    /* bit for sched_domain_topology_level.flags; build_sched_domains() sets SD_OVERLAP on domains built from a level carrying it */
 struct sched_domain_topology_level {
        sched_domain_init_f init;
        sched_domain_mask_f mask;
+        int                 flags;     /* SDTL_* bits, e.g. SDTL_OVERLAP */
        struct sd_data      data;
 };
-/*
+static int
- * Assumes the sched_domain tree is fully constructed
+build_overlap_sched_groups(struct sched_domain *sd, int cpu)
- */
+{      /*
+        * Build a private, circular list of sched_groups for sd.
+        *
+        * Walks the CPUs in sd's span; for each CPU not yet covered it
+        * allocates a group whose cpumask is the span of that CPU's child
+        * domain (or just the CPU itself when there is no child).  Each
+        * group points at the per-cpu sched_group_power of the first CPU in
+        * its mask and takes a reference on it.  sd->groups is set to the
+        * group whose mask contains @cpu.
+        *
+        * Returns 0 on success, or -ENOMEM after freeing the groups built so
+        * far.
+        *
+        * NOTE(review): uses the shared sched_domains_tmpmask but, unlike
+        * build_sched_groups(), has no lockdep_assert_held() -- presumably
+        * the caller holds sched_domains_mutex; confirm.
+        */
+        struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
+        const struct cpumask *span = sched_domain_span(sd);
+        struct cpumask *covered = sched_domains_tmpmask;
+        struct sd_data *sdd = sd->private;
+        struct sched_domain *child;
+        int i;
+        cpumask_clear(covered);
+        for_each_cpu(i, span) {
+                struct cpumask *sg_span;
+                if (cpumask_test_cpu(i, covered))      /* already inside an earlier group's mask */
+                        continue;
+                sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
+                                GFP_KERNEL, cpu_to_node(i));
+                if (!sg)
+                        goto fail;
+                sg_span = sched_group_cpus(sg);
+                child = *per_cpu_ptr(sdd->sd, i);      /* CPU i's own domain in this sd_data */
+                if (child->child) {
+                        child = child->child;
+                        cpumask_copy(sg_span, sched_domain_span(child));
+                } else
+                        cpumask_set_cpu(i, sg_span);
+                cpumask_or(covered, covered, sg_span);
+                sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
+                atomic_inc(&sg->sgp->ref);
+                if (cpumask_test_cpu(cpu, sg_span))    /* if several groups contain cpu, the last one visited wins */
+                        groups = sg;
+                if (!first)
+                        first = sg;
+                if (last)
+                        last->next = sg;
+                last = sg;
+                last->next = first;     /* keep the list circular after every append so the fail path can walk it */
+        }
+        sd->groups = groups;
+        return 0;
+fail:
+        free_sched_groups(first, 0);    /* NOTE(review): free_sgp == 0, so the sgp refs taken above are not dropped here */
+        return -ENOMEM;
+}
 static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
 {
        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6987,23 +7071,21 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
        if (sg) {
                *sg = *per_cpu_ptr(sdd->sg, cpu);
                (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
+                atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
        }
        return cpu;
 }
 /*
- * build_sched_groups takes the cpumask we wish to span, and a pointer
- * to a function which identifies what group(along with sched group) a CPU
- * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
- * (due to the fact that we keep track of groups covered with a struct cpumask).
- *
 * build_sched_groups will build a circular linked list of the groups
 * covered by the given span, and will set each group's ->cpumask correctly,
 * and ->cpu_power to 0.
+ *
+ * Assumes the sched_domain tree is fully constructed
 */
-static void
+static int
-build_sched_groups(struct sched_domain *sd)
+build_sched_groups(struct sched_domain *sd, int cpu)
 {
        struct sched_group *first = NULL, *last = NULL;
        struct sd_data *sdd = sd->private;
@@ -7011,6 +7093,12 @@ build_sched_groups(struct sched_domain *sd)
        struct cpumask *covered;
        int i;
+        get_group(cpu, sdd, &sd->groups);
+        atomic_inc(&sd->groups->ref);
+        if (cpu != cpumask_first(sched_domain_span(sd)))
+                return 0;
        lockdep_assert_held(&sched_domains_mutex);
        covered = sched_domains_tmpmask;
@@ -7042,6 +7130,8 @@ build_sched_groups(struct sched_domain *sd)
                last = sg;
        }
        last->next = first;
+        return 0;
 }
 /*
@@ -7056,12 +7146,17 @@ build_sched_groups(struct sched_domain *sd)
 */
 static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 {
-        WARN_ON(!sd || !sd->groups);
+        struct sched_group *sg = sd->groups;   /* NOTE(review): sd is dereferenced here, before the WARN_ON(!sd ...) below is evaluated */
-        if (cpu != group_first_cpu(sd->groups))
+        WARN_ON(!sd || !sg);
-                return;
+        do {   /*
+                * Set group_weight on every group of sd's ->next ring, not
+                * only on sd->groups as the removed code did.
+                */
+                sg->group_weight = cpumask_weight(sched_group_cpus(sg));
+                sg = sg->next;
+        } while (sg != sd->groups);
-        sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
+        if (cpu != group_first_cpu(sg))        /* the loop above leaves sg == sd->groups */
+                return;
         update_group_power(sd, cpu);
 }
@@ -7182,16 +7277,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
 static void claim_allocations(int cpu, struct sched_domain *sd)
 {
        struct sd_data *sdd = sd->private;
-        struct sched_group *sg = sd->groups;
        WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
        *per_cpu_ptr(sdd->sd, cpu) = NULL;
-        if (cpu == cpumask_first(sched_group_cpus(sg))) {
+        if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))   /* this CPU's group has a non-zero ref: clear the per-cpu slot (presumably so __sdt_free() will not kfree() it -- confirm) */
-                WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
                *per_cpu_ptr(sdd->sg, cpu) = NULL;
+        if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))  /* same test, applied independently to the sched_group_power slot */
                *per_cpu_ptr(sdd->sgp, cpu) = NULL;
-        }
 }
 #ifdef CONFIG_SCHED_SMT
@@ -7216,7 +7310,7 @@ static struct sched_domain_topology_level default_topology[] = {
 #endif
        { sd_init_CPU, cpu_cpu_mask, },
 #ifdef CONFIG_NUMA
-        { sd_init_NODE, cpu_node_mask, },
+        { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
        { sd_init_ALLNODES, cpu_allnodes_mask, },
 #endif
        { NULL, },
@@ -7284,7 +7378,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
                struct sd_data *sdd = &tl->data;
                for_each_cpu(j, cpu_map) {
-                        kfree(*per_cpu_ptr(sdd->sd, j));
+                        struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
+                        if (sd && (sd->flags & SD_OVERLAP))
+                                free_sched_groups(sd->groups, 0);
                        kfree(*per_cpu_ptr(sdd->sg, j));
                        kfree(*per_cpu_ptr(sdd->sgp, j));
                }
@@ -7336,8 +7432,11 @@ static int build_sched_domains(const struct cpumask *cpu_map,
                struct sched_domain_topology_level *tl;
                sd = NULL;
-                for (tl = sched_domain_topology; tl->init; tl++)
+                for (tl = sched_domain_topology; tl->init; tl++) {
                        sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
+                        if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
+                                sd->flags |= SD_OVERLAP;
+                }
                while (sd->child)
                        sd = sd->child;
@@ -7349,13 +7448,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
        for_each_cpu(i, cpu_map) {
                for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
                        sd->span_weight = cpumask_weight(sched_domain_span(sd));
-                        get_group(i, sd->private, &sd->groups);
+                        if (sd->flags & SD_OVERLAP) {
-                        atomic_inc(&sd->groups->ref);
+                                if (build_overlap_sched_groups(sd, i))
+                                        goto error;
-                        if (i != cpumask_first(sched_domain_span(sd)))
+                        } else {
-                                continue;
+                                if (build_sched_groups(sd, i))
+                                        goto error;
-                        build_sched_groups(sd);
+                        }
                }
        }
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index be40f7371ee1..1e7066d76c26 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -70,3 +70,5 @@ SCHED_FEAT(NONIRQ_POWER, 1)
 * using the scheduler IPI. Reduces rq->lock contention/bounces.
 */
 SCHED_FEAT(TTWU_QUEUE, 1)
+SCHED_FEAT(FORCE_SD_OVERLAP, 0)
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2011-07-15 04:35:52 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-07-20 12:32:41 -0400
commit	e3589f6c81e4764d32a25d2a2a0afe54fa344f5c (patch)
tree	414bf6bdbad3f04f629fa2a72254ea85acf723f4
parent	9c3f75cbd144014bea6af866a154cc2e73ab2287 (diff)

diff --git a/include/linux/sched.h b/include/linux/sched.h index 2e5b3c8e2d3e..bde99d5358dc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h
@@ -844,6 +844,7 @@ enum cpu_idle_type {
844	#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */	844	#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
845	#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */	845	#define SD_ASYM_PACKING 0x0800 /* Place busy groups earlier in the domain */
846	#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */	846	#define SD_PREFER_SIBLING 0x1000 /* Prefer to place tasks in a sibling domain */
		847	#define SD_OVERLAP 0x2000 /* sched_domains of this level overlap */
847		848
848	enum powersavings_balance_level {	849	enum powersavings_balance_level {
849	POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */	850	POWERSAVINGS_BALANCE_NONE = 0, /* No power saving load balance */
@@ -894,6 +895,7 @@ static inline int sd_power_saving_flags(void)
894	}	895	}
895		896
896	struct sched_group_power {	897	struct sched_group_power {
		898	atomic_t ref;
897	/*	899	/*
898	* CPU power of this group, SCHED_LOAD_SCALE being max power for a	900	* CPU power of this group, SCHED_LOAD_SCALE being max power for a
899	* single CPU.	901	* single CPU.


diff --git a/kernel/sched.c b/kernel/sched.c index 36c10d25d4cd..921adf6f6fad 100644 --- a/kernel/sched.c +++ b/kernel/sched.c
@@ -6774,10 +6774,36 @@ static struct root_domain *alloc_rootdomain(void)
6774	return rd;	6774	return rd;
6775	}	6775	}
6776		6776
		6777	static void free_sched_groups(struct sched_group *sg, int free_sgp)
		6778	{
		6779	struct sched_group *tmp, *first;
		6780
		6781	if (!sg)
		6782	return;
		6783
		6784	first = sg;
		6785	do {
		6786	tmp = sg->next;
		6787
		6788	if (free_sgp && atomic_dec_and_test(&sg->sgp->ref))
		6789	kfree(sg->sgp);
		6790
		6791	kfree(sg);
		6792	sg = tmp;
		6793	} while (sg != first);
		6794	}
		6795
6777	static void free_sched_domain(struct rcu_head *rcu)	6796	static void free_sched_domain(struct rcu_head *rcu)
6778	{	6797	{
6779	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);	6798	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
6780	if (atomic_dec_and_test(&sd->groups->ref)) {	6799
		6800	/*
		6801	* If its an overlapping domain it has private groups, iterate and
		6802	* nuke them all.
		6803	*/
		6804	if (sd->flags & SD_OVERLAP) {
		6805	free_sched_groups(sd->groups, 1);
		6806	} else if (atomic_dec_and_test(&sd->groups->ref)) {
6781	kfree(sd->groups->sgp);	6807	kfree(sd->groups->sgp);
6782	kfree(sd->groups);	6808	kfree(sd->groups);
6783	}	6809	}
@@ -6967,15 +6993,73 @@ struct sched_domain_topology_level;
6967	typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);	6993	typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
6968	typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);	6994	typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
6969		6995
		6996	#define SDTL_OVERLAP 0x01
		6997
6970	struct sched_domain_topology_level {	6998	struct sched_domain_topology_level {
6971	sched_domain_init_f init;	6999	sched_domain_init_f init;
6972	sched_domain_mask_f mask;	7000	sched_domain_mask_f mask;
		7001	int flags;
6973	struct sd_data data;	7002	struct sd_data data;
6974	};	7003	};
6975		7004
6976	/*	7005	static int
6977	* Assumes the sched_domain tree is fully constructed	7006	build_overlap_sched_groups(struct sched_domain *sd, int cpu)
6978	*/	7007	{
		7008	struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg;
		7009	const struct cpumask *span = sched_domain_span(sd);
		7010	struct cpumask *covered = sched_domains_tmpmask;
		7011	struct sd_data *sdd = sd->private;
		7012	struct sched_domain *child;
		7013	int i;
		7014
		7015	cpumask_clear(covered);
		7016
		7017	for_each_cpu(i, span) {
		7018	struct cpumask *sg_span;
		7019
		7020	if (cpumask_test_cpu(i, covered))
		7021	continue;
		7022
		7023	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
		7024	GFP_KERNEL, cpu_to_node(i));
		7025
		7026	if (!sg)
		7027	goto fail;
		7028
		7029	sg_span = sched_group_cpus(sg);
		7030
		7031	child = *per_cpu_ptr(sdd->sd, i);
		7032	if (child->child) {
		7033	child = child->child;
		7034	cpumask_copy(sg_span, sched_domain_span(child));
		7035	} else
		7036	cpumask_set_cpu(i, sg_span);
		7037
		7038	cpumask_or(covered, covered, sg_span);
		7039
		7040	sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span));
		7041	atomic_inc(&sg->sgp->ref);
		7042
		7043	if (cpumask_test_cpu(cpu, sg_span))
		7044	groups = sg;
		7045
		7046	if (!first)
		7047	first = sg;
		7048	if (last)
		7049	last->next = sg;
		7050	last = sg;
		7051	last->next = first;
		7052	}
		7053	sd->groups = groups;
		7054
		7055	return 0;
		7056
		7057	fail:
		7058	free_sched_groups(first, 0);
		7059
		7060	return -ENOMEM;
		7061	}
		7062
6979	static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)	7063	static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
6980	{	7064	{
6981	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);	7065	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
@@ -6987,23 +7071,21 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg)
6987	if (sg) {	7071	if (sg) {
6988	*sg = *per_cpu_ptr(sdd->sg, cpu);	7072	*sg = *per_cpu_ptr(sdd->sg, cpu);
6989	(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);	7073	(*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu);
		7074	atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */
6990	}	7075	}
6991		7076
6992	return cpu;	7077	return cpu;
6993	}	7078	}
6994		7079
6995	/*	7080	/*
6996	* build_sched_groups takes the cpumask we wish to span, and a pointer
6997	* to a function which identifies what group(along with sched group) a CPU
6998	* belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids
6999	* (due to the fact that we keep track of groups covered with a struct cpumask).
7000	*
7001	* build_sched_groups will build a circular linked list of the groups	7081	* build_sched_groups will build a circular linked list of the groups
7002	* covered by the given span, and will set each group's ->cpumask correctly,	7082	* covered by the given span, and will set each group's ->cpumask correctly,
7003	* and ->cpu_power to 0.	7083	* and ->cpu_power to 0.
		7084	*
		7085	* Assumes the sched_domain tree is fully constructed
7004	*/	7086	*/
7005	static void	7087	static int
7006	build_sched_groups(struct sched_domain *sd)	7088	build_sched_groups(struct sched_domain *sd, int cpu)
7007	{	7089	{
7008	struct sched_group *first = NULL, *last = NULL;	7090	struct sched_group *first = NULL, *last = NULL;
7009	struct sd_data *sdd = sd->private;	7091	struct sd_data *sdd = sd->private;
@@ -7011,6 +7093,12 @@ build_sched_groups(struct sched_domain *sd)
7011	struct cpumask *covered;	7093	struct cpumask *covered;
7012	int i;	7094	int i;
7013		7095
		7096	get_group(cpu, sdd, &sd->groups);
		7097	atomic_inc(&sd->groups->ref);
		7098
		7099	if (cpu != cpumask_first(sched_domain_span(sd)))
		7100	return 0;
		7101
7014	lockdep_assert_held(&sched_domains_mutex);	7102	lockdep_assert_held(&sched_domains_mutex);
7015	covered = sched_domains_tmpmask;	7103	covered = sched_domains_tmpmask;
7016		7104
@@ -7042,6 +7130,8 @@ build_sched_groups(struct sched_domain *sd)
7042	last = sg;	7130	last = sg;
7043	}	7131	}
7044	last->next = first;	7132	last->next = first;
		7133
		7134	return 0;
7045	}	7135	}
7046		7136
7047	/*	7137	/*
@@ -7056,12 +7146,17 @@ build_sched_groups(struct sched_domain *sd)
7056	*/	7146	*/
7057	static void init_sched_groups_power(int cpu, struct sched_domain *sd)	7147	static void init_sched_groups_power(int cpu, struct sched_domain *sd)
7058	{	7148	{
7059	WARN_ON(!sd || !sd->groups);	7149	struct sched_group *sg = sd->groups;
7060		7150
7061	if (cpu != group_first_cpu(sd->groups))	7151	WARN_ON(!sd || !sg);
7062	return;	7152
		7153	do {
		7154	sg->group_weight = cpumask_weight(sched_group_cpus(sg));
		7155	sg = sg->next;
		7156	} while (sg != sd->groups);
7063		7157
7064	sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));	7158	if (cpu != group_first_cpu(sg))
		7159	return;
7065		7160
7066	update_group_power(sd, cpu);	7161	update_group_power(sd, cpu);
7067	}	7162	}
@@ -7182,16 +7277,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
7182	static void claim_allocations(int cpu, struct sched_domain *sd)	7277	static void claim_allocations(int cpu, struct sched_domain *sd)
7183	{	7278	{
7184	struct sd_data *sdd = sd->private;	7279	struct sd_data *sdd = sd->private;
7185	struct sched_group *sg = sd->groups;
7186		7280
7187	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);	7281	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
7188	*per_cpu_ptr(sdd->sd, cpu) = NULL;	7282	*per_cpu_ptr(sdd->sd, cpu) = NULL;
7189		7283
7190	if (cpu == cpumask_first(sched_group_cpus(sg))) {	7284	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
7191	WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg);
7192	*per_cpu_ptr(sdd->sg, cpu) = NULL;	7285	*per_cpu_ptr(sdd->sg, cpu) = NULL;
		7286
		7287	if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref))
7193	*per_cpu_ptr(sdd->sgp, cpu) = NULL;	7288	*per_cpu_ptr(sdd->sgp, cpu) = NULL;
7194	}
7195	}	7289	}
7196		7290
7197	#ifdef CONFIG_SCHED_SMT	7291	#ifdef CONFIG_SCHED_SMT
@@ -7216,7 +7310,7 @@ static struct sched_domain_topology_level default_topology[] = {
7216	#endif	7310	#endif
7217	{ sd_init_CPU, cpu_cpu_mask, },	7311	{ sd_init_CPU, cpu_cpu_mask, },
7218	#ifdef CONFIG_NUMA	7312	#ifdef CONFIG_NUMA
7219	{ sd_init_NODE, cpu_node_mask, },	7313	{ sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, },
7220	{ sd_init_ALLNODES, cpu_allnodes_mask, },	7314	{ sd_init_ALLNODES, cpu_allnodes_mask, },
7221	#endif	7315	#endif
7222	{ NULL, },	7316	{ NULL, },
@@ -7284,7 +7378,9 @@ static void __sdt_free(const struct cpumask *cpu_map)
7284	struct sd_data *sdd = &tl->data;	7378	struct sd_data *sdd = &tl->data;
7285		7379
7286	for_each_cpu(j, cpu_map) {	7380	for_each_cpu(j, cpu_map) {
7287	kfree(*per_cpu_ptr(sdd->sd, j));	7381	struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
		7382	if (sd && (sd->flags & SD_OVERLAP))
		7383	free_sched_groups(sd->groups, 0);
7288	kfree(*per_cpu_ptr(sdd->sg, j));	7384	kfree(*per_cpu_ptr(sdd->sg, j));
7289	kfree(*per_cpu_ptr(sdd->sgp, j));	7385	kfree(*per_cpu_ptr(sdd->sgp, j));
7290	}	7386	}
@@ -7336,8 +7432,11 @@ static int build_sched_domains(const struct cpumask *cpu_map,
7336	struct sched_domain_topology_level *tl;	7432	struct sched_domain_topology_level *tl;
7337		7433
7338	sd = NULL;	7434	sd = NULL;
7339	for (tl = sched_domain_topology; tl->init; tl++)	7435	for (tl = sched_domain_topology; tl->init; tl++) {
7340	sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);	7436	sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i);
		7437	if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
		7438	sd->flags |= SD_OVERLAP;
		7439	}
7341		7440
7342	while (sd->child)	7441	while (sd->child)
7343	sd = sd->child;	7442	sd = sd->child;
@@ -7349,13 +7448,13 @@ static int build_sched_domains(const struct cpumask *cpu_map,
7349	for_each_cpu(i, cpu_map) {	7448	for_each_cpu(i, cpu_map) {
7350	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {	7449	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
7351	sd->span_weight = cpumask_weight(sched_domain_span(sd));	7450	sd->span_weight = cpumask_weight(sched_domain_span(sd));
7352	get_group(i, sd->private, &sd->groups);	7451	if (sd->flags & SD_OVERLAP) {
7353	atomic_inc(&sd->groups->ref);	7452	if (build_overlap_sched_groups(sd, i))
7354		7453	goto error;
7355	if (i != cpumask_first(sched_domain_span(sd)))	7454	} else {
7356	continue;	7455	if (build_sched_groups(sd, i))
7357		7456	goto error;
7358	build_sched_groups(sd);	7457	}
7359	}	7458	}
7360	}	7459	}
7361		7460


diff --git a/kernel/sched_features.h b/kernel/sched_features.h index be40f7371ee1..1e7066d76c26 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h
@@ -70,3 +70,5 @@ SCHED_FEAT(NONIRQ_POWER, 1)
70	* using the scheduler IPI. Reduces rq->lock contention/bounces.	70	* using the scheduler IPI. Reduces rq->lock contention/bounces.
71	*/	71	*/
72	SCHED_FEAT(TTWU_QUEUE, 1)	72	SCHED_FEAT(TTWU_QUEUE, 1)
		73
		74	SCHED_FEAT(FORCE_SD_OVERLAP, 0)