author    Vincent Guittot <vincent.guittot@linaro.org>  2014-04-11 05:44:37 -0400
committer Ingo Molnar <mingo@kernel.org>                2014-05-07 07:33:49 -0400
commit    143e1e28cb40bed836b0a06567208bd7347c9672 (patch)
tree      7fee7ba69929c04534c17dca02bf39332e110c9a /kernel/sched
parent    107437febd495a50e2cd09c81bbaa84d30e57b07 (diff)
sched: Rework sched_domain topology definition
We replace the old way of configuring the scheduler topology with a new method that enables a platform to declare additional levels (if needed). We still have a default topology table definition that can be used by platforms that don't want more levels than the SMT, MC, CPU and NUMA ones. This table can be overwritten by an arch which either wants to add new levels where load balancing makes sense, like a BOOK or power-gating level, or wants to change the flags configuration of some levels.

For each level, we need a function pointer that returns the cpumask for each cpu, a function pointer that returns the flags for the level, and a name. Only flags that describe the topology can be set by an architecture. The current topology flags are:

  SD_SHARE_CPUPOWER
  SD_SHARE_PKG_RESOURCES
  SD_NUMA
  SD_ASYM_PACKING

Then, each level must be a subset of the next one. The build sequence of the sched_domains will take care of removing useless levels, like those with a single CPU and those with the same CPU span and no more relevant information for load balancing than their children.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hanjun Guo <hanjun.guo@linaro.org>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Jason Low <jason.low2@hp.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux390@de.ibm.com
Cc: linux-ia64@vger.kernel.org
Cc: linux-s390@vger.kernel.org
Link: http://lkml.kernel.org/r/1397209481-28542-2-git-send-email-vincent.guittot@linaro.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
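As an illustration of the new interface (not part of the patch below): an architecture that wants an extra level, say a power-gating level between SMT and MC, could declare its own table and install it with set_sched_topology(). This is a minimal sketch; cpu_corepower_mask(), cpu_corepower_flags() and the GMC name are hypothetical helpers the arch would have to provide, while cpu_smt_mask(), cpu_coregroup_mask(), cpu_cpu_mask(), SD_INIT_NAME() and set_sched_topology() are the pieces introduced or reused by this patch.

/* Hypothetical arch code, for illustration only. */
static const struct cpumask *cpu_corepower_mask(int cpu);	/* provided by the arch */

static int cpu_corepower_flags(void)
{
	/* Only flags in TOPOLOGY_SD_FLAGS are accepted by sd_init(). */
	return SD_SHARE_PKG_RESOURCES;
}

static struct sched_domain_topology_level arch_topology[] = {
#ifdef CONFIG_SCHED_SMT
	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
#endif
	/* extra, arch-specific level: cores sharing a power domain */
	{ cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) },
#ifdef CONFIG_SCHED_MC
	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
#endif
	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
	{ NULL, },
};

static void __init arch_init_sched_topology(void)
{
	/* Each level's cpumask must be a subset of the next level's. */
	set_sched_topology(arch_topology);
}

Degenerate levels (a single CPU, or the same span as the child with no extra information) are still removed automatically when the sched_domains are built.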
Diffstat (limited to 'kernel/sched')
-rw-r--r--  kernel/sched/core.c  233
1 file changed, 120 insertions(+), 113 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 13584f1cccfc..7d332b7899cc 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5566,17 +5566,6 @@ static int __init isolated_cpu_setup(char *str)
 
 __setup("isolcpus=", isolated_cpu_setup);
 
-static const struct cpumask *cpu_cpu_mask(int cpu)
-{
-	return cpumask_of_node(cpu_to_node(cpu));
-}
-
-struct sd_data {
-	struct sched_domain **__percpu sd;
-	struct sched_group **__percpu sg;
-	struct sched_group_power **__percpu sgp;
-};
-
 struct s_data {
 	struct sched_domain ** __percpu sd;
 	struct root_domain *rd;
@@ -5589,21 +5578,6 @@ enum s_alloc {
 	sa_none,
 };
 
-struct sched_domain_topology_level;
-
-typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu);
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
-
-#define SDTL_OVERLAP	0x01
-
-struct sched_domain_topology_level {
-	sched_domain_init_f init;
-	sched_domain_mask_f mask;
-	int flags;
-	int numa_level;
-	struct sd_data data;
-};
-
 /*
  * Build an iteration mask that can exclude certain CPUs from the upwards
  * domain traversal.
@@ -5832,34 +5806,6 @@ int __weak arch_sd_sibling_asym_packing(void)
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
  */
 
-#ifdef CONFIG_SCHED_DEBUG
-# define SD_INIT_NAME(sd, type)	sd->name = #type
-#else
-# define SD_INIT_NAME(sd, type)	do { } while (0)
-#endif
-
-#define SD_INIT_FUNC(type)	\
-static noinline struct sched_domain * \
-sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \
-{ \
-	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \
-	*sd = SD_##type##_INIT; \
-	SD_INIT_NAME(sd, type); \
-	sd->private = &tl->data; \
-	return sd; \
-}
-
-SD_INIT_FUNC(CPU)
-#ifdef CONFIG_SCHED_SMT
- SD_INIT_FUNC(SIBLING)
-#endif
-#ifdef CONFIG_SCHED_MC
- SD_INIT_FUNC(MC)
-#endif
-#ifdef CONFIG_SCHED_BOOK
- SD_INIT_FUNC(BOOK)
-#endif
-
 static int default_relax_domain_level = -1;
 int sched_domain_level_max;
 
@@ -5947,99 +5893,156 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 	*per_cpu_ptr(sdd->sgp, cpu) = NULL;
 }
 
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *cpu_smt_mask(int cpu)
-{
-	return topology_thread_cpumask(cpu);
-}
-#endif
-
-/*
- * Topology list, bottom-up.
- */
-static struct sched_domain_topology_level default_topology[] = {
-#ifdef CONFIG_SCHED_SMT
-	{ sd_init_SIBLING, cpu_smt_mask, },
-#endif
-#ifdef CONFIG_SCHED_MC
-	{ sd_init_MC, cpu_coregroup_mask, },
-#endif
-#ifdef CONFIG_SCHED_BOOK
-	{ sd_init_BOOK, cpu_book_mask, },
-#endif
-	{ sd_init_CPU, cpu_cpu_mask, },
-	{ NULL, },
-};
-
-static struct sched_domain_topology_level *sched_domain_topology = default_topology;
-
-#define for_each_sd_topology(tl)			\
-	for (tl = sched_domain_topology; tl->init; tl++)
-
 #ifdef CONFIG_NUMA
-
 static int sched_domains_numa_levels;
 static int *sched_domains_numa_distance;
 static struct cpumask ***sched_domains_numa_masks;
 static int sched_domains_curr_level;
+#endif
 
-static inline int sd_local_flags(int level)
-{
-	if (sched_domains_numa_distance[level] > RECLAIM_DISTANCE)
-		return 0;
-
-	return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE;
-}
+/*
+ * SD_flags allowed in topology descriptions.
+ *
+ * SD_SHARE_CPUPOWER      - describes SMT topologies
+ * SD_SHARE_PKG_RESOURCES - describes shared caches
+ * SD_NUMA                - describes NUMA topologies
+ *
+ * Odd one out:
+ * SD_ASYM_PACKING        - describes SMT quirks
+ */
+#define TOPOLOGY_SD_FLAGS		\
+	(SD_SHARE_CPUPOWER |		\
+	 SD_SHARE_PKG_RESOURCES |	\
+	 SD_NUMA |			\
+	 SD_ASYM_PACKING)
 
 static struct sched_domain *
-sd_numa_init(struct sched_domain_topology_level *tl, int cpu)
+sd_init(struct sched_domain_topology_level *tl, int cpu)
 {
 	struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu);
-	int level = tl->numa_level;
-	int sd_weight = cpumask_weight(
-			sched_domains_numa_masks[level][cpu_to_node(cpu)]);
+	int sd_weight, sd_flags = 0;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * Ugly hack to pass state to sd_numa_mask()...
+	 */
+	sched_domains_curr_level = tl->numa_level;
+#endif
+
+	sd_weight = cpumask_weight(tl->mask(cpu));
+
+	if (tl->sd_flags)
+		sd_flags = (*tl->sd_flags)();
+	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
+			"wrong sd_flags in topology description\n"))
+		sd_flags &= ~TOPOLOGY_SD_FLAGS;
 
 	*sd = (struct sched_domain){
 		.min_interval		= sd_weight,
 		.max_interval		= 2*sd_weight,
 		.busy_factor		= 32,
 		.imbalance_pct		= 125,
-		.cache_nice_tries	= 2,
-		.busy_idx		= 3,
-		.idle_idx		= 2,
+
+		.cache_nice_tries	= 0,
+		.busy_idx		= 0,
+		.idle_idx		= 0,
 		.newidle_idx		= 0,
 		.wake_idx		= 0,
 		.forkexec_idx		= 0,
 
 		.flags			= 1*SD_LOAD_BALANCE
 					| 1*SD_BALANCE_NEWIDLE
-					| 0*SD_BALANCE_EXEC
-					| 0*SD_BALANCE_FORK
+					| 1*SD_BALANCE_EXEC
+					| 1*SD_BALANCE_FORK
 					| 0*SD_BALANCE_WAKE
-					| 0*SD_WAKE_AFFINE
+					| 1*SD_WAKE_AFFINE
 					| 0*SD_SHARE_CPUPOWER
 					| 0*SD_SHARE_PKG_RESOURCES
-					| 1*SD_SERIALIZE
+					| 0*SD_SERIALIZE
 					| 0*SD_PREFER_SIBLING
-					| 1*SD_NUMA
-					| sd_local_flags(level)
+					| 0*SD_NUMA
+					| sd_flags
 					,
+
 		.last_balance		= jiffies,
 		.balance_interval	= sd_weight,
+		.smt_gain		= 0,
 		.max_newidle_lb_cost	= 0,
 		.next_decay_max_lb_cost	= jiffies,
+#ifdef CONFIG_SCHED_DEBUG
+		.name			= tl->name,
+#endif
 	};
-	SD_INIT_NAME(sd, NUMA);
-	sd->private = &tl->data;
 
 	/*
-	 * Ugly hack to pass state to sd_numa_mask()...
+	 * Convert topological properties into behaviour.
 	 */
-	sched_domains_curr_level = tl->numa_level;
+
+	if (sd->flags & SD_SHARE_CPUPOWER) {
+		sd->imbalance_pct = 110;
+		sd->smt_gain = 1178; /* ~15% */
+		sd->flags |= arch_sd_sibling_asym_packing();
+
+	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+		sd->imbalance_pct = 117;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+
+#ifdef CONFIG_NUMA
+	} else if (sd->flags & SD_NUMA) {
+		sd->cache_nice_tries = 2;
+		sd->busy_idx = 3;
+		sd->idle_idx = 2;
+
+		sd->flags |= SD_SERIALIZE;
+		if (sched_domains_numa_distance[tl->numa_level] > RECLAIM_DISTANCE) {
+			sd->flags &= ~(SD_BALANCE_EXEC |
+				       SD_BALANCE_FORK |
+				       SD_WAKE_AFFINE);
+		}
+
+#endif
+	} else {
+		sd->flags |= SD_PREFER_SIBLING;
+		sd->cache_nice_tries = 1;
+		sd->busy_idx = 2;
+		sd->idle_idx = 1;
+	}
+
+	sd->private = &tl->data;
 
 	return sd;
 }
 
+/*
+ * Topology list, bottom-up.
+ */
+static struct sched_domain_topology_level default_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+#ifdef CONFIG_SCHED_MC
+	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+#endif
+#ifdef CONFIG_SCHED_BOOK
+	{ cpu_book_mask, SD_INIT_NAME(BOOK) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+struct sched_domain_topology_level *sched_domain_topology = default_topology;
+
+#define for_each_sd_topology(tl)			\
+	for (tl = sched_domain_topology; tl->mask; tl++)
+
+void set_sched_topology(struct sched_domain_topology_level *tl)
+{
+	sched_domain_topology = tl;
+}
+
+#ifdef CONFIG_NUMA
+
 static const struct cpumask *sd_numa_mask(int cpu)
 {
 	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
@@ -6183,7 +6186,10 @@ static void sched_init_numa(void)
 		}
 	}
 
-	tl = kzalloc((ARRAY_SIZE(default_topology) + level) *
+	/* Compute default topology size */
+	for (i = 0; sched_domain_topology[i].mask; i++);
+
+	tl = kzalloc((i + level) *
 			sizeof(struct sched_domain_topology_level), GFP_KERNEL);
 	if (!tl)
 		return;
@@ -6191,18 +6197,19 @@ static void sched_init_numa(void)
 	/*
 	 * Copy the default topology bits..
 	 */
-	for (i = 0; default_topology[i].init; i++)
-		tl[i] = default_topology[i];
+	for (i = 0; sched_domain_topology[i].mask; i++)
+		tl[i] = sched_domain_topology[i];
 
 	/*
 	 * .. and append 'j' levels of NUMA goodness.
 	 */
 	for (j = 0; j < level; i++, j++) {
 		tl[i] = (struct sched_domain_topology_level){
-			.init = sd_numa_init,
 			.mask = sd_numa_mask,
+			.sd_flags = cpu_numa_flags,
 			.flags = SDTL_OVERLAP,
 			.numa_level = j,
+			SD_INIT_NAME(NUMA)
 		};
 	}
 
@@ -6360,7 +6367,7 @@ struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl,
 		const struct cpumask *cpu_map, struct sched_domain_attr *attr,
 		struct sched_domain *child, int cpu)
 {
-	struct sched_domain *sd = tl->init(tl, cpu);
+	struct sched_domain *sd = sd_init(tl, cpu);
 	if (!sd)
 		return child;
 