aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorVincent Guittot <vincent.guittot@linaro.org>2014-04-11 05:44:37 -0400
committerIngo Molnar <mingo@kernel.org>2014-05-07 07:33:49 -0400
commit143e1e28cb40bed836b0a06567208bd7347c9672 (patch)
tree7fee7ba69929c04534c17dca02bf39332e110c9a /include/linux
parent107437febd495a50e2cd09c81bbaa84d30e57b07 (diff)
sched: Rework sched_domain topology definition
We replace the old way to configure the scheduler topology with a new method which enables a platform to declare additionnal level (if needed). We still have a default topology table definition that can be used by platform that don't want more level than the SMT, MC, CPU and NUMA ones. This table can be overwritten by an arch which either wants to add new level where a load balance make sense like BOOK or powergating level or wants to change the flags configuration of some levels. For each level, we need a function pointer that returns cpumask for each cpu, a function pointer that returns the flags for the level and a name. Only flags that describe topology, can be set by an architecture. The current topology flags are: SD_SHARE_CPUPOWER SD_SHARE_PKG_RESOURCES SD_NUMA SD_ASYM_PACKING Then, each level must be a subset on the next one. The build sequence of the sched_domain will take care of removing useless levels like those with 1 CPU and those with the same CPU span and no more relevant information for load balancing than its children. Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org> Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Reviewed-by: Preeti U Murthy <preeti@linux.vnet.ibm.com> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Signed-off-by: Peter Zijlstra <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Christoph Lameter <cl@linux.com> Cc: David S. Miller <davem@davemloft.net> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Hanjun Guo <hanjun.guo@linaro.org> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Jason Low <jason.low2@hp.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Tony Luck <tony.luck@intel.com> Cc: linux390@de.ibm.com Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Link: http://lkml.kernel.org/r/1397209481-28542-2-git-send-email-vincent.guittot@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/sched.h53
-rw-r--r--include/linux/topology.h128
2 files changed, 66 insertions, 115 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a4298fb0d85..656b035c30e5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -879,6 +879,27 @@ enum cpu_idle_type {
879 879
880extern int __weak arch_sd_sibiling_asym_packing(void); 880extern int __weak arch_sd_sibiling_asym_packing(void);
881 881
882#ifdef CONFIG_SCHED_SMT
883static inline const int cpu_smt_flags(void)
884{
885 return SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES;
886}
887#endif
888
889#ifdef CONFIG_SCHED_MC
890static inline const int cpu_core_flags(void)
891{
892 return SD_SHARE_PKG_RESOURCES;
893}
894#endif
895
896#ifdef CONFIG_NUMA
897static inline const int cpu_numa_flags(void)
898{
899 return SD_NUMA;
900}
901#endif
902
882struct sched_domain_attr { 903struct sched_domain_attr {
883 int relax_domain_level; 904 int relax_domain_level;
884}; 905};
@@ -985,6 +1006,38 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms);
985 1006
986bool cpus_share_cache(int this_cpu, int that_cpu); 1007bool cpus_share_cache(int this_cpu, int that_cpu);
987 1008
1009typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
1010typedef const int (*sched_domain_flags_f)(void);
1011
1012#define SDTL_OVERLAP 0x01
1013
1014struct sd_data {
1015 struct sched_domain **__percpu sd;
1016 struct sched_group **__percpu sg;
1017 struct sched_group_power **__percpu sgp;
1018};
1019
1020struct sched_domain_topology_level {
1021 sched_domain_mask_f mask;
1022 sched_domain_flags_f sd_flags;
1023 int flags;
1024 int numa_level;
1025 struct sd_data data;
1026#ifdef CONFIG_SCHED_DEBUG
1027 char *name;
1028#endif
1029};
1030
1031extern struct sched_domain_topology_level *sched_domain_topology;
1032
1033extern void set_sched_topology(struct sched_domain_topology_level *tl);
1034
1035#ifdef CONFIG_SCHED_DEBUG
1036# define SD_INIT_NAME(type) .name = #type
1037#else
1038# define SD_INIT_NAME(type)
1039#endif
1040
988#else /* CONFIG_SMP */ 1041#else /* CONFIG_SMP */
989 1042
990struct sched_domain_attr; 1043struct sched_domain_attr;
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 7062330a1329..973671ff9e7d 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -66,121 +66,6 @@ int arch_update_cpu_topology(void);
66#define PENALTY_FOR_NODE_WITH_CPUS (1) 66#define PENALTY_FOR_NODE_WITH_CPUS (1)
67#endif 67#endif
68 68
69/*
70 * Below are the 3 major initializers used in building sched_domains:
71 * SD_SIBLING_INIT, for SMT domains
72 * SD_CPU_INIT, for SMP domains
73 *
74 * Any architecture that cares to do any tuning to these values should do so
75 * by defining their own arch-specific initializer in include/asm/topology.h.
76 * A definition there will automagically override these default initializers
77 * and allow arch-specific performance tuning of sched_domains.
78 * (Only non-zero and non-null fields need be specified.)
79 */
80
81#ifdef CONFIG_SCHED_SMT
82/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is,
83 * so can't we drop this in favor of CONFIG_SCHED_SMT?
84 */
85#define ARCH_HAS_SCHED_WAKE_IDLE
86/* Common values for SMT siblings */
87#ifndef SD_SIBLING_INIT
88#define SD_SIBLING_INIT (struct sched_domain) { \
89 .min_interval = 1, \
90 .max_interval = 2, \
91 .busy_factor = 64, \
92 .imbalance_pct = 110, \
93 \
94 .flags = 1*SD_LOAD_BALANCE \
95 | 1*SD_BALANCE_NEWIDLE \
96 | 1*SD_BALANCE_EXEC \
97 | 1*SD_BALANCE_FORK \
98 | 0*SD_BALANCE_WAKE \
99 | 1*SD_WAKE_AFFINE \
100 | 1*SD_SHARE_CPUPOWER \
101 | 1*SD_SHARE_PKG_RESOURCES \
102 | 0*SD_SERIALIZE \
103 | 0*SD_PREFER_SIBLING \
104 | arch_sd_sibling_asym_packing() \
105 , \
106 .last_balance = jiffies, \
107 .balance_interval = 1, \
108 .smt_gain = 1178, /* 15% */ \
109 .max_newidle_lb_cost = 0, \
110 .next_decay_max_lb_cost = jiffies, \
111}
112#endif
113#endif /* CONFIG_SCHED_SMT */
114
115#ifdef CONFIG_SCHED_MC
116/* Common values for MC siblings. for now mostly derived from SD_CPU_INIT */
117#ifndef SD_MC_INIT
118#define SD_MC_INIT (struct sched_domain) { \
119 .min_interval = 1, \
120 .max_interval = 4, \
121 .busy_factor = 64, \
122 .imbalance_pct = 125, \
123 .cache_nice_tries = 1, \
124 .busy_idx = 2, \
125 .wake_idx = 0, \
126 .forkexec_idx = 0, \
127 \
128 .flags = 1*SD_LOAD_BALANCE \
129 | 1*SD_BALANCE_NEWIDLE \
130 | 1*SD_BALANCE_EXEC \
131 | 1*SD_BALANCE_FORK \
132 | 0*SD_BALANCE_WAKE \
133 | 1*SD_WAKE_AFFINE \
134 | 0*SD_SHARE_CPUPOWER \
135 | 1*SD_SHARE_PKG_RESOURCES \
136 | 0*SD_SERIALIZE \
137 , \
138 .last_balance = jiffies, \
139 .balance_interval = 1, \
140 .max_newidle_lb_cost = 0, \
141 .next_decay_max_lb_cost = jiffies, \
142}
143#endif
144#endif /* CONFIG_SCHED_MC */
145
146/* Common values for CPUs */
147#ifndef SD_CPU_INIT
148#define SD_CPU_INIT (struct sched_domain) { \
149 .min_interval = 1, \
150 .max_interval = 4, \
151 .busy_factor = 64, \
152 .imbalance_pct = 125, \
153 .cache_nice_tries = 1, \
154 .busy_idx = 2, \
155 .idle_idx = 1, \
156 .newidle_idx = 0, \
157 .wake_idx = 0, \
158 .forkexec_idx = 0, \
159 \
160 .flags = 1*SD_LOAD_BALANCE \
161 | 1*SD_BALANCE_NEWIDLE \
162 | 1*SD_BALANCE_EXEC \
163 | 1*SD_BALANCE_FORK \
164 | 0*SD_BALANCE_WAKE \
165 | 1*SD_WAKE_AFFINE \
166 | 0*SD_SHARE_CPUPOWER \
167 | 0*SD_SHARE_PKG_RESOURCES \
168 | 0*SD_SERIALIZE \
169 | 1*SD_PREFER_SIBLING \
170 , \
171 .last_balance = jiffies, \
172 .balance_interval = 1, \
173 .max_newidle_lb_cost = 0, \
174 .next_decay_max_lb_cost = jiffies, \
175}
176#endif
177
178#ifdef CONFIG_SCHED_BOOK
179#ifndef SD_BOOK_INIT
180#error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!!
181#endif
182#endif /* CONFIG_SCHED_BOOK */
183
184#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID 69#ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
185DECLARE_PER_CPU(int, numa_node); 70DECLARE_PER_CPU(int, numa_node);
186 71
@@ -295,4 +180,17 @@ static inline int cpu_to_mem(int cpu)
295#define topology_core_cpumask(cpu) cpumask_of(cpu) 180#define topology_core_cpumask(cpu) cpumask_of(cpu)
296#endif 181#endif
297 182
183#ifdef CONFIG_SCHED_SMT
184static inline const struct cpumask *cpu_smt_mask(int cpu)
185{
186 return topology_thread_cpumask(cpu);
187}
188#endif
189
190static inline const struct cpumask *cpu_cpu_mask(int cpu)
191{
192 return cpumask_of_node(cpu_to_node(cpu));
193}
194
195
298#endif /* _LINUX_TOPOLOGY_H */ 196#endif /* _LINUX_TOPOLOGY_H */