diff options
| -rw-r--r-- | arch/ia64/include/asm/topology.h | 25 | ||||
| -rw-r--r-- | arch/mips/include/asm/mach-ip27/topology.h | 17 | ||||
| -rw-r--r-- | arch/powerpc/include/asm/topology.h | 36 | ||||
| -rw-r--r-- | arch/sh/include/asm/topology.h | 25 | ||||
| -rw-r--r-- | arch/sparc/include/asm/topology_64.h | 19 | ||||
| -rw-r--r-- | arch/tile/include/asm/topology.h | 26 | ||||
| -rw-r--r-- | arch/x86/include/asm/topology.h | 38 | ||||
| -rw-r--r-- | include/linux/topology.h | 37 | ||||
| -rw-r--r-- | kernel/sched/core.c | 280 |
9 files changed, 185 insertions, 318 deletions
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 09f646753d1a..a2496e449b75 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h | |||
| @@ -70,31 +70,6 @@ void build_cpu_to_node_map(void); | |||
| 70 | .nr_balance_failed = 0, \ | 70 | .nr_balance_failed = 0, \ |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | /* sched_domains SD_NODE_INIT for IA64 NUMA machines */ | ||
| 74 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 75 | .parent = NULL, \ | ||
| 76 | .child = NULL, \ | ||
| 77 | .groups = NULL, \ | ||
| 78 | .min_interval = 8, \ | ||
| 79 | .max_interval = 8*(min(num_online_cpus(), 32U)), \ | ||
| 80 | .busy_factor = 64, \ | ||
| 81 | .imbalance_pct = 125, \ | ||
| 82 | .cache_nice_tries = 2, \ | ||
| 83 | .busy_idx = 3, \ | ||
| 84 | .idle_idx = 2, \ | ||
| 85 | .newidle_idx = 0, \ | ||
| 86 | .wake_idx = 0, \ | ||
| 87 | .forkexec_idx = 0, \ | ||
| 88 | .flags = SD_LOAD_BALANCE \ | ||
| 89 | | SD_BALANCE_NEWIDLE \ | ||
| 90 | | SD_BALANCE_EXEC \ | ||
| 91 | | SD_BALANCE_FORK \ | ||
| 92 | | SD_SERIALIZE, \ | ||
| 93 | .last_balance = jiffies, \ | ||
| 94 | .balance_interval = 64, \ | ||
| 95 | .nr_balance_failed = 0, \ | ||
| 96 | } | ||
| 97 | |||
| 98 | #endif /* CONFIG_NUMA */ | 73 | #endif /* CONFIG_NUMA */ |
| 99 | 74 | ||
| 100 | #ifdef CONFIG_SMP | 75 | #ifdef CONFIG_SMP |
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h index 1b1a7d1632b9..b2cf641f206f 100644 --- a/arch/mips/include/asm/mach-ip27/topology.h +++ b/arch/mips/include/asm/mach-ip27/topology.h | |||
| @@ -36,23 +36,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES]; | |||
| 36 | 36 | ||
| 37 | #define node_distance(from, to) (__node_distances[(from)][(to)]) | 37 | #define node_distance(from, to) (__node_distances[(from)][(to)]) |
| 38 | 38 | ||
| 39 | /* sched_domains SD_NODE_INIT for SGI IP27 machines */ | ||
| 40 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 41 | .parent = NULL, \ | ||
| 42 | .child = NULL, \ | ||
| 43 | .groups = NULL, \ | ||
| 44 | .min_interval = 8, \ | ||
| 45 | .max_interval = 32, \ | ||
| 46 | .busy_factor = 32, \ | ||
| 47 | .imbalance_pct = 125, \ | ||
| 48 | .cache_nice_tries = 1, \ | ||
| 49 | .flags = SD_LOAD_BALANCE | \ | ||
| 50 | SD_BALANCE_EXEC, \ | ||
| 51 | .last_balance = jiffies, \ | ||
| 52 | .balance_interval = 1, \ | ||
| 53 | .nr_balance_failed = 0, \ | ||
| 54 | } | ||
| 55 | |||
| 56 | #include <asm-generic/topology.h> | 39 | #include <asm-generic/topology.h> |
| 57 | 40 | ||
| 58 | #endif /* _ASM_MACH_TOPOLOGY_H */ | 41 | #endif /* _ASM_MACH_TOPOLOGY_H */ |
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index c97185885c6d..852ed1b384f6 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h | |||
| @@ -18,12 +18,6 @@ struct device_node; | |||
| 18 | */ | 18 | */ |
| 19 | #define RECLAIM_DISTANCE 10 | 19 | #define RECLAIM_DISTANCE 10 |
| 20 | 20 | ||
| 21 | /* | ||
| 22 | * Avoid creating an extra level of balancing (SD_ALLNODES) on the largest | ||
| 23 | * POWER7 boxes which have a maximum of 32 nodes. | ||
| 24 | */ | ||
| 25 | #define SD_NODES_PER_DOMAIN 32 | ||
| 26 | |||
| 27 | #include <asm/mmzone.h> | 21 | #include <asm/mmzone.h> |
| 28 | 22 | ||
| 29 | static inline int cpu_to_node(int cpu) | 23 | static inline int cpu_to_node(int cpu) |
| @@ -51,36 +45,6 @@ static inline int pcibus_to_node(struct pci_bus *bus) | |||
| 51 | cpu_all_mask : \ | 45 | cpu_all_mask : \ |
| 52 | cpumask_of_node(pcibus_to_node(bus))) | 46 | cpumask_of_node(pcibus_to_node(bus))) |
| 53 | 47 | ||
| 54 | /* sched_domains SD_NODE_INIT for PPC64 machines */ | ||
| 55 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 56 | .min_interval = 8, \ | ||
| 57 | .max_interval = 32, \ | ||
| 58 | .busy_factor = 32, \ | ||
| 59 | .imbalance_pct = 125, \ | ||
| 60 | .cache_nice_tries = 1, \ | ||
| 61 | .busy_idx = 3, \ | ||
| 62 | .idle_idx = 1, \ | ||
| 63 | .newidle_idx = 0, \ | ||
| 64 | .wake_idx = 0, \ | ||
| 65 | .forkexec_idx = 0, \ | ||
| 66 | \ | ||
| 67 | .flags = 1*SD_LOAD_BALANCE \ | ||
| 68 | | 0*SD_BALANCE_NEWIDLE \ | ||
| 69 | | 1*SD_BALANCE_EXEC \ | ||
| 70 | | 1*SD_BALANCE_FORK \ | ||
| 71 | | 0*SD_BALANCE_WAKE \ | ||
| 72 | | 1*SD_WAKE_AFFINE \ | ||
| 73 | | 0*SD_PREFER_LOCAL \ | ||
| 74 | | 0*SD_SHARE_CPUPOWER \ | ||
| 75 | | 0*SD_POWERSAVINGS_BALANCE \ | ||
| 76 | | 0*SD_SHARE_PKG_RESOURCES \ | ||
| 77 | | 1*SD_SERIALIZE \ | ||
| 78 | | 0*SD_PREFER_SIBLING \ | ||
| 79 | , \ | ||
| 80 | .last_balance = jiffies, \ | ||
| 81 | .balance_interval = 1, \ | ||
| 82 | } | ||
| 83 | |||
| 84 | extern int __node_distance(int, int); | 48 | extern int __node_distance(int, int); |
| 85 | #define node_distance(a, b) __node_distance(a, b) | 49 | #define node_distance(a, b) __node_distance(a, b) |
| 86 | 50 | ||
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 88e734069fa6..b0a282d65f6a 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h | |||
| @@ -3,31 +3,6 @@ | |||
| 3 | 3 | ||
| 4 | #ifdef CONFIG_NUMA | 4 | #ifdef CONFIG_NUMA |
| 5 | 5 | ||
| 6 | /* sched_domains SD_NODE_INIT for sh machines */ | ||
| 7 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 8 | .parent = NULL, \ | ||
| 9 | .child = NULL, \ | ||
| 10 | .groups = NULL, \ | ||
| 11 | .min_interval = 8, \ | ||
| 12 | .max_interval = 32, \ | ||
| 13 | .busy_factor = 32, \ | ||
| 14 | .imbalance_pct = 125, \ | ||
| 15 | .cache_nice_tries = 2, \ | ||
| 16 | .busy_idx = 3, \ | ||
| 17 | .idle_idx = 2, \ | ||
| 18 | .newidle_idx = 0, \ | ||
| 19 | .wake_idx = 0, \ | ||
| 20 | .forkexec_idx = 0, \ | ||
| 21 | .flags = SD_LOAD_BALANCE \ | ||
| 22 | | SD_BALANCE_FORK \ | ||
| 23 | | SD_BALANCE_EXEC \ | ||
| 24 | | SD_BALANCE_NEWIDLE \ | ||
| 25 | | SD_SERIALIZE, \ | ||
| 26 | .last_balance = jiffies, \ | ||
| 27 | .balance_interval = 1, \ | ||
| 28 | .nr_balance_failed = 0, \ | ||
| 29 | } | ||
| 30 | |||
| 31 | #define cpu_to_node(cpu) ((void)(cpu),0) | 6 | #define cpu_to_node(cpu) ((void)(cpu),0) |
| 32 | #define parent_node(node) ((void)(node),0) | 7 | #define parent_node(node) ((void)(node),0) |
| 33 | 8 | ||
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 8b9c556d630b..1754390a426f 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h | |||
| @@ -31,25 +31,6 @@ static inline int pcibus_to_node(struct pci_bus *pbus) | |||
| 31 | cpu_all_mask : \ | 31 | cpu_all_mask : \ |
| 32 | cpumask_of_node(pcibus_to_node(bus))) | 32 | cpumask_of_node(pcibus_to_node(bus))) |
| 33 | 33 | ||
| 34 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 35 | .min_interval = 8, \ | ||
| 36 | .max_interval = 32, \ | ||
| 37 | .busy_factor = 32, \ | ||
| 38 | .imbalance_pct = 125, \ | ||
| 39 | .cache_nice_tries = 2, \ | ||
| 40 | .busy_idx = 3, \ | ||
| 41 | .idle_idx = 2, \ | ||
| 42 | .newidle_idx = 0, \ | ||
| 43 | .wake_idx = 0, \ | ||
| 44 | .forkexec_idx = 0, \ | ||
| 45 | .flags = SD_LOAD_BALANCE \ | ||
| 46 | | SD_BALANCE_FORK \ | ||
| 47 | | SD_BALANCE_EXEC \ | ||
| 48 | | SD_SERIALIZE, \ | ||
| 49 | .last_balance = jiffies, \ | ||
| 50 | .balance_interval = 1, \ | ||
| 51 | } | ||
| 52 | |||
| 53 | #else /* CONFIG_NUMA */ | 34 | #else /* CONFIG_NUMA */ |
| 54 | 35 | ||
| 55 | #include <asm-generic/topology.h> | 36 | #include <asm-generic/topology.h> |
diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index 6fdd0c860193..7a7ce390534f 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h | |||
| @@ -78,32 +78,6 @@ static inline const struct cpumask *cpumask_of_node(int node) | |||
| 78 | .balance_interval = 32, \ | 78 | .balance_interval = 32, \ |
| 79 | } | 79 | } |
| 80 | 80 | ||
| 81 | /* sched_domains SD_NODE_INIT for TILE architecture */ | ||
| 82 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 83 | .min_interval = 16, \ | ||
| 84 | .max_interval = 512, \ | ||
| 85 | .busy_factor = 32, \ | ||
| 86 | .imbalance_pct = 125, \ | ||
| 87 | .cache_nice_tries = 1, \ | ||
| 88 | .busy_idx = 3, \ | ||
| 89 | .idle_idx = 1, \ | ||
| 90 | .newidle_idx = 2, \ | ||
| 91 | .wake_idx = 1, \ | ||
| 92 | .flags = 1*SD_LOAD_BALANCE \ | ||
| 93 | | 1*SD_BALANCE_NEWIDLE \ | ||
| 94 | | 1*SD_BALANCE_EXEC \ | ||
| 95 | | 1*SD_BALANCE_FORK \ | ||
| 96 | | 0*SD_BALANCE_WAKE \ | ||
| 97 | | 0*SD_WAKE_AFFINE \ | ||
| 98 | | 0*SD_PREFER_LOCAL \ | ||
| 99 | | 0*SD_SHARE_CPUPOWER \ | ||
| 100 | | 0*SD_SHARE_PKG_RESOURCES \ | ||
| 101 | | 1*SD_SERIALIZE \ | ||
| 102 | , \ | ||
| 103 | .last_balance = jiffies, \ | ||
| 104 | .balance_interval = 128, \ | ||
| 105 | } | ||
| 106 | |||
| 107 | /* By definition, we create nodes based on online memory. */ | 81 | /* By definition, we create nodes based on online memory. */ |
| 108 | #define node_has_online_mem(nid) 1 | 82 | #define node_has_online_mem(nid) 1 |
| 109 | 83 | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index b9676ae37ada..095b21507b6a 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
| @@ -92,44 +92,6 @@ extern void setup_node_to_cpumask_map(void); | |||
| 92 | 92 | ||
| 93 | #define pcibus_to_node(bus) __pcibus_to_node(bus) | 93 | #define pcibus_to_node(bus) __pcibus_to_node(bus) |
| 94 | 94 | ||
| 95 | #ifdef CONFIG_X86_32 | ||
| 96 | # define SD_CACHE_NICE_TRIES 1 | ||
| 97 | # define SD_IDLE_IDX 1 | ||
| 98 | #else | ||
| 99 | # define SD_CACHE_NICE_TRIES 2 | ||
| 100 | # define SD_IDLE_IDX 2 | ||
| 101 | #endif | ||
| 102 | |||
| 103 | /* sched_domains SD_NODE_INIT for NUMA machines */ | ||
| 104 | #define SD_NODE_INIT (struct sched_domain) { \ | ||
| 105 | .min_interval = 8, \ | ||
| 106 | .max_interval = 32, \ | ||
| 107 | .busy_factor = 32, \ | ||
| 108 | .imbalance_pct = 125, \ | ||
| 109 | .cache_nice_tries = SD_CACHE_NICE_TRIES, \ | ||
| 110 | .busy_idx = 3, \ | ||
| 111 | .idle_idx = SD_IDLE_IDX, \ | ||
| 112 | .newidle_idx = 0, \ | ||
| 113 | .wake_idx = 0, \ | ||
| 114 | .forkexec_idx = 0, \ | ||
| 115 | \ | ||
| 116 | .flags = 1*SD_LOAD_BALANCE \ | ||
| 117 | | 1*SD_BALANCE_NEWIDLE \ | ||
| 118 | | 1*SD_BALANCE_EXEC \ | ||
| 119 | | 1*SD_BALANCE_FORK \ | ||
| 120 | | 0*SD_BALANCE_WAKE \ | ||
| 121 | | 1*SD_WAKE_AFFINE \ | ||
| 122 | | 0*SD_PREFER_LOCAL \ | ||
| 123 | | 0*SD_SHARE_CPUPOWER \ | ||
| 124 | | 0*SD_POWERSAVINGS_BALANCE \ | ||
| 125 | | 0*SD_SHARE_PKG_RESOURCES \ | ||
| 126 | | 1*SD_SERIALIZE \ | ||
| 127 | | 0*SD_PREFER_SIBLING \ | ||
| 128 | , \ | ||
| 129 | .last_balance = jiffies, \ | ||
| 130 | .balance_interval = 1, \ | ||
| 131 | } | ||
| 132 | |||
| 133 | extern int __node_distance(int, int); | 95 | extern int __node_distance(int, int); |
| 134 | #define node_distance(a, b) __node_distance(a, b) | 96 | #define node_distance(a, b) __node_distance(a, b) |
| 135 | 97 | ||
diff --git a/include/linux/topology.h b/include/linux/topology.h index e26db031303b..4f59bf36f0af 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h | |||
| @@ -70,7 +70,6 @@ int arch_update_cpu_topology(void); | |||
| 70 | * Below are the 3 major initializers used in building sched_domains: | 70 | * Below are the 3 major initializers used in building sched_domains: |
| 71 | * SD_SIBLING_INIT, for SMT domains | 71 | * SD_SIBLING_INIT, for SMT domains |
| 72 | * SD_CPU_INIT, for SMP domains | 72 | * SD_CPU_INIT, for SMP domains |
| 73 | * SD_NODE_INIT, for NUMA domains | ||
| 74 | * | 73 | * |
| 75 | * Any architecture that cares to do any tuning to these values should do so | 74 | * Any architecture that cares to do any tuning to these values should do so |
| 76 | * by defining their own arch-specific initializer in include/asm/topology.h. | 75 | * by defining their own arch-specific initializer in include/asm/topology.h. |
| @@ -176,48 +175,12 @@ int arch_update_cpu_topology(void); | |||
| 176 | } | 175 | } |
| 177 | #endif | 176 | #endif |
| 178 | 177 | ||
| 179 | /* sched_domains SD_ALLNODES_INIT for NUMA machines */ | ||
| 180 | #define SD_ALLNODES_INIT (struct sched_domain) { \ | ||
| 181 | .min_interval = 64, \ | ||
| 182 | .max_interval = 64*num_online_cpus(), \ | ||
| 183 | .busy_factor = 128, \ | ||
| 184 | .imbalance_pct = 133, \ | ||
| 185 | .cache_nice_tries = 1, \ | ||
| 186 | .busy_idx = 3, \ | ||
| 187 | .idle_idx = 3, \ | ||
| 188 | .flags = 1*SD_LOAD_BALANCE \ | ||
| 189 | | 1*SD_BALANCE_NEWIDLE \ | ||
| 190 | | 0*SD_BALANCE_EXEC \ | ||
| 191 | | 0*SD_BALANCE_FORK \ | ||
| 192 | | 0*SD_BALANCE_WAKE \ | ||
| 193 | | 0*SD_WAKE_AFFINE \ | ||
| 194 | | 0*SD_SHARE_CPUPOWER \ | ||
| 195 | | 0*SD_POWERSAVINGS_BALANCE \ | ||
| 196 | | 0*SD_SHARE_PKG_RESOURCES \ | ||
| 197 | | 1*SD_SERIALIZE \ | ||
| 198 | | 0*SD_PREFER_SIBLING \ | ||
| 199 | , \ | ||
| 200 | .last_balance = jiffies, \ | ||
| 201 | .balance_interval = 64, \ | ||
| 202 | } | ||
| 203 | |||
| 204 | #ifndef SD_NODES_PER_DOMAIN | ||
| 205 | #define SD_NODES_PER_DOMAIN 16 | ||
| 206 | #endif | ||
| 207 | |||
| 208 | #ifdef CONFIG_SCHED_BOOK | 178 | #ifdef CONFIG_SCHED_BOOK |
| 209 | #ifndef SD_BOOK_INIT | 179 | #ifndef SD_BOOK_INIT |
| 210 | #error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! | 180 | #error Please define an appropriate SD_BOOK_INIT in include/asm/topology.h!!! |
| 211 | #endif | 181 | #endif |
| 212 | #endif /* CONFIG_SCHED_BOOK */ | 182 | #endif /* CONFIG_SCHED_BOOK */ |
| 213 | 183 | ||
| 214 | #ifdef CONFIG_NUMA | ||
| 215 | #ifndef SD_NODE_INIT | ||
| 216 | #error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! | ||
| 217 | #endif | ||
| 218 | |||
| 219 | #endif /* CONFIG_NUMA */ | ||
| 220 | |||
| 221 | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID | 184 | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID |
| 222 | DECLARE_PER_CPU(int, numa_node); | 185 | DECLARE_PER_CPU(int, numa_node); |
| 223 | 186 | ||
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6001e5c3b4e4..b4f2096980a3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -5560,7 +5560,8 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 5560 | break; | 5560 | break; |
| 5561 | } | 5561 | } |
| 5562 | 5562 | ||
| 5563 | if (cpumask_intersects(groupmask, sched_group_cpus(group))) { | 5563 | if (!(sd->flags & SD_OVERLAP) && |
| 5564 | cpumask_intersects(groupmask, sched_group_cpus(group))) { | ||
| 5564 | printk(KERN_CONT "\n"); | 5565 | printk(KERN_CONT "\n"); |
| 5565 | printk(KERN_ERR "ERROR: repeated CPUs\n"); | 5566 | printk(KERN_ERR "ERROR: repeated CPUs\n"); |
| 5566 | break; | 5567 | break; |
| @@ -5898,92 +5899,6 @@ static int __init isolated_cpu_setup(char *str) | |||
| 5898 | 5899 | ||
| 5899 | __setup("isolcpus=", isolated_cpu_setup); | 5900 | __setup("isolcpus=", isolated_cpu_setup); |
| 5900 | 5901 | ||
| 5901 | #ifdef CONFIG_NUMA | ||
| 5902 | |||
| 5903 | /** | ||
| 5904 | * find_next_best_node - find the next node to include in a sched_domain | ||
| 5905 | * @node: node whose sched_domain we're building | ||
| 5906 | * @used_nodes: nodes already in the sched_domain | ||
| 5907 | * | ||
| 5908 | * Find the next node to include in a given scheduling domain. Simply | ||
| 5909 | * finds the closest node not already in the @used_nodes map. | ||
| 5910 | * | ||
| 5911 | * Should use nodemask_t. | ||
| 5912 | */ | ||
| 5913 | static int find_next_best_node(int node, nodemask_t *used_nodes) | ||
| 5914 | { | ||
| 5915 | int i, n, val, min_val, best_node = -1; | ||
| 5916 | |||
| 5917 | min_val = INT_MAX; | ||
| 5918 | |||
| 5919 | for (i = 0; i < nr_node_ids; i++) { | ||
| 5920 | /* Start at @node */ | ||
| 5921 | n = (node + i) % nr_node_ids; | ||
| 5922 | |||
| 5923 | if (!nr_cpus_node(n)) | ||
| 5924 | continue; | ||
| 5925 | |||
| 5926 | /* Skip already used nodes */ | ||
| 5927 | if (node_isset(n, *used_nodes)) | ||
| 5928 | continue; | ||
| 5929 | |||
| 5930 | /* Simple min distance search */ | ||
| 5931 | val = node_distance(node, n); | ||
| 5932 | |||
| 5933 | if (val < min_val) { | ||
| 5934 | min_val = val; | ||
| 5935 | best_node = n; | ||
| 5936 | } | ||
| 5937 | } | ||
| 5938 | |||
| 5939 | if (best_node != -1) | ||
| 5940 | node_set(best_node, *used_nodes); | ||
| 5941 | return best_node; | ||
| 5942 | } | ||
| 5943 | |||
| 5944 | /** | ||
| 5945 | * sched_domain_node_span - get a cpumask for a node's sched_domain | ||
| 5946 | * @node: node whose cpumask we're constructing | ||
| 5947 | * @span: resulting cpumask | ||
| 5948 | * | ||
| 5949 | * Given a node, construct a good cpumask for its sched_domain to span. It | ||
| 5950 | * should be one that prevents unnecessary balancing, but also spreads tasks | ||
| 5951 | * out optimally. | ||
| 5952 | */ | ||
| 5953 | static void sched_domain_node_span(int node, struct cpumask *span) | ||
| 5954 | { | ||
| 5955 | nodemask_t used_nodes; | ||
| 5956 | int i; | ||
| 5957 | |||
| 5958 | cpumask_clear(span); | ||
| 5959 | nodes_clear(used_nodes); | ||
| 5960 | |||
| 5961 | cpumask_or(span, span, cpumask_of_node(node)); | ||
| 5962 | node_set(node, used_nodes); | ||
| 5963 | |||
| 5964 | for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { | ||
| 5965 | int next_node = find_next_best_node(node, &used_nodes); | ||
| 5966 | if (next_node < 0) | ||
| 5967 | break; | ||
| 5968 | cpumask_or(span, span, cpumask_of_node(next_node)); | ||
| 5969 | } | ||
| 5970 | } | ||
| 5971 | |||
| 5972 | static const struct cpumask *cpu_node_mask(int cpu) | ||
| 5973 | { | ||
| 5974 | lockdep_assert_held(&sched_domains_mutex); | ||
| 5975 | |||
| 5976 | sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask); | ||
| 5977 | |||
| 5978 | return sched_domains_tmpmask; | ||
| 5979 | } | ||
| 5980 | |||
| 5981 | static const struct cpumask *cpu_allnodes_mask(int cpu) | ||
| 5982 | { | ||
| 5983 | return cpu_possible_mask; | ||
| 5984 | } | ||
| 5985 | #endif /* CONFIG_NUMA */ | ||
| 5986 | |||
| 5987 | static const struct cpumask *cpu_cpu_mask(int cpu) | 5902 | static const struct cpumask *cpu_cpu_mask(int cpu) |
| 5988 | { | 5903 | { |
| 5989 | return cpumask_of_node(cpu_to_node(cpu)); | 5904 | return cpumask_of_node(cpu_to_node(cpu)); |
| @@ -6020,6 +5935,7 @@ struct sched_domain_topology_level { | |||
| 6020 | sched_domain_init_f init; | 5935 | sched_domain_init_f init; |
| 6021 | sched_domain_mask_f mask; | 5936 | sched_domain_mask_f mask; |
| 6022 | int flags; | 5937 | int flags; |
| 5938 | int numa_level; | ||
| 6023 | struct sd_data data; | 5939 | struct sd_data data; |
| 6024 | }; | 5940 | }; |
| 6025 | 5941 | ||
| @@ -6213,10 +6129,6 @@ sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ | |||
| 6213 | } | 6129 | } |
| 6214 | 6130 | ||
| 6215 | SD_INIT_FUNC(CPU) | 6131 | SD_INIT_FUNC(CPU) |
| 6216 | #ifdef CONFIG_NUMA | ||
| 6217 | SD_INIT_FUNC(ALLNODES) | ||
| 6218 | SD_INIT_FUNC(NODE) | ||
| 6219 | #endif | ||
| 6220 | #ifdef CONFIG_SCHED_SMT | 6132 | #ifdef CONFIG_SCHED_SMT |
| 6221 | SD_INIT_FUNC(SIBLING) | 6133 | SD_INIT_FUNC(SIBLING) |
| 6222 | #endif | 6134 | #endif |
| @@ -6338,15 +6250,191 @@ static struct sched_domain_topology_level default_topology[] = { | |||
| 6338 | { sd_init_BOOK, cpu_book_mask, }, | 6250 | { sd_init_BOOK, cpu_book_mask, }, |
| 6339 | #endif | 6251 | #endif |
| 6340 | { sd_init_CPU, cpu_cpu_mask, }, | 6252 | { sd_init_CPU, cpu_cpu_mask, }, |
| 6341 | #ifdef CONFIG_NUMA | ||
| 6342 | { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, }, | ||
| 6343 | { sd_init_ALLNODES, cpu_allnodes_mask, }, | ||
| 6344 | #endif | ||
| 6345 | { NULL, }, | 6253 | { NULL, }, |
| 6346 | }; | 6254 | }; |
| 6347 | 6255 | ||
| 6348 | static struct sched_domain_topology_level *sched_domain_topology = default_topology; | 6256 | static struct sched_domain_topology_level *sched_domain_topology = default_topology; |
| 6349 | 6257 | ||
| 6258 | #ifdef CONFIG_NUMA | ||
| 6259 | |||
| 6260 | static int sched_domains_numa_levels; | ||
| 6261 | static int sched_domains_numa_scale; | ||
| 6262 | static int *sched_domains_numa_distance; | ||
| 6263 | static struct cpumask ***sched_domains_numa_masks; | ||
| 6264 | static int sched_domains_curr_level; | ||
| 6265 | |||
| 6266 | static inline unsigned long numa_scale(unsigned long x, int level) | ||
| 6267 | { | ||
| 6268 | return x * sched_domains_numa_distance[level] / sched_domains_numa_scale; | ||
| 6269 | } | ||
| 6270 | |||
| 6271 | static inline int sd_local_flags(int level) | ||
| 6272 | { | ||
| 6273 | if (sched_domains_numa_distance[level] > REMOTE_DISTANCE) | ||
| 6274 | return 0; | ||
| 6275 | |||
| 6276 | return SD_BALANCE_EXEC | SD_BALANCE_FORK | SD_WAKE_AFFINE; | ||
| 6277 | } | ||
| 6278 | |||
| 6279 | static struct sched_domain * | ||
| 6280 | sd_numa_init(struct sched_domain_topology_level *tl, int cpu) | ||
| 6281 | { | ||
| 6282 | struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); | ||
| 6283 | int level = tl->numa_level; | ||
| 6284 | int sd_weight = cpumask_weight( | ||
| 6285 | sched_domains_numa_masks[level][cpu_to_node(cpu)]); | ||
| 6286 | |||
| 6287 | *sd = (struct sched_domain){ | ||
| 6288 | .min_interval = sd_weight, | ||
| 6289 | .max_interval = 2*sd_weight, | ||
| 6290 | .busy_factor = 32, | ||
| 6291 | .imbalance_pct = 100 + numa_scale(25, level), | ||
| 6292 | .cache_nice_tries = 2, | ||
| 6293 | .busy_idx = 3, | ||
| 6294 | .idle_idx = 2, | ||
| 6295 | .newidle_idx = 0, | ||
| 6296 | .wake_idx = 0, | ||
| 6297 | .forkexec_idx = 0, | ||
| 6298 | |||
| 6299 | .flags = 1*SD_LOAD_BALANCE | ||
| 6300 | | 1*SD_BALANCE_NEWIDLE | ||
| 6301 | | 0*SD_BALANCE_EXEC | ||
| 6302 | | 0*SD_BALANCE_FORK | ||
| 6303 | | 0*SD_BALANCE_WAKE | ||
| 6304 | | 0*SD_WAKE_AFFINE | ||
| 6305 | | 0*SD_PREFER_LOCAL | ||
| 6306 | | 0*SD_SHARE_CPUPOWER | ||
| 6307 | | 0*SD_POWERSAVINGS_BALANCE | ||
| 6308 | | 0*SD_SHARE_PKG_RESOURCES | ||
| 6309 | | 1*SD_SERIALIZE | ||
| 6310 | | 0*SD_PREFER_SIBLING | ||
| 6311 | | sd_local_flags(level) | ||
| 6312 | , | ||
| 6313 | .last_balance = jiffies, | ||
| 6314 | .balance_interval = sd_weight, | ||
| 6315 | }; | ||
| 6316 | SD_INIT_NAME(sd, NUMA); | ||
| 6317 | sd->private = &tl->data; | ||
| 6318 | |||
| 6319 | /* | ||
| 6320 | * Ugly hack to pass state to sd_numa_mask()... | ||
| 6321 | */ | ||
| 6322 | sched_domains_curr_level = tl->numa_level; | ||
| 6323 | |||
| 6324 | return sd; | ||
| 6325 | } | ||
| 6326 | |||
| 6327 | static const struct cpumask *sd_numa_mask(int cpu) | ||
| 6328 | { | ||
| 6329 | return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)]; | ||
| 6330 | } | ||
| 6331 | |||
| 6332 | static void sched_init_numa(void) | ||
| 6333 | { | ||
| 6334 | int next_distance, curr_distance = node_distance(0, 0); | ||
| 6335 | struct sched_domain_topology_level *tl; | ||
| 6336 | int level = 0; | ||
| 6337 | int i, j, k; | ||
| 6338 | |||
| 6339 | sched_domains_numa_scale = curr_distance; | ||
| 6340 | sched_domains_numa_distance = kzalloc(sizeof(int) * nr_node_ids, GFP_KERNEL); | ||
| 6341 | if (!sched_domains_numa_distance) | ||
| 6342 | return; | ||
| 6343 | |||
| 6344 | /* | ||
| 6345 | * O(nr_nodes^2) deduplicating selection sort -- in order to find the | ||
| 6346 | * unique distances in the node_distance() table. | ||
| 6347 | * | ||
| 6348 | * Assumes node_distance(0,j) includes all distances in | ||
| 6349 | * node_distance(i,j) in order to avoid cubic time. | ||
| 6350 | * | ||
| 6351 | * XXX: could be optimized to O(n log n) by using sort() | ||
| 6352 | */ | ||
| 6353 | next_distance = curr_distance; | ||
| 6354 | for (i = 0; i < nr_node_ids; i++) { | ||
| 6355 | for (j = 0; j < nr_node_ids; j++) { | ||
| 6356 | int distance = node_distance(0, j); | ||
| 6357 | if (distance > curr_distance && | ||
| 6358 | (distance < next_distance || | ||
| 6359 | next_distance == curr_distance)) | ||
| 6360 | next_distance = distance; | ||
| 6361 | } | ||
| 6362 | if (next_distance != curr_distance) { | ||
| 6363 | sched_domains_numa_distance[level++] = next_distance; | ||
| 6364 | sched_domains_numa_levels = level; | ||
| 6365 | curr_distance = next_distance; | ||
| 6366 | } else break; | ||
| 6367 | } | ||
| 6368 | /* | ||
| 6369 | * 'level' contains the number of unique distances, excluding the | ||
| 6370 | * identity distance node_distance(i,i). | ||
| 6371 | * | ||
| 6372 | * The sched_domains_numa_distance[] array includes the actual distance | ||
| 6373 | * numbers. | ||
| 6374 | */ | ||
| 6375 | |||
| 6376 | sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); | ||
| 6377 | if (!sched_domains_numa_masks) | ||
| 6378 | return; | ||
| 6379 | |||
| 6380 | /* | ||
| 6381 | * Now for each level, construct a mask per node which contains all | ||
| 6382 | * cpus of nodes that are that many hops away from us. | ||
| 6383 | */ | ||
| 6384 | for (i = 0; i < level; i++) { | ||
| 6385 | sched_domains_numa_masks[i] = | ||
| 6386 | kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL); | ||
| 6387 | if (!sched_domains_numa_masks[i]) | ||
| 6388 | return; | ||
| 6389 | |||
| 6390 | for (j = 0; j < nr_node_ids; j++) { | ||
| 6391 | struct cpumask *mask = kzalloc_node(cpumask_size(), GFP_KERNEL, j); | ||
| 6392 | if (!mask) | ||
| 6393 | return; | ||
| 6394 | |||
| 6395 | sched_domains_numa_masks[i][j] = mask; | ||
| 6396 | |||
| 6397 | for (k = 0; k < nr_node_ids; k++) { | ||
| 6398 | if (node_distance(cpu_to_node(j), k) > | ||
| 6399 | sched_domains_numa_distance[i]) | ||
| 6400 | continue; | ||
| 6401 | |||
| 6402 | cpumask_or(mask, mask, cpumask_of_node(k)); | ||
| 6403 | } | ||
| 6404 | } | ||
| 6405 | } | ||
| 6406 | |||
| 6407 | tl = kzalloc((ARRAY_SIZE(default_topology) + level) * | ||
| 6408 | sizeof(struct sched_domain_topology_level), GFP_KERNEL); | ||
| 6409 | if (!tl) | ||
| 6410 | return; | ||
| 6411 | |||
| 6412 | /* | ||
| 6413 | * Copy the default topology bits.. | ||
| 6414 | */ | ||
| 6415 | for (i = 0; default_topology[i].init; i++) | ||
| 6416 | tl[i] = default_topology[i]; | ||
| 6417 | |||
| 6418 | /* | ||
| 6419 | * .. and append 'j' levels of NUMA goodness. | ||
| 6420 | */ | ||
| 6421 | for (j = 0; j < level; i++, j++) { | ||
| 6422 | tl[i] = (struct sched_domain_topology_level){ | ||
| 6423 | .init = sd_numa_init, | ||
| 6424 | .mask = sd_numa_mask, | ||
| 6425 | .flags = SDTL_OVERLAP, | ||
| 6426 | .numa_level = j, | ||
| 6427 | }; | ||
| 6428 | } | ||
| 6429 | |||
| 6430 | sched_domain_topology = tl; | ||
| 6431 | } | ||
| 6432 | #else | ||
| 6433 | static inline void sched_init_numa(void) | ||
| 6434 | { | ||
| 6435 | } | ||
| 6436 | #endif /* CONFIG_NUMA */ | ||
| 6437 | |||
| 6350 | static int __sdt_alloc(const struct cpumask *cpu_map) | 6438 | static int __sdt_alloc(const struct cpumask *cpu_map) |
| 6351 | { | 6439 | { |
| 6352 | struct sched_domain_topology_level *tl; | 6440 | struct sched_domain_topology_level *tl; |
| @@ -6840,6 +6928,8 @@ void __init sched_init_smp(void) | |||
| 6840 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); | 6928 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); |
| 6841 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); | 6929 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); |
| 6842 | 6930 | ||
| 6931 | sched_init_numa(); | ||
| 6932 | |||
| 6843 | get_online_cpus(); | 6933 | get_online_cpus(); |
| 6844 | mutex_lock(&sched_domains_mutex); | 6934 | mutex_lock(&sched_domains_mutex); |
| 6845 | init_sched_domains(cpu_active_mask); | 6935 | init_sched_domains(cpu_active_mask); |
