diff options
Diffstat (limited to 'arch/x86/mm/numa.c')
-rw-r--r-- | arch/x86/mm/numa.c | 212 |
1 files changed, 209 insertions, 3 deletions
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index ebf6d7887a38..9559d360fde7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -26,12 +26,50 @@ static __init int numa_setup(char *opt) | |||
26 | early_param("numa", numa_setup); | 26 | early_param("numa", numa_setup); |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * Which logical CPUs are on which nodes | 29 | * apicid, cpu, node mappings |
30 | */ | 30 | */ |
31 | s16 __apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | ||
32 | [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE | ||
33 | }; | ||
34 | |||
31 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; | 35 | cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; |
32 | EXPORT_SYMBOL(node_to_cpumask_map); | 36 | EXPORT_SYMBOL(node_to_cpumask_map); |
33 | 37 | ||
34 | /* | 38 | /* |
39 | * Map cpu index to node index | ||
40 | */ | ||
41 | DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); | ||
42 | EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); | ||
43 | |||
44 | void __cpuinit numa_set_node(int cpu, int node) | ||
45 | { | ||
46 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | ||
47 | |||
48 | /* early setting, no percpu area yet */ | ||
49 | if (cpu_to_node_map) { | ||
50 | cpu_to_node_map[cpu] = node; | ||
51 | return; | ||
52 | } | ||
53 | |||
54 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | ||
55 | if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { | ||
56 | printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); | ||
57 | dump_stack(); | ||
58 | return; | ||
59 | } | ||
60 | #endif | ||
61 | per_cpu(x86_cpu_to_node_map, cpu) = node; | ||
62 | |||
63 | if (node != NUMA_NO_NODE) | ||
64 | set_cpu_numa_node(cpu, node); | ||
65 | } | ||
66 | |||
67 | void __cpuinit numa_clear_node(int cpu) | ||
68 | { | ||
69 | numa_set_node(cpu, NUMA_NO_NODE); | ||
70 | } | ||
71 | |||
72 | /* | ||
35 | * Allocate node_to_cpumask_map based on number of available nodes | 73 | * Allocate node_to_cpumask_map based on number of available nodes |
36 | * Requires node_possible_map to be valid. | 74 | * Requires node_possible_map to be valid. |
37 | * | 75 | * |
@@ -57,7 +95,174 @@ void __init setup_node_to_cpumask_map(void) | |||
57 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); | 95 | pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); |
58 | } | 96 | } |
59 | 97 | ||
60 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 98 | /* |
99 | * There are unfortunately some poorly designed mainboards around that | ||
100 | * only connect memory to a single CPU. This breaks the 1:1 cpu->node | ||
101 | * mapping. To avoid this fill in the mapping for all possible CPUs, | ||
102 | * as the number of CPUs is not known yet. We round robin the existing | ||
103 | * nodes. | ||
104 | */ | ||
105 | void __init numa_init_array(void) | ||
106 | { | ||
107 | int rr, i; | ||
108 | |||
109 | rr = first_node(node_online_map); | ||
110 | for (i = 0; i < nr_cpu_ids; i++) { | ||
111 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | ||
112 | continue; | ||
113 | numa_set_node(i, rr); | ||
114 | rr = next_node(rr, node_online_map); | ||
115 | if (rr == MAX_NUMNODES) | ||
116 | rr = first_node(node_online_map); | ||
117 | } | ||
118 | } | ||
119 | |||
120 | static __init int find_near_online_node(int node) | ||
121 | { | ||
122 | int n, val; | ||
123 | int min_val = INT_MAX; | ||
124 | int best_node = -1; | ||
125 | |||
126 | for_each_online_node(n) { | ||
127 | val = node_distance(node, n); | ||
128 | |||
129 | if (val < min_val) { | ||
130 | min_val = val; | ||
131 | best_node = n; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | return best_node; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Setup early cpu_to_node. | ||
140 | * | ||
141 | * Populate cpu_to_node[] only if x86_cpu_to_apicid[], | ||
142 | * and apicid_to_node[] tables have valid entries for a CPU. | ||
143 | * This means we skip cpu_to_node[] initialisation for NUMA | ||
144 | * emulation and faking node case (when running a kernel compiled | ||
145 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] | ||
146 | * is already initialized in a round robin manner at numa_init_array, | ||
147 | * prior to this call, and this initialization is good enough | ||
148 | * for the fake NUMA cases. | ||
149 | * | ||
150 | * Called before the per_cpu areas are setup. | ||
151 | */ | ||
152 | void __init init_cpu_to_node(void) | ||
153 | { | ||
154 | int cpu; | ||
155 | u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); | ||
156 | |||
157 | BUG_ON(cpu_to_apicid == NULL); | ||
158 | |||
159 | for_each_possible_cpu(cpu) { | ||
160 | int node = numa_cpu_node(cpu); | ||
161 | |||
162 | if (node == NUMA_NO_NODE) | ||
163 | continue; | ||
164 | if (!node_online(node)) | ||
165 | node = find_near_online_node(node); | ||
166 | numa_set_node(cpu, node); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | ||
171 | |||
172 | # ifndef CONFIG_NUMA_EMU | ||
173 | void __cpuinit numa_add_cpu(int cpu) | ||
174 | { | ||
175 | cpumask_set_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
176 | } | ||
177 | |||
178 | void __cpuinit numa_remove_cpu(int cpu) | ||
179 | { | ||
180 | cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
181 | } | ||
182 | # endif /* !CONFIG_NUMA_EMU */ | ||
183 | |||
184 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | ||
185 | |||
186 | int __cpu_to_node(int cpu) | ||
187 | { | ||
188 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | ||
189 | printk(KERN_WARNING | ||
190 | "cpu_to_node(%d): usage too early!\n", cpu); | ||
191 | dump_stack(); | ||
192 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
193 | } | ||
194 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
195 | } | ||
196 | EXPORT_SYMBOL(__cpu_to_node); | ||
197 | |||
198 | /* | ||
199 | * Same function as cpu_to_node() but used if called before the | ||
200 | * per_cpu areas are setup. | ||
201 | */ | ||
202 | int early_cpu_to_node(int cpu) | ||
203 | { | ||
204 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | ||
205 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
206 | |||
207 | if (!cpu_possible(cpu)) { | ||
208 | printk(KERN_WARNING | ||
209 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | ||
210 | dump_stack(); | ||
211 | return NUMA_NO_NODE; | ||
212 | } | ||
213 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
214 | } | ||
215 | |||
216 | struct cpumask __cpuinit *debug_cpumask_set_cpu(int cpu, int enable) | ||
217 | { | ||
218 | int node = early_cpu_to_node(cpu); | ||
219 | struct cpumask *mask; | ||
220 | char buf[64]; | ||
221 | |||
222 | if (node == NUMA_NO_NODE) { | ||
223 | /* early_cpu_to_node() already emits a warning and trace */ | ||
224 | return NULL; | ||
225 | } | ||
226 | mask = node_to_cpumask_map[node]; | ||
227 | if (!mask) { | ||
228 | pr_err("node_to_cpumask_map[%i] NULL\n", node); | ||
229 | dump_stack(); | ||
230 | return NULL; | ||
231 | } | ||
232 | |||
233 | cpulist_scnprintf(buf, sizeof(buf), mask); | ||
234 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | ||
235 | enable ? "numa_add_cpu" : "numa_remove_cpu", | ||
236 | cpu, node, buf); | ||
237 | return mask; | ||
238 | } | ||
239 | |||
240 | # ifndef CONFIG_NUMA_EMU | ||
241 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
242 | { | ||
243 | struct cpumask *mask; | ||
244 | |||
245 | mask = debug_cpumask_set_cpu(cpu, enable); | ||
246 | if (!mask) | ||
247 | return; | ||
248 | |||
249 | if (enable) | ||
250 | cpumask_set_cpu(cpu, mask); | ||
251 | else | ||
252 | cpumask_clear_cpu(cpu, mask); | ||
253 | } | ||
254 | |||
255 | void __cpuinit numa_add_cpu(int cpu) | ||
256 | { | ||
257 | numa_set_cpumask(cpu, 1); | ||
258 | } | ||
259 | |||
260 | void __cpuinit numa_remove_cpu(int cpu) | ||
261 | { | ||
262 | numa_set_cpumask(cpu, 0); | ||
263 | } | ||
264 | # endif /* !CONFIG_NUMA_EMU */ | ||
265 | |||
61 | /* | 266 | /* |
62 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 267 | * Returns a pointer to the bitmask of CPUs on Node 'node'. |
63 | */ | 268 | */ |
@@ -80,4 +285,5 @@ const struct cpumask *cpumask_of_node(int node) | |||
80 | return node_to_cpumask_map[node]; | 285 | return node_to_cpumask_map[node]; |
81 | } | 286 | } |
82 | EXPORT_SYMBOL(cpumask_of_node); | 287 | EXPORT_SYMBOL(cpumask_of_node); |
83 | #endif | 288 | |
289 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | ||