diff options
Diffstat (limited to 'arch/x86/kernel/setup_percpu.c')
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 399 |
1 files changed, 399 insertions, 0 deletions
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c new file mode 100644 index 000000000000..cac68430d31f --- /dev/null +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -0,0 +1,399 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/bootmem.h> | ||
5 | #include <linux/percpu.h> | ||
6 | #include <linux/kexec.h> | ||
7 | #include <linux/crash_dump.h> | ||
8 | #include <asm/smp.h> | ||
9 | #include <asm/percpu.h> | ||
10 | #include <asm/sections.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/setup.h> | ||
13 | #include <asm/topology.h> | ||
14 | #include <asm/mpspec.h> | ||
15 | #include <asm/apicdef.h> | ||
16 | #include <asm/highmem.h> | ||
17 | |||
#ifdef CONFIG_X86_LOCAL_APIC
/*
 * Local APIC bookkeeping.  These objects are only defined here; nothing
 * in this file writes to them.
 */
unsigned int num_processors;
unsigned int disabled_cpus __cpuinitdata;
unsigned int max_physical_apicid;

/* Processor that is doing the boot up (starts out as -1U) */
unsigned int boot_cpu_physical_apicid = -1U;
EXPORT_SYMBOL(boot_cpu_physical_apicid);

/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif
29 | |||
/* map cpu index to physical APIC ID; entries default to BAD_APICID */
DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);

/* early per cpu x86_bios_cpu_apicid; entries default to BAD_APICID */
DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
35 | |||
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
/* Shorthand tested by the rest of this file for the 64-bit NUMA case */
#define X86_64_NUMA	1

/* map cpu index to node index; entries default to NUMA_NO_NODE */
DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);

/* which logical CPUs are on which nodes (one cpumask per node) */
cpumask_t *node_to_cpumask_map;
EXPORT_SYMBOL(node_to_cpumask_map);

/* allocates node_to_cpumask_map; defined further down in this file */
static void __init setup_node_to_cpumask_map(void);

#else
/* nothing to set up without X86_64_NUMA */
static inline void setup_node_to_cpumask_map(void) { }
#endif
53 | |||
54 | #if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) | ||
55 | /* | ||
56 | * Copy data used in early init routines from the initial arrays to the | ||
57 | * per cpu data areas. These arrays then become expendable and the | ||
58 | * *_early_ptr's are zeroed indicating that the static arrays are gone. | ||
59 | */ | ||
60 | static void __init setup_per_cpu_maps(void) | ||
61 | { | ||
62 | int cpu; | ||
63 | |||
64 | for_each_possible_cpu(cpu) { | ||
65 | per_cpu(x86_cpu_to_apicid, cpu) = | ||
66 | early_per_cpu_map(x86_cpu_to_apicid, cpu); | ||
67 | per_cpu(x86_bios_cpu_apicid, cpu) = | ||
68 | early_per_cpu_map(x86_bios_cpu_apicid, cpu); | ||
69 | #ifdef X86_64_NUMA | ||
70 | per_cpu(x86_cpu_to_node_map, cpu) = | ||
71 | early_per_cpu_map(x86_cpu_to_node_map, cpu); | ||
72 | #endif | ||
73 | } | ||
74 | |||
75 | /* indicate the early static arrays will soon be gone */ | ||
76 | early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; | ||
77 | early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; | ||
78 | #ifdef X86_64_NUMA | ||
79 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | ||
80 | #endif | ||
81 | } | ||
82 | |||
#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
/* One cpumask per cpu id, filled in by setup_cpumask_of_cpu() */
cpumask_t *cpumask_of_cpu_map __read_mostly;
EXPORT_SYMBOL(cpumask_of_cpu_map);

/*
 * Allocate cpumask_of_cpu_map and set bit N in entry N for every
 * N below nr_cpu_ids.
 *
 * requires nr_cpu_ids to be initialized
 */
static void __init setup_cpumask_of_cpu(void)
{
	cpumask_t *masks;
	int cpu = 0;

	/* alloc_bootmem zeroes memory, so each mask starts out empty */
	masks = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
	cpumask_of_cpu_map = masks;

	while (cpu < nr_cpu_ids) {
		cpu_set(cpu, masks[cpu]);
		cpu++;
	}
}
#else
/* no cpumask_of_cpu_map to build in this configuration */
static inline void setup_cpumask_of_cpu(void) { }
#endif
100 | |||
#ifdef CONFIG_X86_32
/*
 * Great future not-so-futuristic plan: make i386 and x86_64 do it
 * the same way
 */
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);

/* no pda map to set up in this configuration */
static inline void setup_cpu_pda_map(void) { }

#elif !defined(CONFIG_SMP)
/* no pda map to set up in this configuration */
static inline void setup_cpu_pda_map(void) { }

#else /* CONFIG_SMP && CONFIG_X86_64 */

/*
 * Allocate cpu_pda pointer table and array via alloc_bootmem.
 *
 * A single allocation holds the pointer table (nr_cpu_ids entries,
 * rounded up to a cache line) followed by nr_cpu_ids - 1 pda slots of
 * cache-line-rounded size.  Cpu 0 keeps the pda it already has; every
 * other possible cpu is pointed at the next free slot, which is flagged
 * with in_bootmem = 1.  Finally _cpu_pda is switched to the new table.
 */
static void __init setup_cpu_pda_map(void)
{
	unsigned long pda_size, table_size, array_size;
	struct x8664_pda **table;
	char *slot;
	int cpu;

	pda_size = roundup(sizeof(struct x8664_pda), cache_line_size());

	/* pointer table first, pda array right behind it */
	table_size = roundup(nr_cpu_ids * sizeof(void *), cache_line_size());
	array_size = pda_size * (nr_cpu_ids - 1);

	table = alloc_bootmem(table_size + array_size);
	slot = (char *)table + table_size;

	for_each_possible_cpu(cpu) {
		if (cpu != 0) {
			table[cpu] = (struct x8664_pda *)slot;
			table[cpu]->in_bootmem = 1;
			slot += pda_size;
		} else {
			/* leave boot cpu pda in place */
			table[0] = cpu_pda(0);
		}
	}

	/* point to new pointer table */
	_cpu_pda = table;
}
#endif
153 | |||
154 | /* | ||
155 | * Great future plan: | ||
156 | * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. | ||
157 | * Always point %gs to its beginning | ||
158 | */ | ||
159 | void __init setup_per_cpu_areas(void) | ||
160 | { | ||
161 | ssize_t size = PERCPU_ENOUGH_ROOM; | ||
162 | char *ptr; | ||
163 | int cpu; | ||
164 | |||
165 | /* Setup cpu_pda map */ | ||
166 | setup_cpu_pda_map(); | ||
167 | |||
168 | /* Copy section for each CPU (we discard the original) */ | ||
169 | size = PERCPU_ENOUGH_ROOM; | ||
170 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", | ||
171 | size); | ||
172 | |||
173 | for_each_possible_cpu(cpu) { | ||
174 | #ifndef CONFIG_NEED_MULTIPLE_NODES | ||
175 | ptr = alloc_bootmem_pages(size); | ||
176 | #else | ||
177 | int node = early_cpu_to_node(cpu); | ||
178 | if (!node_online(node) || !NODE_DATA(node)) { | ||
179 | ptr = alloc_bootmem_pages(size); | ||
180 | printk(KERN_INFO | ||
181 | "cpu %d has no node %d or node-local memory\n", | ||
182 | cpu, node); | ||
183 | } | ||
184 | else | ||
185 | ptr = alloc_bootmem_pages_node(NODE_DATA(node), size); | ||
186 | #endif | ||
187 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | ||
188 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | ||
189 | |||
190 | } | ||
191 | |||
192 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", | ||
193 | NR_CPUS, nr_cpu_ids, nr_node_ids); | ||
194 | |||
195 | /* Setup percpu data maps */ | ||
196 | setup_per_cpu_maps(); | ||
197 | |||
198 | /* Setup node to cpumask map */ | ||
199 | setup_node_to_cpumask_map(); | ||
200 | |||
201 | /* Setup cpumask_of_cpu map */ | ||
202 | setup_cpumask_of_cpu(); | ||
203 | } | ||
204 | |||
205 | #endif | ||
206 | |||
207 | #ifdef X86_64_NUMA | ||
208 | |||
209 | /* | ||
210 | * Allocate node_to_cpumask_map based on number of available nodes | ||
211 | * Requires node_possible_map to be valid. | ||
212 | * | ||
213 | * Note: node_to_cpumask() is not valid until after this is done. | ||
214 | */ | ||
215 | static void __init setup_node_to_cpumask_map(void) | ||
216 | { | ||
217 | unsigned int node, num = 0; | ||
218 | cpumask_t *map; | ||
219 | |||
220 | /* setup nr_node_ids if not done yet */ | ||
221 | if (nr_node_ids == MAX_NUMNODES) { | ||
222 | for_each_node_mask(node, node_possible_map) | ||
223 | num = node; | ||
224 | nr_node_ids = num + 1; | ||
225 | } | ||
226 | |||
227 | /* allocate the map */ | ||
228 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | ||
229 | |||
230 | Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", | ||
231 | map, nr_node_ids); | ||
232 | |||
233 | /* node_to_cpumask() will now work */ | ||
234 | node_to_cpumask_map = map; | ||
235 | } | ||
236 | |||
237 | void __cpuinit numa_set_node(int cpu, int node) | ||
238 | { | ||
239 | int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); | ||
240 | |||
241 | if (cpu_pda(cpu) && node != NUMA_NO_NODE) | ||
242 | cpu_pda(cpu)->nodenumber = node; | ||
243 | |||
244 | if (cpu_to_node_map) | ||
245 | cpu_to_node_map[cpu] = node; | ||
246 | |||
247 | else if (per_cpu_offset(cpu)) | ||
248 | per_cpu(x86_cpu_to_node_map, cpu) = node; | ||
249 | |||
250 | else | ||
251 | Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); | ||
252 | } | ||
253 | |||
254 | void __cpuinit numa_clear_node(int cpu) | ||
255 | { | ||
256 | numa_set_node(cpu, NUMA_NO_NODE); | ||
257 | } | ||
258 | |||
259 | #ifndef CONFIG_DEBUG_PER_CPU_MAPS | ||
260 | |||
261 | void __cpuinit numa_add_cpu(int cpu) | ||
262 | { | ||
263 | cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); | ||
264 | } | ||
265 | |||
266 | void __cpuinit numa_remove_cpu(int cpu) | ||
267 | { | ||
268 | cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); | ||
269 | } | ||
270 | |||
271 | #else /* CONFIG_DEBUG_PER_CPU_MAPS */ | ||
272 | |||
273 | /* | ||
274 | * --------- debug versions of the numa functions --------- | ||
275 | */ | ||
276 | static void __cpuinit numa_set_cpumask(int cpu, int enable) | ||
277 | { | ||
278 | int node = cpu_to_node(cpu); | ||
279 | cpumask_t *mask; | ||
280 | char buf[64]; | ||
281 | |||
282 | if (node_to_cpumask_map == NULL) { | ||
283 | printk(KERN_ERR "node_to_cpumask_map NULL\n"); | ||
284 | dump_stack(); | ||
285 | return; | ||
286 | } | ||
287 | |||
288 | mask = &node_to_cpumask_map[node]; | ||
289 | if (enable) | ||
290 | cpu_set(cpu, *mask); | ||
291 | else | ||
292 | cpu_clear(cpu, *mask); | ||
293 | |||
294 | cpulist_scnprintf(buf, sizeof(buf), *mask); | ||
295 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | ||
296 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); | ||
297 | } | ||
298 | |||
299 | void __cpuinit numa_add_cpu(int cpu) | ||
300 | { | ||
301 | numa_set_cpumask(cpu, 1); | ||
302 | } | ||
303 | |||
304 | void __cpuinit numa_remove_cpu(int cpu) | ||
305 | { | ||
306 | numa_set_cpumask(cpu, 0); | ||
307 | } | ||
308 | |||
309 | int cpu_to_node(int cpu) | ||
310 | { | ||
311 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) { | ||
312 | printk(KERN_WARNING | ||
313 | "cpu_to_node(%d): usage too early!\n", cpu); | ||
314 | dump_stack(); | ||
315 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
316 | } | ||
317 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
318 | } | ||
319 | EXPORT_SYMBOL(cpu_to_node); | ||
320 | |||
321 | /* | ||
322 | * Same function as cpu_to_node() but used if called before the | ||
323 | * per_cpu areas are setup. | ||
324 | */ | ||
325 | int early_cpu_to_node(int cpu) | ||
326 | { | ||
327 | if (early_per_cpu_ptr(x86_cpu_to_node_map)) | ||
328 | return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; | ||
329 | |||
330 | if (!per_cpu_offset(cpu)) { | ||
331 | printk(KERN_WARNING | ||
332 | "early_cpu_to_node(%d): no per_cpu area!\n", cpu); | ||
333 | dump_stack(); | ||
334 | return NUMA_NO_NODE; | ||
335 | } | ||
336 | return per_cpu(x86_cpu_to_node_map, cpu); | ||
337 | } | ||
338 | |||
339 | |||
340 | /* empty cpumask */ | ||
341 | static const cpumask_t cpu_mask_none; | ||
342 | |||
343 | /* | ||
344 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | ||
345 | */ | ||
346 | const cpumask_t *_node_to_cpumask_ptr(int node) | ||
347 | { | ||
348 | if (node_to_cpumask_map == NULL) { | ||
349 | printk(KERN_WARNING | ||
350 | "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", | ||
351 | node); | ||
352 | dump_stack(); | ||
353 | return (const cpumask_t *)&cpu_online_map; | ||
354 | } | ||
355 | if (node >= nr_node_ids) { | ||
356 | printk(KERN_WARNING | ||
357 | "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", | ||
358 | node, nr_node_ids); | ||
359 | dump_stack(); | ||
360 | return &cpu_mask_none; | ||
361 | } | ||
362 | return &node_to_cpumask_map[node]; | ||
363 | } | ||
364 | EXPORT_SYMBOL(_node_to_cpumask_ptr); | ||
365 | |||
366 | /* | ||
367 | * Returns a bitmask of CPUs on Node 'node'. | ||
368 | * | ||
369 | * Side note: this function creates the returned cpumask on the stack | ||
370 | * so with a high NR_CPUS count, excessive stack space is used. The | ||
371 | * node_to_cpumask_ptr function should be used whenever possible. | ||
372 | */ | ||
373 | cpumask_t node_to_cpumask(int node) | ||
374 | { | ||
375 | if (node_to_cpumask_map == NULL) { | ||
376 | printk(KERN_WARNING | ||
377 | "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); | ||
378 | dump_stack(); | ||
379 | return cpu_online_map; | ||
380 | } | ||
381 | if (node >= nr_node_ids) { | ||
382 | printk(KERN_WARNING | ||
383 | "node_to_cpumask(%d): node > nr_node_ids(%d)\n", | ||
384 | node, nr_node_ids); | ||
385 | dump_stack(); | ||
386 | return cpu_mask_none; | ||
387 | } | ||
388 | return node_to_cpumask_map[node]; | ||
389 | } | ||
390 | EXPORT_SYMBOL(node_to_cpumask); | ||
391 | |||
392 | /* | ||
393 | * --------- end of debug versions of the numa functions --------- | ||
394 | */ | ||
395 | |||
396 | #endif /* CONFIG_DEBUG_PER_CPU_MAPS */ | ||
397 | |||
398 | #endif /* X86_64_NUMA */ | ||
399 | |||