Diffstat (limited to 'arch/x86/kernel/smpboot_32.c')
-rw-r--r--	arch/x86/kernel/smpboot_32.c	1322
1 file changed, 1322 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/smpboot_32.c b/arch/x86/kernel/smpboot_32.c
new file mode 100644
index 000000000000..e4f61d1c6248
--- /dev/null
+++ b/arch/x86/kernel/smpboot_32.c
@@ -0,0 +1,1322 @@
/*
 * x86 SMP booting functions
 *
 * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
 *
 * Much of the core SMP work is based on previous work by Thomas Radke, to
 * whom a great many thanks are extended.
 *
 * Thanks to Intel for making available several different Pentium,
 * Pentium Pro and Pentium-II/Xeon MP machines.
 * Original development of Linux SMP code supported by Caldera.
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 *
 * Fixes
 *	Felix Koop	:	NR_CPUS used properly
 *	Jose Renau	:	Handle single CPU case.
 *	Alan Cox	:	By repeated request 8) - Total BogoMIPS report.
 *	Greg Wright	:	Fix for kernel stacks panic.
 *	Erich Boleyn	:	MP v1.4 and additional changes.
 *	Matthias Sattler:	Changes for 2.1 kernel map.
 *	Michel Lespinasse:	Changes for 2.1 kernel map.
 *	Michael Chastain:	Change trampoline.S to gnu as.
 *	Alan Cox	:	Dumb bug: 'B' step PPro's are fine
 *	Ingo Molnar	:	Added APIC timers, based on code
 *				from Jose Renau
 *	Ingo Molnar	:	various cleanups and rewrites
 *	Tigran Aivazian	:	fixed "0.00 in /proc/uptime on SMP" bug.
 *	Maciej W. Rozycki:	Bits for genuine 82489DX APICs
 *	Martin J. Bligh	:	Added support for multi-quad systems
 *	Dave Jones	:	Report invalid combinations of Athlon CPUs.
 *	Rusty Russell	:	Hacked into shape for new "hotplug" boot process.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <linux/nmi.h>

#include <linux/delay.h>
#include <linux/mc146818rtc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/nmi.h>

#include <mach_apic.h>
#include <mach_wakecpu.h>
#include <smpboot_hooks.h>
#include <asm/vmi.h>
#include <asm/mtrr.h>

/* Set if we find a B stepping CPU */
static int __devinitdata smp_b_stepping;

/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);

/* Last level cache ID of each logical CPU */
int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID};

/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);

/* representing HT and core siblings of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);

/* bitmap of online cpus */
cpumask_t cpu_online_map __read_mostly;
EXPORT_SYMBOL(cpu_online_map);

cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;

/* Per CPU bogomips and other parameters */
struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_data);

u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
			{ [0 ... NR_CPUS-1] = 0xff };
EXPORT_SYMBOL(x86_cpu_to_apicid);

u8 apicid_2_node[MAX_APICID];

/*
 * Trampoline 80x86 program as an array.
 */

extern unsigned char trampoline_data [];
extern unsigned char trampoline_end  [];
static unsigned char *trampoline_base;
static int trampoline_exec;

static void map_cpu_to_logical_apicid(void);

/* State of each CPU. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };

/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 */

static unsigned long __devinit setup_trampoline(void)
{
	memcpy(trampoline_base, trampoline_data, trampoline_end - trampoline_data);
	return virt_to_phys(trampoline_base);
}
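
/*
 * Note: do_boot_cpu() below plants the physical address returned here
 * in the warm-reset vector and encodes it in the STARTUP IPI, so an AP
 * coming out of INIT begins executing this trampoline copy in real
 * mode.
 */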

/*
 * We are called very early to get the low memory for the
 * SMP bootup trampoline page.
 */
void __init smp_alloc_memory(void)
{
	trampoline_base = (void *) alloc_bootmem_low_pages(PAGE_SIZE);
	/*
	 * Has to be in very low memory so we can execute
	 * real-mode AP code.
	 */
	if (__pa(trampoline_base) >= 0x9F000)
		BUG();
	/*
	 * Make the SMP trampoline executable:
	 */
	trampoline_exec = set_kernel_exec((unsigned long)trampoline_base, 1);
}
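
/*
 * Note: the 0x9F000 limit above keeps the trampoline below the top of
 * conventional memory (and the EBDA that usually sits there), since
 * the AP fetches its first instructions in real mode.
 */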

/*
 * The bootstrap kernel entry code has set these up. Save them for
 * a given CPU
 */

void __cpuinit smp_store_cpu_info(int id)
{
	struct cpuinfo_x86 *c = cpu_data + id;

	*c = boot_cpu_data;
	if (id != 0)
		identify_secondary_cpu(c);
	/*
	 * Mask B, Pentium, but not Pentium MMX
	 */
	if (c->x86_vendor == X86_VENDOR_INTEL &&
	    c->x86 == 5 &&
	    c->x86_mask >= 1 && c->x86_mask <= 4 &&
	    c->x86_model <= 3)
		/*
		 * Remember we have B step Pentia with bugs
		 */
		smp_b_stepping = 1;

	/*
	 * Certain Athlons might work (for various values of 'work') in SMP
	 * but they are not certified as MP capable.
	 */
	if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {

		if (num_possible_cpus() == 1)
			goto valid_k7;

		/* Athlon 660/661 is valid. */
		if ((c->x86_model == 6) && ((c->x86_mask == 0) || (c->x86_mask == 1)))
			goto valid_k7;

		/* Duron 670 is valid */
		if ((c->x86_model == 7) && (c->x86_mask == 0))
			goto valid_k7;

		/*
		 * Athlon 662, Duron 671, and Athlon >model 7 have the capability
		 * bit. It's worth noting that the A5 stepping (662) of some
		 * Athlon XPs have the MP bit set.
		 * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for more.
		 */
		if (((c->x86_model == 6) && (c->x86_mask >= 2)) ||
		    ((c->x86_model == 7) && (c->x86_mask >= 1)) ||
		    (c->x86_model > 7))
			if (cpu_has_mp)
				goto valid_k7;

		/* If we get here, it's not a certified SMP capable AMD system. */
		add_taint(TAINT_UNSAFE_SMP);
	}

valid_k7:
	;
}

extern void calibrate_delay(void);

static atomic_t init_deasserted;

static void __cpuinit smp_callin(void)
{
	int cpuid, phys_id;
	unsigned long timeout;

	/*
	 * If woken up by an INIT in an 82489DX configuration
	 * we may get here before an INIT-deassert IPI reaches
	 * our local APIC. We have to wait for the IPI or we'll
	 * lock up on an APIC access.
	 */
	wait_for_init_deassert(&init_deasserted);

	/*
	 * (This works even if the APIC is not enabled.)
	 */
	phys_id = GET_APIC_ID(apic_read(APIC_ID));
	cpuid = smp_processor_id();
	if (cpu_isset(cpuid, cpu_callin_map)) {
		printk("huh, phys CPU#%d, CPU#%d already present??\n",
					phys_id, cpuid);
		BUG();
	}
	Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id);

	/*
	 * STARTUP IPIs are fragile beasts as they might sometimes
	 * trigger some glue motherboard logic. Complete APIC bus
	 * silence for 1 second, this overestimates the time the
	 * boot CPU is spending to send the up to 2 STARTUP IPIs
	 * by a factor of two. This should be enough.
	 */

	/*
	 * Waiting 2s total for startup (udelay is not yet working)
	 */
	timeout = jiffies + 2*HZ;
	while (time_before(jiffies, timeout)) {
		/*
		 * Has the boot CPU finished its STARTUP sequence?
		 */
		if (cpu_isset(cpuid, cpu_callout_map))
			break;
		rep_nop();
	}

	if (!time_before(jiffies, timeout)) {
		printk("BUG: CPU%d started up but did not get a callout!\n",
			cpuid);
		BUG();
	}

	/*
	 * the boot CPU has finished the init stage and is spinning
	 * on callin_map until we finish. We are free to set up this
	 * CPU, first the APIC. (this is probably redundant on most
	 * boards)
	 */

	Dprintk("CALLIN, before setup_local_APIC().\n");
	smp_callin_clear_local_apic();
	setup_local_APIC();
	map_cpu_to_logical_apicid();

	/*
	 * Get our bogomips.
	 */
	calibrate_delay();
	Dprintk("Stack at about %p\n", &cpuid);

	/*
	 * Save our processor parameters
	 */
	smp_store_cpu_info(cpuid);

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
}
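
/*
 * Note on the handshake visible above: the BSP signals "go" by setting
 * the AP's bit in cpu_callout_map (see do_boot_cpu() below), and the
 * AP replies by setting its bit in cpu_callin_map once its APIC is set
 * up and delay calibration is done.
 */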

static int cpucount;

/* maps the cpu to the sched domain representing multi-core */
cpumask_t cpu_coregroup_map(int cpu)
{
	struct cpuinfo_x86 *c = cpu_data + cpu;
	/*
	 * For perf, we return last level cache shared map.
	 * And for power savings, we return cpu_core_map
	 */
	if (sched_mc_power_savings || sched_smt_power_savings)
		return cpu_core_map[cpu];
	else
		return c->llc_shared_map;
}

/* representing cpus for which sibling maps can be computed */
static cpumask_t cpu_sibling_setup_map;

void __cpuinit set_cpu_sibling_map(int cpu)
{
	int i;
	struct cpuinfo_x86 *c = cpu_data;

	cpu_set(cpu, cpu_sibling_setup_map);

	if (smp_num_siblings > 1) {
		for_each_cpu_mask(i, cpu_sibling_setup_map) {
			if (c[cpu].phys_proc_id == c[i].phys_proc_id &&
			    c[cpu].cpu_core_id == c[i].cpu_core_id) {
				cpu_set(i, cpu_sibling_map[cpu]);
				cpu_set(cpu, cpu_sibling_map[i]);
				cpu_set(i, cpu_core_map[cpu]);
				cpu_set(cpu, cpu_core_map[i]);
				cpu_set(i, c[cpu].llc_shared_map);
				cpu_set(cpu, c[i].llc_shared_map);
			}
		}
	} else {
		cpu_set(cpu, cpu_sibling_map[cpu]);
	}

	cpu_set(cpu, c[cpu].llc_shared_map);

	if (current_cpu_data.x86_max_cores == 1) {
		cpu_core_map[cpu] = cpu_sibling_map[cpu];
		c[cpu].booted_cores = 1;
		return;
	}

	for_each_cpu_mask(i, cpu_sibling_setup_map) {
		if (cpu_llc_id[cpu] != BAD_APICID &&
		    cpu_llc_id[cpu] == cpu_llc_id[i]) {
			cpu_set(i, c[cpu].llc_shared_map);
			cpu_set(cpu, c[i].llc_shared_map);
		}
		if (c[cpu].phys_proc_id == c[i].phys_proc_id) {
			cpu_set(i, cpu_core_map[cpu]);
			cpu_set(cpu, cpu_core_map[i]);
			/*
			 * Does this new cpu bring up a new core?
			 */
			if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
				/*
				 * for each core in package, increment
				 * the booted_cores for this new cpu
				 */
				if (first_cpu(cpu_sibling_map[i]) == i)
					c[cpu].booted_cores++;
				/*
				 * increment the core count for all
				 * the other cpus in this package
				 */
				if (i != cpu)
					c[i].booted_cores++;
			} else if (i != cpu && !c[cpu].booted_cores)
				c[cpu].booted_cores = c[i].booted_cores;
		}
	}
}

/*
 * Activate a secondary processor.
 */
static void __cpuinit start_secondary(void *unused)
{
	/*
	 * Don't put *anything* before cpu_init(); SMP booting is so
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
#ifdef CONFIG_VMI
	vmi_bringup();
#endif
	cpu_init();
	preempt_disable();
	smp_callin();
	while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
		rep_nop();
	/*
	 * Check TSC synchronization with the BP:
	 */
	check_tsc_sync_target();

	setup_secondary_clock();
	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0(NULL);
		enable_8259A_irq(0);
	}
	/*
	 * low-memory mappings have been cleared, flush them from
	 * the local TLBs too.
	 */
	local_flush_tlb();

	/* This must be done before setting cpu_online_map */
	set_cpu_sibling_map(raw_smp_processor_id());
	wmb();

	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines the number of
	 * IPI recipients and the time the determination is made as to
	 * which cpus receive the IPI. Holding this lock helps us to not
	 * include this cpu in a currently in progress smp_call_function().
	 */
	lock_ipi_call_lock();
	cpu_set(smp_processor_id(), cpu_online_map);
	unlock_ipi_call_lock();
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();
	cpu_idle();
}
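
/*
 * Note: the rep_nop() loop above parks the AP until native_cpu_up()
 * sets its bit in smp_commenced_mask, and check_tsc_sync_target()
 * pairs with the check_tsc_sync_source() call made by the BSP in
 * native_cpu_up() below.
 */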

/*
 * Everything has been set up for the secondary
 * CPUs - they just need to reload everything
 * from the task structure
 * This function must not return.
 */
void __devinit initialize_secondary(void)
{
	/*
	 * We don't actually need to load the full TSS,
	 * basically just the stack pointer and the eip.
	 */

	asm volatile(
		"movl %0,%%esp\n\t"
		"jmp *%1"
		:
		:"m" (current->thread.esp), "m" (current->thread.eip));
}
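
/*
 * Note: thread.eip was pointed at start_secondary() by the BSP in
 * do_boot_cpu() below, so the jump above lands the AP in
 * start_secondary() running on its own idle task's stack.
 */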

/* Static state in head.S used to set up a CPU */
extern struct {
	void *esp;
	unsigned short ss;
} stack_start;

#ifdef CONFIG_NUMA

/* which logical CPUs are on which nodes */
cpumask_t node_2_cpu_mask[MAX_NUMNODES] __read_mostly =
				{ [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE };
EXPORT_SYMBOL(node_2_cpu_mask);
/* which node each logical CPU is on */
int cpu_2_node[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
EXPORT_SYMBOL(cpu_2_node);

/* set up a mapping between cpu and node. */
static inline void map_cpu_to_node(int cpu, int node)
{
	printk("Mapping cpu %d to node %d\n", cpu, node);
	cpu_set(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = node;
}

/* undo a mapping between cpu and node. */
static inline void unmap_cpu_to_node(int cpu)
{
	int node;

	printk("Unmapping cpu %d from all nodes\n", cpu);
	for (node = 0; node < MAX_NUMNODES; node++)
		cpu_clear(cpu, node_2_cpu_mask[node]);
	cpu_2_node[cpu] = 0;
}
#else /* !CONFIG_NUMA */

#define map_cpu_to_node(cpu, node) ({})
#define unmap_cpu_to_node(cpu) ({})

#endif /* CONFIG_NUMA */

u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID };

static void map_cpu_to_logical_apicid(void)
{
	int cpu = smp_processor_id();
	int apicid = logical_smp_processor_id();
	int node = apicid_to_node(apicid);

	if (!node_online(node))
		node = first_online_node;

	cpu_2_logical_apicid[cpu] = apicid;
	map_cpu_to_node(cpu, node);
}

static void unmap_cpu_to_logical_apicid(int cpu)
{
	cpu_2_logical_apicid[cpu] = BAD_APICID;
	unmap_cpu_to_node(cpu);
}

static inline void __inquire_remote_apic(int apicid)
{
	int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
	char *names[] = { "ID", "VERSION", "SPIV" };
	int timeout;
	unsigned long status;

	printk("Inquiring remote APIC #%d...\n", apicid);

	for (i = 0; i < ARRAY_SIZE(regs); i++) {
		printk("... APIC #%d %s: ", apicid, names[i]);

		/*
		 * Wait for idle.
		 */
		status = safe_apic_wait_icr_idle();
		if (status)
			printk("a previous APIC delivery may have failed\n");

		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid));
		apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]);

		timeout = 0;
		do {
			udelay(100);
			status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK;
		} while (status == APIC_ICR_RR_INPROG && timeout++ < 1000);

		switch (status) {
		case APIC_ICR_RR_VALID:
			status = apic_read(APIC_RRR);
			printk("%lx\n", status);
			break;
		default:
			printk("failed\n");
		}
	}
}
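
/*
 * Note: the loop above uses APIC "remote read" delivery (APIC_DM_REMRD)
 * to fetch a few registers (ID, version, spurious vector) from a CPU
 * that failed to come up - about the only diagnostic available for a
 * wedged AP.
 */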

#ifdef WAKE_SECONDARY_VIA_NMI
/*
 * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal
 * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
 * won't ... remember to clear down the APIC, etc later.
 */
static int __devinit
wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
{
	unsigned long send_status, accept_status = 0;
	int maxlvt;

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));

	/* Boot on the stack */
	/* Kick the second */
	apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL);

	Dprintk("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	/*
	 * Give the other CPU some time to accept the IPI.
	 */
	udelay(200);
	/*
	 * Due to the Pentium erratum 3AP.
	 */
	maxlvt = lapic_get_maxlvt();
	if (maxlvt > 3) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
	}
	accept_status = (apic_read(APIC_ESR) & 0xEF);
	Dprintk("NMI sent.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_NMI */

#ifdef WAKE_SECONDARY_VIA_INIT
static int __devinit
wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status, accept_status = 0;
	int maxlvt, num_starts, j;

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	Dprintk("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/*
	 * Send IPI
	 */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT
				| APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	mdelay(10);

	Dprintk("Deasserting INIT.\n");

	/* Target chip */
	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

	/* Send IPI */
	apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT);

	Dprintk("Waiting for send to finish...\n");
	send_status = safe_apic_wait_icr_idle();

	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Paravirt / VMI wants a startup IPI hook here to set up the
	 * target processor state.
	 */
	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
			 (unsigned long) stack_start.esp);

	/*
	 * Run STARTUP IPI loop.
	 */
	Dprintk("#startup loops: %d.\n", num_starts);

	maxlvt = lapic_get_maxlvt();

	for (j = 1; j <= num_starts; j++) {
		Dprintk("Sending STARTUP #%d.\n", j);
		apic_read_around(APIC_SPIV);
		apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		Dprintk("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));

		/* Boot on the stack */
		/* Kick the second */
		apic_write_around(APIC_ICR, APIC_DM_STARTUP
					| (start_eip >> 12));

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		Dprintk("Startup point 1.\n");

		Dprintk("Waiting for send to finish...\n");
		send_status = safe_apic_wait_icr_idle();

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		/*
		 * Due to the Pentium erratum 3AP.
		 */
		if (maxlvt > 3) {
			apic_read_around(APIC_SPIV);
			apic_write(APIC_ESR, 0);
		}
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		if (send_status || accept_status)
			break;
	}
	Dprintk("After Startup.\n");

	if (send_status)
		printk("APIC never delivered???\n");
	if (accept_status)
		printk("APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}
#endif	/* WAKE_SECONDARY_VIA_INIT */
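
/*
 * Note: the sequence above is the classic MP-spec "universal start-up
 * algorithm": assert INIT, deassert INIT, then up to two STARTUP IPIs
 * for integrated APICs. The (start_eip >> 12) in the STARTUP IPI is the
 * vector field - the AP starts executing at vector * 4K - which is why
 * the trampoline must be page-aligned in low memory.
 */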

extern cpumask_t cpu_initialized;
static inline int alloc_cpu_id(void)
{
	cpumask_t tmp_map;
	int cpu;
	cpus_complement(tmp_map, cpu_present_map);
	cpu = first_cpu(tmp_map);
	if (cpu >= NR_CPUS)
		return -ENODEV;
	return cpu;
}

#ifdef CONFIG_HOTPLUG_CPU
static struct task_struct * __devinitdata cpu_idle_tasks[NR_CPUS];
static inline struct task_struct *alloc_idle_task(int cpu)
{
	struct task_struct *idle;

	if ((idle = cpu_idle_tasks[cpu]) != NULL) {
		/*
		 * Reinitialize the thread_struct; we really want to
		 * avoid destroying the idle thread.
		 */
		idle->thread.esp = (unsigned long)task_pt_regs(idle);
		init_idle(idle, cpu);
		return idle;
	}
	idle = fork_idle(cpu);

	if (!IS_ERR(idle))
		cpu_idle_tasks[cpu] = idle;
	return idle;
}
#else
#define alloc_idle_task(cpu) fork_idle(cpu)
#endif

static int __cpuinit do_boot_cpu(int apicid, int cpu)
/*
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu.
 */
{
	struct task_struct *idle;
	unsigned long boot_error;
	int timeout;
	unsigned long start_eip;
	unsigned short nmi_high = 0, nmi_low = 0;

	/*
	 * Save current MTRR state in case it was changed since early boot
	 * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync:
	 */
	mtrr_save_state();

	/*
	 * We can't use kernel_thread since we must avoid rescheduling
	 * the child.
	 */
	idle = alloc_idle_task(cpu);
	if (IS_ERR(idle))
		panic("failed fork for CPU %d", cpu);

	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);

	idle->thread.eip = (unsigned long) start_secondary;
	/* start_eip had better be page-aligned! */
	start_eip = setup_trampoline();

	++cpucount;
	alternatives_smp_switch(1);

	/* So we see what's up */
	printk("Booting processor %d/%d eip %lx\n", cpu, apicid, start_eip);
	/* Stack for startup_32 can be just as for start_secondary onwards */
	stack_start.esp = (void *) idle->thread.esp;

	irq_ctx_init(cpu);

	x86_cpu_to_apicid[cpu] = apicid;
	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	atomic_set(&init_deasserted, 0);

	Dprintk("Setting warm reset code and vector.\n");

	store_NMI_vector(&nmi_high, &nmi_low);

	smpboot_setup_warm_reset_vector(start_eip);

	/*
	 * Starting actual IPI sequence...
	 */
	boot_error = wakeup_secondary_cpu(apicid, start_eip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		Dprintk("Before Callout %d.\n", cpu);
		cpu_set(cpu, cpu_callout_map);
		Dprintk("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpu_isset(cpu, cpu_callin_map))
				break;	/* It has booted */
			udelay(100);
		}

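		/*
		 * Note: the trampoline stamps an 0xA5 marker at its own
		 * base very early on, so the check below can distinguish
		 * "started the trampoline but got stuck" from "never ran
		 * at all".
		 */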
		if (cpu_isset(cpu, cpu_callin_map)) {
			/* number CPUs logically, starting from 1 (BSP is 0) */
			Dprintk("OK.\n");
			printk("CPU%d: ", cpu);
			print_cpu_info(&cpu_data[cpu]);
			Dprintk("CPU has booted.\n");
		} else {
			boot_error = 1;
			if (*((volatile unsigned char *)trampoline_base)
					== 0xA5)
				/* trampoline started but...? */
				printk("Stuck ??\n");
			else
				/* trampoline code not run */
				printk("Not responding.\n");
			inquire_remote_apic(apicid);
		}
	}

	if (boot_error) {
		/* Try to put things back the way they were before ... */
		unmap_cpu_to_logical_apicid(cpu);
		cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */
		cpu_clear(cpu, cpu_initialized); /* was set by cpu_init() */
		cpucount--;
	} else {
		x86_cpu_to_apicid[cpu] = apicid;
		cpu_set(cpu, cpu_present_map);
	}

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)trampoline_base) = 0;

	return boot_error;
}

#ifdef CONFIG_HOTPLUG_CPU
void cpu_exit_clear(void)
{
	int cpu = raw_smp_processor_id();

	idle_task_exit();

	cpucount--;
	cpu_uninit();
	irq_ctx_exit(cpu);

	cpu_clear(cpu, cpu_callout_map);
	cpu_clear(cpu, cpu_callin_map);

	cpu_clear(cpu, smp_commenced_mask);
	unmap_cpu_to_logical_apicid(cpu);
}

struct warm_boot_cpu_info {
	struct completion *complete;
	struct work_struct task;
	int apicid;
	int cpu;
};

static void __cpuinit do_warm_boot_cpu(struct work_struct *work)
{
	struct warm_boot_cpu_info *info =
		container_of(work, struct warm_boot_cpu_info, task);
	do_boot_cpu(info->apicid, info->cpu);
	complete(info->complete);
}

static int __cpuinit __smp_prepare_cpu(int cpu)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct warm_boot_cpu_info info;
	int apicid, ret;

	apicid = x86_cpu_to_apicid[cpu];
	if (apicid == BAD_APICID) {
		ret = -ENODEV;
		goto exit;
	}

	info.complete = &done;
	info.apicid = apicid;
	info.cpu = cpu;
	INIT_WORK(&info.task, do_warm_boot_cpu);

	/* init low mem mapping */
	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
	flush_tlb_all();
	schedule_work(&info.task);
	wait_for_completion(&done);

	zap_low_mappings();
	ret = 0;
exit:
	return ret;
}
#endif
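
/*
 * Note: the low identity mappings recreated above are presumably needed
 * because a rebooting AP runs the real-mode trampoline and early
 * startup code at low physical addresses before it switches to the
 * kernel page tables; zap_low_mappings() removes them again once the
 * CPU has called in.
 */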

/*
 * Cycle through the processors sending APIC IPIs to boot each.
 */

static int boot_cpu_logical_apicid;
/* Where the IO area was mapped on multiquad, always 0 otherwise */
void *xquad_portio;
#ifdef CONFIG_X86_NUMAQ
EXPORT_SYMBOL(xquad_portio);
#endif

static void __init smp_boot_cpus(unsigned int max_cpus)
{
	int apicid, cpu, bit, kicked;
	unsigned long bogosum = 0;

	/*
	 * Setup boot CPU information
	 */
	smp_store_cpu_info(0); /* Final full version of the data */
	printk("CPU%d: ", 0);
	print_cpu_info(&cpu_data[0]);

	boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID));
	boot_cpu_logical_apicid = logical_smp_processor_id();
	x86_cpu_to_apicid[0] = boot_cpu_physical_apicid;

	current_thread_info()->cpu = 0;

	set_cpu_sibling_map(0);

	/*
	 * If we couldn't find an SMP configuration at boot time,
	 * get out of here now!
	 */
	if (!smp_found_config && !acpi_lapic) {
		printk(KERN_NOTICE "SMP motherboard not detected.\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		if (APIC_init_uniprocessor())
			printk(KERN_NOTICE "Local APIC not detected."
					   " Using dummy APIC emulation.\n");
		map_cpu_to_logical_apicid();
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	/*
	 * Should not be necessary because the MP table should list the boot
	 * CPU too, but we do it for the sake of robustness anyway.
	 * Makes no sense to do this check in clustered apic mode, so skip it
	 */
	if (!check_phys_apicid_present(boot_cpu_physical_apicid)) {
		printk("weird, boot CPU (#%d) not listed by the BIOS.\n",
				boot_cpu_physical_apicid);
		physid_set(hard_smp_processor_id(), phys_cpu_present_map);
	}

	/*
	 * If we couldn't find a local APIC, then get out of here now!
	 */
	if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) {
		printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
			boot_cpu_physical_apicid);
		printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	verify_local_APIC();

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		smp_found_config = 0;
		printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n");
		smpboot_clear_io_apic_irqs();
		phys_cpu_present_map = physid_mask_of_physid(0);
		cpu_set(0, cpu_sibling_map[0]);
		cpu_set(0, cpu_core_map[0]);
		return;
	}

	connect_bsp_APIC();
	setup_local_APIC();
	map_cpu_to_logical_apicid();


	setup_portio_remap();

	/*
	 * Scan the CPU present map and fire up the other CPUs via do_boot_cpu
	 *
	 * In clustered apic mode, phys_cpu_present_map is constructed thus:
	 * bits 0-3 are quad0, 4-7 are quad1, etc. A perverse twist on the
	 * clustered apic ID.
	 */
	Dprintk("CPU present map: %lx\n", physids_coerce(phys_cpu_present_map));

	kicked = 1;
	for (bit = 0; kicked < NR_CPUS && bit < MAX_APICS; bit++) {
		apicid = cpu_present_to_apicid(bit);
		/*
		 * Don't even attempt to start the boot CPU!
		 */
		if ((apicid == boot_cpu_apicid) || (apicid == BAD_APICID))
			continue;

		if (!check_apicid_present(bit))
			continue;
		if (max_cpus <= cpucount+1)
			continue;

		if (((cpu = alloc_cpu_id()) <= 0) || do_boot_cpu(apicid, cpu))
			printk("CPU #%d not responding - cannot use it.\n",
								apicid);
		else
			++kicked;
	}

	/*
	 * Cleanup possible dangling ends...
	 */
	smpboot_restore_warm_reset_vector();

	/*
	 * Allow the user to impress friends.
	 */
	Dprintk("Before bogomips.\n");
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (cpu_isset(cpu, cpu_callout_map))
			bogosum += cpu_data[cpu].loops_per_jiffy;
	printk(KERN_INFO
		"Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
		cpucount+1,
		bogosum/(500000/HZ),
		(bogosum/(5000/HZ))%100);
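	/*
	 * Note on the arithmetic above: BogoMIPS = loops_per_jiffy * HZ
	 * / 500000, so bogosum/(500000/HZ) yields the integer part and
	 * bogosum/(5000/HZ) % 100 the two fractional digits.
	 */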

	Dprintk("Before bogocount - setting activated=1.\n");

	if (smp_b_stepping)
		printk(KERN_WARNING "WARNING: SMP operation may be unreliable with B stepping processors.\n");

	/*
	 * Don't taint if we are running SMP kernel on a single non-MP
	 * approved Athlon
	 */
	if (tainted & TAINT_UNSAFE_SMP) {
		if (cpucount)
			printk(KERN_INFO "WARNING: This combination of AMD processors is not suitable for SMP.\n");
		else
			tainted &= ~TAINT_UNSAFE_SMP;
	}

	Dprintk("Boot done.\n");

	/*
	 * construct cpu_sibling_map[], so that we can tell sibling CPUs
	 * efficiently.
	 */
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(cpu_sibling_map[cpu]);
		cpus_clear(cpu_core_map[cpu]);
	}

	cpu_set(0, cpu_sibling_map[0]);
	cpu_set(0, cpu_core_map[0]);

	smpboot_setup_io_apic();

	setup_boot_clock();
}

/* These are wrappers to interface to the new boot process.  Someone
   who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
void __init native_smp_prepare_cpus(unsigned int max_cpus)
{
	smp_commenced_mask = cpumask_of_cpu(0);
	cpu_callin_map = cpumask_of_cpu(0);
	mb();
	smp_boot_cpus(max_cpus);
}

void __init native_smp_prepare_boot_cpu(void)
{
	unsigned int cpu = smp_processor_id();

	init_gdt(cpu);
	switch_to_new_gdt();

	cpu_set(cpu, cpu_online_map);
	cpu_set(cpu, cpu_callout_map);
	cpu_set(cpu, cpu_present_map);
	cpu_set(cpu, cpu_possible_map);
	__get_cpu_var(cpu_state) = CPU_ONLINE;
}

#ifdef CONFIG_HOTPLUG_CPU
void remove_siblinginfo(int cpu)
{
	int sibling;
	struct cpuinfo_x86 *c = cpu_data;

	for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
		cpu_clear(cpu, cpu_core_map[sibling]);
		/*
		 * last thread sibling in this cpu core going down
		 */
		if (cpus_weight(cpu_sibling_map[cpu]) == 1)
			c[sibling].booted_cores--;
	}

	for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
		cpu_clear(cpu, cpu_sibling_map[sibling]);
	cpus_clear(cpu_sibling_map[cpu]);
	cpus_clear(cpu_core_map[cpu]);
	c[cpu].phys_proc_id = 0;
	c[cpu].cpu_core_id = 0;
	cpu_clear(cpu, cpu_sibling_setup_map);
}

int __cpu_disable(void)
{
	cpumask_t map = cpu_online_map;
	int cpu = smp_processor_id();

	/*
	 * Perhaps use cpufreq to drop frequency, but that could go
	 * into generic code.
	 *
	 * We won't take down the boot processor on i386 due to some
	 * interrupts only being able to be serviced by the BSP.
	 * Especially so if we're not using an IOAPIC -zwane
	 */
	if (cpu == 0)
		return -EBUSY;
	if (nmi_watchdog == NMI_LOCAL_APIC)
		stop_apic_nmi_watchdog(NULL);
	clear_local_APIC();
	/* Allow any queued timer interrupts to get serviced */
	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	remove_siblinginfo(cpu);

	cpu_clear(cpu, map);
	fixup_irqs(map);
	/* It's now safe to remove this processor from the online map */
	cpu_clear(cpu, cpu_online_map);
	return 0;
}

void __cpu_die(unsigned int cpu)
{
	/* We don't do anything here: idle task is faking death itself. */
	unsigned int i;

	for (i = 0; i < 10; i++) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
			printk("CPU %d is now offline\n", cpu);
			if (1 == num_online_cpus())
				alternatives_smp_switch(0);
			return;
		}
		msleep(100);
	}
	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
#else /* ... !CONFIG_HOTPLUG_CPU */
int __cpu_disable(void)
{
	return -ENOSYS;
}

void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

int __cpuinit native_cpu_up(unsigned int cpu)
{
	unsigned long flags;
#ifdef CONFIG_HOTPLUG_CPU
	int ret = 0;

	/*
	 * We do warm boot only on cpus that had booted earlier.
	 * Otherwise cold boot is all handled from smp_boot_cpus().
	 * cpu_callin_map is set during AP kickstart process. It's reset
	 * when a cpu is taken offline from cpu_exit_clear().
	 */
	if (!cpu_isset(cpu, cpu_callin_map))
		ret = __smp_prepare_cpu(cpu);

	if (ret)
		return -EIO;
#endif

	/* In case one didn't come up */
	if (!cpu_isset(cpu, cpu_callin_map)) {
		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
		return -EIO;
	}

	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
	/* Unleash the CPU! */
	cpu_set(cpu, smp_commenced_mask);

	/*
	 * Check TSC synchronization with the AP (keep irqs disabled
	 * while doing so):
	 */
	local_irq_save(flags);
	check_tsc_sync_source(cpu);
	local_irq_restore(flags);

	while (!cpu_isset(cpu, cpu_online_map)) {
		cpu_relax();
		touch_nmi_watchdog();
	}

	return 0;
}

void __init native_smp_cpus_done(unsigned int max_cpus)
{
#ifdef CONFIG_X86_IO_APIC
	setup_ioapic_dest();
#endif
	zap_low_mappings();
#ifndef CONFIG_HOTPLUG_CPU
	/*
	 * Disable executability of the SMP trampoline:
	 */
	set_kernel_exec((unsigned long)trampoline_base, trampoline_exec);
#endif
}

void __init smp_intr_init(void)
{
	/*
	 * IRQ0 must be given a fixed assignment and initialized,
	 * because it's used before the IO-APIC is set up.
	 */
	set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);

	/*
	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
	 * IPI, driven by wakeup.
	 */
	set_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);

	/* IPI for invalidation */
	set_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);

	/* IPI for generic function call */
	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
}

/*
 * If the BIOS enumerates physical processors before logical,
 * maxcpus=N at enumeration-time can be used to disable HT.
 */
static int __init parse_maxcpus(char *arg)
{
	extern unsigned int maxcpus;

	maxcpus = simple_strtoul(arg, NULL, 0);
	return 0;
}
early_param("maxcpus", parse_maxcpus);