diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:17:24 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2007-10-11 05:17:24 -0400 |
commit | 250c22777fe1ccd7ac588579a6c16db4c0161cc5 (patch) | |
tree | 55c317efb7d792ec6fdae1d1937c67a502c48dec /arch/x86_64/kernel/smpboot_64.c | |
parent | 2db55d344e529492545cb3b755c7e9ba8e4fa94e (diff) |
x86_64: move kernel
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86_64/kernel/smpboot_64.c')
-rw-r--r-- | arch/x86_64/kernel/smpboot_64.c | 1085 |
1 files changed, 0 insertions, 1085 deletions
diff --git a/arch/x86_64/kernel/smpboot_64.c b/arch/x86_64/kernel/smpboot_64.c deleted file mode 100644 index 32f50783edc8..000000000000 --- a/arch/x86_64/kernel/smpboot_64.c +++ /dev/null | |||
@@ -1,1085 +0,0 @@ | |||
1 | /* | ||
2 | * x86 SMP booting functions | ||
3 | * | ||
4 | * (c) 1995 Alan Cox, Building #3 <alan@redhat.com> | ||
5 | * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> | ||
6 | * Copyright 2001 Andi Kleen, SuSE Labs. | ||
7 | * | ||
8 | * Much of the core SMP work is based on previous work by Thomas Radke, to | ||
9 | * whom a great many thanks are extended. | ||
10 | * | ||
11 | * Thanks to Intel for making available several different Pentium, | ||
12 | * Pentium Pro and Pentium-II/Xeon MP machines. | ||
13 | * Original development of Linux SMP code supported by Caldera. | ||
14 | * | ||
15 | * This code is released under the GNU General Public License version 2 | ||
16 | * | ||
17 | * Fixes | ||
18 | * Felix Koop : NR_CPUS used properly | ||
19 | * Jose Renau : Handle single CPU case. | ||
20 | * Alan Cox : By repeated request 8) - Total BogoMIP report. | ||
21 | * Greg Wright : Fix for kernel stacks panic. | ||
22 | * Erich Boleyn : MP v1.4 and additional changes. | ||
23 | * Matthias Sattler : Changes for 2.1 kernel map. | ||
24 | * Michel Lespinasse : Changes for 2.1 kernel map. | ||
25 | * Michael Chastain : Change trampoline.S to gnu as. | ||
26 | * Alan Cox : Dumb bug: 'B' step PPro's are fine | ||
27 | * Ingo Molnar : Added APIC timers, based on code | ||
28 | * from Jose Renau | ||
29 | * Ingo Molnar : various cleanups and rewrites | ||
30 | * Tigran Aivazian : fixed "0.00 in /proc/uptime on SMP" bug. | ||
31 | * Maciej W. Rozycki : Bits for genuine 82489DX APICs | ||
32 | * Andi Kleen : Changed for SMP boot into long mode. | ||
33 | * Rusty Russell : Hacked into shape for new "hotplug" boot process. | ||
34 | * Andi Kleen : Converted to new state machine. | ||
35 | * Various cleanups. | ||
36 | * Probably mostly hotplug CPU ready now. | ||
37 | * Ashok Raj : CPU hotplug support | ||
38 | */ | ||
39 | |||
40 | |||
41 | #include <linux/init.h> | ||
42 | |||
43 | #include <linux/mm.h> | ||
44 | #include <linux/kernel_stat.h> | ||
45 | #include <linux/bootmem.h> | ||
46 | #include <linux/thread_info.h> | ||
47 | #include <linux/module.h> | ||
48 | #include <linux/delay.h> | ||
49 | #include <linux/mc146818rtc.h> | ||
50 | #include <linux/smp.h> | ||
51 | #include <linux/kdebug.h> | ||
52 | |||
53 | #include <asm/mtrr.h> | ||
54 | #include <asm/pgalloc.h> | ||
55 | #include <asm/desc.h> | ||
56 | #include <asm/tlbflush.h> | ||
57 | #include <asm/proto.h> | ||
58 | #include <asm/nmi.h> | ||
59 | #include <asm/irq.h> | ||
60 | #include <asm/hw_irq.h> | ||
61 | #include <asm/numa.h> | ||
62 | |||
63 | /* Number of siblings per CPU package; set_cpu_sibling_map() only matches HT siblings when this is > 1 */ | |
64 | int smp_num_siblings = 1; | |
65 | EXPORT_SYMBOL(smp_num_siblings); | |
66 | |||
67 | /* Last level cache ID of each logical CPU; every entry starts out as BAD_APICID */ | |
68 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | |
69 | |||
70 | /* Bitmask of currently online CPUs; an AP sets its own bit near the end of start_secondary() */ | |
71 | cpumask_t cpu_online_map __read_mostly; | |
72 | |||
73 | EXPORT_SYMBOL(cpu_online_map); | |
74 | |||
75 | /* | |
76 | * Private maps to synchronize booting between AP and BP: the BP sets an AP's bit in cpu_callout_map (do_boot_cpu()) and the AP answers by setting its bit in cpu_callin_map (smp_callin()). | |
77 | * Probably not needed anymore, but it makes for easier debugging. -AK | |
78 | */ | |
79 | cpumask_t cpu_callin_map; | |
80 | cpumask_t cpu_callout_map; | |
81 | EXPORT_SYMBOL(cpu_callout_map); | |
82 | |||
83 | cpumask_t cpu_possible_map; /* with CONFIG_HOTPLUG_CPU this is filled by prefill_possible_map() */ | |
84 | EXPORT_SYMBOL(cpu_possible_map); | |
85 | |||
86 | /* Per CPU bogomips and other parameters; entry N is filled by smp_store_cpu_info(N) */ | |
87 | struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | |
88 | EXPORT_SYMBOL(cpu_data); | |
89 | |||
90 | /* Set when the idlers are all forked */ | |
91 | int smp_threads_ready; | |
92 | |||
93 | /* representing HT siblings of each logical CPU (maintained by set_cpu_sibling_map()) */ | |
94 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | |
95 | EXPORT_SYMBOL(cpu_sibling_map); | |
96 | |||
97 | /* representing HT and core siblings of each logical CPU (maintained by set_cpu_sibling_map()) */ | |
98 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | |
99 | EXPORT_SYMBOL(cpu_core_map); | |
100 | |||
101 | /* | |
102 | * Trampoline 80x86 program as an array: the bytes from trampoline_data up to trampoline_end are what setup_trampoline() copies. | |
103 | */ | |
104 | |||
105 | extern unsigned char trampoline_data[]; | |
106 | extern unsigned char trampoline_end[]; | |
107 | |||
108 | /* State of each CPU (this file stores CPU_UP_PREPARE and CPU_ONLINE here) */ | |
109 | DEFINE_PER_CPU(int, cpu_state) = { 0 }; | |
110 | |||
111 | /* | |
112 | * Store all idle threads, this can be reused instead of creating | |
113 | * a new thread. Also avoids complicated thread destroy functionality | |
114 | * for idle threads. do_boot_cpu() looks the task up first and only forks when the slot is empty. | |
115 | */ | |
116 | struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; | |
117 | |||
118 | #define get_idle_for_cpu(x) (idle_thread_array[(x)]) /* cached idle task of CPU x, NULL if none stored yet */ | |
119 | #define set_idle_for_cpu(x,p) (idle_thread_array[(x)] = (p)) /* remember p as the idle task of CPU x */ | |
120 | |||
121 | /* | |
122 | * Currently trivial. Write the real->protected mode | |
123 | * bootstrap into the page concerned. The caller | |
124 | * has made sure it's suitably aligned. Returns the physical address of the copy, which do_boot_cpu() uses as the start address for the AP. | |
125 | */ | |
126 | |||
127 | static unsigned long __cpuinit setup_trampoline(void) | |
128 | { | |
129 | void *tramp = __va(SMP_TRAMPOLINE_BASE); /* kernel virtual address for SMP_TRAMPOLINE_BASE */ | |
130 | memcpy(tramp, trampoline_data, trampoline_end - trampoline_data); /* length is the distance between the two linker symbols */ | |
131 | return virt_to_phys(tramp); | |
132 | } | |
133 | |||
134 | /* | |
135 | * The bootstrap kernel entry code has set these up. Save them for | |
136 | * a given CPU: cpu_data[id] is seeded with a copy of boot_cpu_data and then handed to identify_cpu() and print_cpu_info(). | |
137 | */ | |
138 | |||
139 | static void __cpuinit smp_store_cpu_info(int id) | |
140 | { | |
141 | struct cpuinfo_x86 *c = cpu_data + id; /* slot of logical CPU @id */ | |
142 | |||
143 | *c = boot_cpu_data; /* start from the boot CPU's values */ | |
144 | identify_cpu(c); | |
145 | print_cpu_info(c); | |
146 | } | |
147 | |||
148 | static atomic_t init_deasserted __cpuinitdata; /* set to 1 by the BP after the INIT deassert IPI was sent; reset to 0 in do_boot_cpu() */ | |
149 | |||
150 | /* | |
151 | * Report back to the Boot Processor. | |
152 | * Running on AP. Waits for init_deasserted and for our bit in cpu_callout_map, sets up the local APIC, calibrates the delay loop, stores the CPU info and finally sets our bit in cpu_callin_map. Panics if we already called in or if no callout arrives within 2*HZ jiffies. | |
153 | */ | |
154 | void __cpuinit smp_callin(void) | |
155 | { | |
156 | int cpuid, phys_id; | |
157 | unsigned long timeout; | |
158 | |||
159 | /* | |
160 | * If woken up by an INIT in an 82489DX configuration | |
161 | * we may get here before an INIT-deassert IPI reaches | |
162 | * our local APIC. We have to wait for the IPI or we'll | |
163 | * lock up on an APIC access. | |
164 | */ | |
165 | while (!atomic_read(&init_deasserted)) | |
166 | cpu_relax(); | |
167 | |||
168 | /* | |
169 | * (This works even if the APIC is not enabled.) | |
170 | */ | |
171 | phys_id = GET_APIC_ID(apic_read(APIC_ID)); | |
172 | cpuid = smp_processor_id(); | |
173 | if (cpu_isset(cpuid, cpu_callin_map)) { | |
174 | panic("smp_callin: phys CPU#%d, CPU#%d already present??\n", | |
175 | phys_id, cpuid); | |
176 | } | |
177 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); | |
178 | |||
179 | /* | |
180 | * STARTUP IPIs are fragile beasts as they might sometimes | |
181 | * trigger some glue motherboard logic. Complete APIC bus | |
182 | * silence for 1 second, this overestimates the time the | |
183 | * boot CPU is spending to send the up to 2 STARTUP IPIs | |
184 | * by a factor of two. This should be enough. (No separate delay follows here; the wait loop below is the only one.) | |
185 | */ | |
186 | |||
187 | /* | |
188 | * Waiting 2s total for startup (udelay is not yet working) | |
189 | */ | |
190 | timeout = jiffies + 2*HZ; | |
191 | while (time_before(jiffies, timeout)) { | |
192 | /* | |
193 | * Has the boot CPU finished its STARTUP sequence? | |
194 | */ | |
195 | if (cpu_isset(cpuid, cpu_callout_map)) | |
196 | break; | |
197 | cpu_relax(); | |
198 | } | |
199 | |||
200 | if (!cpu_isset(cpuid, cpu_callout_map)) { /* test the map, not the clock: a callout seen just before the deadline must not panic */ | |
201 | panic("smp_callin: CPU%d started up but did not get a callout!\n", | |
202 | cpuid); | |
203 | } | |
204 | |||
205 | /* | |
206 | * the boot CPU has finished the init stage and is spinning | |
207 | * on callin_map until we finish. We are free to set up this | |
208 | * CPU, first the APIC. (this is probably redundant on most | |
209 | * boards) | |
210 | */ | |
211 | |||
212 | Dprintk("CALLIN, before setup_local_APIC().\n"); | |
213 | setup_local_APIC(); | |
214 | |||
215 | /* | |
216 | * Get our bogomips. | |
217 | * | |
218 | * Need to enable IRQs because it can take longer and then | |
219 | * the NMI watchdog might kill us. | |
220 | */ | |
221 | local_irq_enable(); | |
222 | calibrate_delay(); | |
223 | local_irq_disable(); | |
224 | Dprintk("Stack at about %p\n",&cpuid); | |
225 | |||
226 | disable_APIC_timer(); | |
227 | |||
228 | /* | |
229 | * Save our processor parameters | |
230 | */ | |
231 | smp_store_cpu_info(cpuid); | |
232 | |||
233 | /* | |
234 | * Allow the master to continue. | |
235 | */ | |
236 | cpu_set(cpuid, cpu_callin_map); /* do_boot_cpu() on the BP polls for this bit */ | |
237 | } | |
238 | |||
239 | /* Return the CPU mask the scheduler should treat as the multi-core group of @cpu. */ | |
240 | cpumask_t cpu_coregroup_map(int cpu) | |
241 | { | |
242 | struct cpuinfo_x86 *info = &cpu_data[cpu]; | |
243 | /* | |
244 | * Without a power-savings policy, group by shared last level cache | |
245 | * (best for performance); otherwise group by cpu_core_map. | |
246 | */ | |
247 | if (!sched_mc_power_savings && !sched_smt_power_savings) | |
248 | return info->llc_shared_map; | |
249 | |||
250 | return cpu_core_map[cpu]; | |
251 | } | |
252 | |||
253 | /* representing cpus for which sibling maps can be computed */ | |
254 | static cpumask_t cpu_sibling_setup_map; | |
255 | |||
256 | static inline void set_cpu_sibling_map(int cpu) /* enter @cpu into the sibling, core and LLC-sharing maps of itself and of every CPU already in cpu_sibling_setup_map, and keep booted_cores up to date */ | |
257 | { | |
258 | int i; | |
259 | struct cpuinfo_x86 *c = cpu_data; | |
260 | |||
261 | cpu_set(cpu, cpu_sibling_setup_map); /* @cpu itself takes part in the loops below */ | |
262 | |||
263 | if (smp_num_siblings > 1) { /* CPUs with equal package id and equal core id are linked in all three maps, in both directions */ | |
264 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | |
265 | if (c[cpu].phys_proc_id == c[i].phys_proc_id && | |
266 | c[cpu].cpu_core_id == c[i].cpu_core_id) { | |
267 | cpu_set(i, cpu_sibling_map[cpu]); | |
268 | cpu_set(cpu, cpu_sibling_map[i]); | |
269 | cpu_set(i, cpu_core_map[cpu]); | |
270 | cpu_set(cpu, cpu_core_map[i]); | |
271 | cpu_set(i, c[cpu].llc_shared_map); | |
272 | cpu_set(cpu, c[i].llc_shared_map); | |
273 | } | |
274 | } | |
275 | } else { /* no sibling matching: the CPU is its own only sibling */ | |
276 | cpu_set(cpu, cpu_sibling_map[cpu]); | |
277 | } | |
278 | |||
279 | cpu_set(cpu, c[cpu].llc_shared_map); /* a CPU always shares its cache with itself */ | |
280 | |||
281 | if (current_cpu_data.x86_max_cores == 1) { /* one core per package: the core map is just the sibling map */ | |
282 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | |
283 | c[cpu].booted_cores = 1; | |
284 | return; | |
285 | } | |
286 | |||
287 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | |
288 | if (cpu_llc_id[cpu] != BAD_APICID && /* only a known LLC id may be compared */ | |
289 | cpu_llc_id[cpu] == cpu_llc_id[i]) { | |
290 | cpu_set(i, c[cpu].llc_shared_map); | |
291 | cpu_set(cpu, c[i].llc_shared_map); | |
292 | } | |
293 | if (c[cpu].phys_proc_id == c[i].phys_proc_id) { /* same physical package */ | |
294 | cpu_set(i, cpu_core_map[cpu]); | |
295 | cpu_set(cpu, cpu_core_map[i]); | |
296 | /* | |
297 | * Does this new cpu bring up a new core? (Its sibling map then holds only itself.) | |
298 | */ | |
299 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | |
300 | /* | |
301 | * for each core in package, increment | |
302 | * the booted_cores for this new cpu | |
303 | */ | |
304 | if (first_cpu(cpu_sibling_map[i]) == i) /* count each core once, via the first CPU in its sibling map */ | |
305 | c[cpu].booted_cores++; | |
306 | /* | |
307 | * increment the core count for all | |
308 | * the other cpus in this package | |
309 | */ | |
310 | if (i != cpu) | |
311 | c[i].booted_cores++; | |
312 | } else if (i != cpu && !c[cpu].booted_cores) /* joining an existing core: copy the count from a package mate */ | |
313 | c[cpu].booted_cores = c[i].booted_cores; | |
314 | } | |
315 | } | |
316 | } | |
317 | |||
318 | /* | |
319 | * Setup code on secondary processor (after coming out of the trampoline). do_boot_cpu() installs this function as initial_code; it ends by calling cpu_idle(). | |
320 | */ | |
321 | void __cpuinit start_secondary(void) | |
322 | { | |
323 | /* | |
324 | * Don't put anything before smp_callin(), SMP | |
325 | * booting is too fragile that we want to limit the | |
326 | * things done here to the most necessary things. | |
327 | */ | |
328 | cpu_init(); | |
329 | preempt_disable(); | |
330 | smp_callin(); | |
331 | |||
332 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ | |
333 | barrier(); | |
334 | |||
335 | /* | |
336 | * Check TSC sync first (counterpart of check_tsc_sync_source() called in __cpu_up()): | |
337 | */ | |
338 | check_tsc_sync_target(); | |
339 | |||
340 | Dprintk("cpu %d: setting up apic clock\n", smp_processor_id()); | |
341 | setup_secondary_APIC_clock(); | |
342 | |||
343 | Dprintk("cpu %d: enabling apic timer\n", smp_processor_id()); | |
344 | |||
345 | if (nmi_watchdog == NMI_IO_APIC) { /* IRQ 0 of the 8259A is disabled while LVT0 is switched to NMI delivery */ | |
346 | disable_8259A_irq(0); | |
347 | enable_NMI_through_LVT0(NULL); | |
348 | enable_8259A_irq(0); | |
349 | } | |
350 | |||
351 | enable_APIC_timer(); | |
352 | |||
353 | /* | |
354 | * The sibling maps must be set before turning the online map on for | |
355 | * this cpu | |
356 | */ | |
357 | set_cpu_sibling_map(smp_processor_id()); | |
358 | |||
359 | /* | |
360 | * We need to hold call_lock, so there is no inconsistency | |
361 | * between the time smp_call_function() determines number of | |
362 | * IPI recipients, and the time when the determination is made | |
363 | * for which cpus receive the IPI in genapic_flat.c. Holding this | |
364 | * lock helps us to not include this cpu in a currently in progress | |
365 | * smp_call_function(). | |
366 | */ | |
367 | lock_ipi_call_lock(); | |
368 | spin_lock(&vector_lock); | |
369 | |||
370 | /* Setup the per cpu irq handling data structures */ | |
371 | __setup_vector_irq(smp_processor_id()); | |
372 | /* | |
373 | * Allow the master to continue. | |
374 | */ | |
375 | cpu_set(smp_processor_id(), cpu_online_map); /* __cpu_up() on the BP spins until this bit appears */ | |
376 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | |
377 | spin_unlock(&vector_lock); | |
378 | |||
379 | unlock_ipi_call_lock(); | |
380 | |||
381 | cpu_idle(); | |
382 | } | |
383 | |||
384 | extern volatile unsigned long init_rsp; | ||
385 | extern void (*initial_code)(void); | ||
386 | |||
387 | #ifdef APIC_DEBUG | |
388 | static void inquire_remote_apic(int apicid) /* debug aid: remote-read the ID, VERSION and SPIV registers of APIC @apicid and print each value, or "failed" */ | |
389 | { | |
390 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; /* register offsets shifted right by 4, as written into the ICR below */ | |
391 | char *names[] = { "ID", "VERSION", "SPIV" }; /* labels, same order as regs[] */ | |
392 | int timeout; | |
393 | unsigned int status; | |
394 | |||
395 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); | |
396 | |||
397 | for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { | |
398 | printk("... APIC #%d %s: ", apicid, names[i]); | |
399 | |||
400 | /* | |
401 | * Wait for idle. | |
402 | */ | |
403 | status = safe_apic_wait_icr_idle(); | |
404 | if (status) | |
405 | printk("a previous APIC delivery may have failed\n"); | |
406 | |||
407 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); /* destination first, then the command */ | |
408 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); | |
409 | |||
410 | timeout = 0; | |
411 | do { /* poll every 100us while the remote read is still in progress, giving up after about 1000 polls */ | |
412 | udelay(100); | |
413 | status = apic_read(APIC_ICR) & APIC_ICR_RR_MASK; | |
414 | } while (status == APIC_ICR_RR_INPROG && timeout++ < 1000); | |
415 | |||
416 | switch (status) { | |
417 | case APIC_ICR_RR_VALID: | |
418 | status = apic_read(APIC_RRR); /* the value that was read remotely */ | |
419 | printk("%08x\n", status); | |
420 | break; | |
421 | default: /* still in progress after the timeout, or any other status */ | |
422 | printk("failed\n"); | |
423 | } | |
424 | } | |
425 | } | |
426 | #endif | |
427 | |||
428 | /* | |
429 | * Kick the secondary to wake up: INIT assert, INIT deassert, then up to two STARTUP IPIs pointing at @start_rip, all addressed to APIC @phys_apicid. Returns 0 on success, otherwise the OR of the last send status and accept status. | |
430 | */ | |
431 | static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) | |
432 | { | |
433 | unsigned long send_status, accept_status = 0; | |
434 | int maxlvt, num_starts, j; | |
435 | |||
436 | Dprintk("Asserting INIT.\n"); | |
437 | |||
438 | /* | |
439 | * Turn INIT on target chip | |
440 | */ | |
441 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | |
442 | |||
443 | /* | |
444 | * Send IPI | |
445 | */ | |
446 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | |
447 | | APIC_DM_INIT); | |
448 | |||
449 | Dprintk("Waiting for send to finish...\n"); | |
450 | send_status = safe_apic_wait_icr_idle(); | |
451 | |||
452 | mdelay(10); /* keep INIT asserted for 10ms before deasserting */ | |
453 | |||
454 | Dprintk("Deasserting INIT.\n"); | |
455 | |||
456 | /* Target chip */ | |
457 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | |
458 | |||
459 | /* Send IPI */ | |
460 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | |
461 | |||
462 | Dprintk("Waiting for send to finish...\n"); | |
463 | send_status = safe_apic_wait_icr_idle(); | |
464 | |||
465 | mb(); | |
466 | atomic_set(&init_deasserted, 1); /* lets the AP leave its first wait loop in smp_callin() */ | |
467 | |||
468 | num_starts = 2; | |
469 | |||
470 | /* | |
471 | * Run STARTUP IPI loop. | |
472 | */ | |
473 | Dprintk("#startup loops: %d.\n", num_starts); | |
474 | |||
475 | maxlvt = get_maxlvt(); | |
476 | |||
477 | for (j = 1; j <= num_starts; j++) { | |
478 | Dprintk("Sending STARTUP #%d.\n",j); | |
479 | apic_write(APIC_ESR, 0); | |
480 | apic_read(APIC_ESR); | |
481 | Dprintk("After apic_write.\n"); | |
482 | |||
483 | /* | |
484 | * STARTUP IPI | |
485 | */ | |
486 | |||
487 | /* Target chip */ | |
488 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | |
489 | |||
490 | /* Boot on the stack */ | |
491 | /* Kick the second */ | |
492 | apic_write(APIC_ICR, APIC_DM_STARTUP | (start_rip >> 12)); /* the IPI carries start_rip >> 12, i.e. the start address in 4K units */ | |
493 | |||
494 | /* | |
495 | * Give the other CPU some time to accept the IPI. | |
496 | */ | |
497 | udelay(300); | |
498 | |||
499 | Dprintk("Startup point 1.\n"); | |
500 | |||
501 | Dprintk("Waiting for send to finish...\n"); | |
502 | send_status = safe_apic_wait_icr_idle(); | |
503 | |||
504 | /* | |
505 | * Give the other CPU some time to accept the IPI. | |
506 | */ | |
507 | udelay(200); | |
508 | /* | |
509 | * Due to the Pentium erratum 3AP. | |
510 | */ | |
511 | if (maxlvt > 3) { | |
512 | apic_write(APIC_ESR, 0); | |
513 | } | |
514 | accept_status = (apic_read(APIC_ESR) & 0xEF); | |
515 | if (send_status || accept_status) /* any error ends the loop early; no second STARTUP is sent */ | |
516 | break; | |
517 | } | |
518 | Dprintk("After Startup.\n"); | |
519 | |||
520 | if (send_status) | |
521 | printk(KERN_ERR "APIC never delivered???\n"); | |
522 | if (accept_status) | |
523 | printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status); | |
524 | |||
525 | return (send_status | accept_status); | |
526 | } | |
527 | |||
528 | struct create_idle { /* request/result record used by do_boot_cpu() to fork an idle task through do_fork_idle() */ | |
529 | struct work_struct work; /* work item whose handler is do_fork_idle() */ | |
530 | struct task_struct *idle; /* out: return value of fork_idle(); do_boot_cpu() checks it with IS_ERR() */ | |
531 | struct completion done; /* completed by do_fork_idle() once idle is filled in */ | |
532 | int cpu; /* in: CPU the idle task is forked for */ | |
533 | }; | |
534 | |||
535 | void do_fork_idle(struct work_struct *work) | |
536 | { | |
537 | struct create_idle *req; /* the request embedding @work */ | |
538 | |||
539 | req = container_of(work, struct create_idle, work); | |
540 | req->idle = fork_idle(req->cpu); /* store the result, error pointer or not */ | |
541 | complete(&req->done); /* wake up whoever waits on the request */ | |
542 | } | |
543 | |||
544 | /* | |
545 | * Boot one CPU: prepare GDT page, PDA and idle task for logical CPU @cpu, start it through APIC id @apicid and wait for it to call in. Returns 0 on success, -ENOMEM if no GDT page could be allocated, the fork_idle() error if no idle task could be created, or -EIO if the CPU did not come up. | |
546 | */ | |
547 | static int __cpuinit do_boot_cpu(int cpu, int apicid) | |
548 | { | |
549 | unsigned long boot_error; | |
550 | int timeout; | |
551 | unsigned long start_rip; | |
552 | struct create_idle c_idle = { | |
553 | .work = __WORK_INITIALIZER(c_idle.work, do_fork_idle), | |
554 | .cpu = cpu, | |
555 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | |
556 | }; | |
557 | |||
558 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | |
559 | if (!cpu_gdt_descr[cpu].address && | |
560 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { | |
561 | printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); | |
562 | return -ENOMEM; /* a real errno, like the other failure paths of this function */ | |
563 | } | |
564 | |||
565 | /* Allocate node local memory for AP pdas */ | |
566 | if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { | |
567 | struct x8664_pda *newpda, *pda; | |
568 | int node = cpu_to_node(cpu); | |
569 | pda = cpu_pda(cpu); | |
570 | newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC, | |
571 | node); | |
572 | if (newpda) { | |
573 | memcpy(newpda, pda, sizeof (struct x8664_pda)); | |
574 | cpu_pda(cpu) = newpda; | |
575 | } else /* not fatal: the CPU keeps using its boot_cpu_pda entry */ | |
576 | printk(KERN_ERR | |
577 | "Could not allocate node local PDA for CPU %d on node %d\n", | |
578 | cpu, node); | |
579 | } | |
580 | |||
581 | alternatives_smp_switch(1); | |
582 | |||
583 | c_idle.idle = get_idle_for_cpu(cpu); | |
584 | |||
585 | if (c_idle.idle) { /* an idle task from an earlier boot of this CPU exists: reset its stack pointer and reuse it */ | |
586 | c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) | |
587 | (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); | |
588 | init_idle(c_idle.idle, cpu); | |
589 | goto do_rest; | |
590 | } | |
591 | |||
592 | /* | |
593 | * During cold boot process, keventd thread is not spun up yet. | |
594 | * When we do cpu hot-add, we create idle threads on the fly, we should | |
595 | * not acquire any attributes from the calling context. Hence the clean | |
596 | * way to create kernel_threads() is to do that from keventd(). | |
597 | * We do the current_is_keventd() due to the fact that ACPI notifier | |
598 | * was also queuing to keventd() and when the caller is already running | |
599 | * in context of keventd(), we would end up with locking up the keventd | |
600 | * thread. | |
601 | */ | |
602 | if (!keventd_up() || current_is_keventd()) | |
603 | c_idle.work.func(&c_idle.work); /* run do_fork_idle() directly */ | |
604 | else { | |
605 | schedule_work(&c_idle.work); | |
606 | wait_for_completion(&c_idle.done); | |
607 | } | |
608 | |||
609 | if (IS_ERR(c_idle.idle)) { | |
610 | printk("failed fork for CPU %d\n", cpu); | |
611 | return PTR_ERR(c_idle.idle); | |
612 | } | |
613 | |||
614 | set_idle_for_cpu(cpu, c_idle.idle); /* cache it for a later boot of the same CPU */ | |
615 | |||
616 | do_rest: | |
617 | |||
618 | cpu_pda(cpu)->pcurrent = c_idle.idle; | |
619 | |||
620 | start_rip = setup_trampoline(); /* physical address the AP starts executing at */ | |
621 | |||
622 | init_rsp = c_idle.idle->thread.rsp; | |
623 | per_cpu(init_tss,cpu).rsp0 = init_rsp; | |
624 | initial_code = start_secondary; | |
625 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); | |
626 | |||
627 | printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, | |
628 | cpus_weight(cpu_present_map), | |
629 | apicid); | |
630 | |||
631 | /* | |
632 | * This grunge runs the startup process for | |
633 | * the targeted processor. | |
634 | */ | |
635 | |||
636 | atomic_set(&init_deasserted, 0); /* set again by wakeup_secondary_via_INIT() after the INIT deassert */ | |
637 | |||
638 | Dprintk("Setting warm reset code and vector.\n"); | |
639 | |||
640 | CMOS_WRITE(0xa, 0xf); | |
641 | local_flush_tlb(); | |
642 | Dprintk("1.\n"); | |
643 | *((volatile unsigned short *) phys_to_virt(0x469)) = start_rip >> 4; | |
644 | Dprintk("2.\n"); | |
645 | *((volatile unsigned short *) phys_to_virt(0x467)) = start_rip & 0xf; | |
646 | Dprintk("3.\n"); | |
647 | |||
648 | /* | |
649 | * Be paranoid about clearing APIC errors. | |
650 | */ | |
651 | apic_write(APIC_ESR, 0); | |
652 | apic_read(APIC_ESR); | |
653 | |||
654 | /* | |
655 | * Status is now clean | |
656 | */ | |
657 | boot_error = 0; | |
658 | |||
659 | /* | |
660 | * Starting actual IPI sequence... | |
661 | */ | |
662 | boot_error = wakeup_secondary_via_INIT(apicid, start_rip); | |
663 | |||
664 | if (!boot_error) { | |
665 | /* | |
666 | * allow APs to start initializing. | |
667 | */ | |
668 | Dprintk("Before Callout %d.\n", cpu); | |
669 | cpu_set(cpu, cpu_callout_map); /* smp_callin() on the AP waits for this bit */ | |
670 | Dprintk("After Callout %d.\n", cpu); | |
671 | |||
672 | /* | |
673 | * Wait 5s total for a response (50000 polls, 100us apart) | |
674 | */ | |
675 | for (timeout = 0; timeout < 50000; timeout++) { | |
676 | if (cpu_isset(cpu, cpu_callin_map)) | |
677 | break; /* It has booted */ | |
678 | udelay(100); | |
679 | } | |
680 | |||
681 | if (cpu_isset(cpu, cpu_callin_map)) { | |
682 | /* number CPUs logically, starting from 1 (BSP is 0) */ | |
683 | Dprintk("CPU has booted.\n"); | |
684 | } else { | |
685 | boot_error = 1; | |
686 | if (*((volatile unsigned char *)phys_to_virt(SMP_TRAMPOLINE_BASE)) | |
687 | == 0xA5) | |
688 | /* trampoline started but...? */ | |
689 | printk("Stuck ??\n"); | |
690 | else | |
691 | /* trampoline code not run */ | |
692 | printk("Not responding.\n"); | |
693 | #ifdef APIC_DEBUG | |
694 | inquire_remote_apic(apicid); | |
695 | #endif | |
696 | } | |
697 | } | |
698 | if (boot_error) { /* undo the bookkeeping for a CPU that did not come up */ | |
699 | cpu_clear(cpu, cpu_callout_map); /* was set here (do_boot_cpu()) */ | |
700 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ | |
701 | clear_node_cpumask(cpu); /* was set by numa_add_cpu */ | |
702 | cpu_clear(cpu, cpu_present_map); | |
703 | cpu_clear(cpu, cpu_possible_map); | |
704 | x86_cpu_to_apicid[cpu] = BAD_APICID; | |
705 | x86_cpu_to_log_apicid[cpu] = BAD_APICID; | |
706 | return -EIO; | |
707 | } | |
708 | |||
709 | return 0; | |
710 | } | |
711 | |||
712 | cycles_t cacheflush_time; /* NOTE(review): not referenced anywhere in the visible part of this file */ | |
713 | unsigned long cache_decay_ticks; /* NOTE(review): likewise unreferenced in the visible part of this file */ | |
714 | |||
715 | /* | |
716 | * Cleanup possible dangling ends... Resets what do_boot_cpu() set up for the warm reset: CMOS register 0xf and the vector at 0x467. | |
717 | */ | |
718 | static __cpuinit void smp_cleanup_boot(void) | |
719 | { | |
720 | /* | |
721 | * Paranoid: Set warm reset code and vector here back | |
722 | * to default values. | |
723 | */ | |
724 | CMOS_WRITE(0, 0xf); /* do_boot_cpu() wrote 0xa to this register */ | |
725 | |||
726 | /* | |
727 | * Reset trampoline flag | |
728 | */ | |
729 | *((volatile int *) phys_to_virt(0x467)) = 0; /* one int-wide store starting at 0x467; do_boot_cpu() wrote two shorts, at 0x467 and 0x469 */ | |
730 | } | |
731 | |||
732 | /* | |
733 | * Fall back to non SMP mode after errors: only CPU 0 stays in the present and possible maps, and it is entered in its own sibling and core maps. | |
734 | * | |
735 | * RED-PEN audit/test this more. I bet there is more state messed up here. | |
736 | */ | |
737 | static __init void disable_smp(void) | |
738 | { | |
739 | cpu_present_map = cpumask_of_cpu(0); | |
740 | cpu_possible_map = cpumask_of_cpu(0); | |
741 | if (smp_found_config) /* keep the boot CPU's id if a configuration was found, otherwise fall back to physical id 0 */ | |
742 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); | |
743 | else | |
744 | phys_cpu_present_map = physid_mask_of_physid(0); | |
745 | cpu_set(0, cpu_sibling_map[0]); | |
746 | cpu_set(0, cpu_core_map[0]); | |
747 | } | |
748 | |||
749 | #ifdef CONFIG_HOTPLUG_CPU | ||
750 | |||
751 | int additional_cpus __initdata = -1; | ||
752 | |||
753 | /* | ||
754 | * cpu_possible_map should be static, it cannot change as cpu's | ||
755 | * are onlined, or offlined. The reason is per-cpu data-structures | ||
756 | * are allocated by some modules at init time, and dont expect to | ||
757 | * do this dynamically on cpu arrival/departure. | ||
758 | * cpu_present_map on the other hand can change dynamically. | ||
759 | * In case when cpu_hotplug is not compiled, then we resort to current | ||
760 | * behaviour, which is cpu_possible == cpu_present. | ||
761 | * - Ashok Raj | ||
762 | * | ||
763 | * Three ways to find out the number of additional hotplug CPUs: | ||
764 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | ||
765 | * - The user can overwrite it with additional_cpus=NUM | ||
766 | * - Otherwise don't reserve additional CPUs. | ||
767 | * We do this because additional CPUs waste a lot of memory. | ||
768 | * -AK | ||
769 | */ | ||
770 | __init void prefill_possible_map(void) | |
771 | { | |
772 | int cpu, limit, hotplug; | |
773 | |||
774 | /* additional_cpus still at its -1 default: use the disabled-CPU count, if any. */ | |
775 | if (additional_cpus == -1) | |
776 | additional_cpus = (disabled_cpus > 0) ? disabled_cpus : 0; | |
777 | |||
778 | /* Present plus additional CPUs, capped at NR_CPUS. */ | |
779 | limit = num_processors + additional_cpus; | |
780 | if (limit > NR_CPUS) | |
781 | limit = NR_CPUS; | |
782 | |||
783 | /* Hotplug slots are whatever lies beyond the present CPUs, never negative. */ | |
784 | hotplug = max_t(int, limit - num_processors, 0); | |
785 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | |
786 | limit, hotplug); | |
787 | |||
788 | /* Mark logical CPUs 0 .. limit-1 as possible. */ | |
789 | for (cpu = 0; cpu < limit; cpu++) | |
790 | cpu_set(cpu, cpu_possible_map); | |
791 | } | |
792 | #endif | ||
793 | |||
794 | /* | |
795 | * Various sanity checks. Returns 0 if SMP bring-up may proceed and -1 if SMP has to be disabled (no SMP configuration, no local APIC, or @max_cpus == 0). | |
796 | */ | |
797 | static int __init smp_sanity_check(unsigned max_cpus) | |
798 | { | |
799 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | |
800 | printk("weird, boot CPU (#%d) not listed by the BIOS.\n", /* NOTE(review): no KERN_ level here, unlike the same message further down */ | |
801 | hard_smp_processor_id()); | |
802 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | |
803 | } | |
804 | |||
805 | /* | |
806 | * If we couldn't find an SMP configuration at boot time, | |
807 | * get out of here now! | |
808 | */ | |
809 | if (!smp_found_config) { | |
810 | printk(KERN_NOTICE "SMP motherboard not detected.\n"); | |
811 | disable_smp(); /* the caller smp_prepare_cpus() calls disable_smp() once more on the -1 return */ | |
812 | if (APIC_init_uniprocessor()) | |
813 | printk(KERN_NOTICE "Local APIC not detected." | |
814 | " Using dummy APIC emulation.\n"); | |
815 | return -1; | |
816 | } | |
817 | |||
818 | /* | |
819 | * Should not be necessary because the MP table should list the boot | |
820 | * CPU too, but we do it for the sake of robustness anyway. | |
821 | */ | |
822 | if (!physid_isset(boot_cpu_id, phys_cpu_present_map)) { | |
823 | printk(KERN_NOTICE "weird, boot CPU (#%d) not listed by the BIOS.\n", | |
824 | boot_cpu_id); | |
825 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); /* NOTE(review): the test and the message use boot_cpu_id but the bit set is hard_smp_processor_id() - confirm this is intended */ | |
826 | } | |
827 | |||
828 | /* | |
829 | * If we couldn't find a local APIC, then get out of here now! | |
830 | */ | |
831 | if (!cpu_has_apic) { | |
832 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | |
833 | boot_cpu_id); | |
834 | printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); | |
835 | nr_ioapics = 0; | |
836 | return -1; | |
837 | } | |
838 | |||
839 | /* | |
840 | * If SMP should be disabled, then really disable it! | |
841 | */ | |
842 | if (!max_cpus) { | |
843 | printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); | |
844 | nr_ioapics = 0; | |
845 | return -1; | |
846 | } | |
847 | |||
848 | return 0; | |
849 | } | |
850 | |||
851 | /* | |
852 | * Prepare for SMP bootup. The MP table or ACPI has been read | |
853 | * earlier. Just do some sanity checking here and enable APIC mode. If smp_sanity_check() rejects the setup, SMP is disabled and the APIC setup below is skipped. | |
854 | */ | |
855 | void __init smp_prepare_cpus(unsigned int max_cpus) | |
856 | { | |
857 | nmi_watchdog_default(); | |
858 | current_cpu_data = boot_cpu_data; | |
859 | current_thread_info()->cpu = 0; /* needed? */ | |
860 | set_cpu_sibling_map(0); /* sibling/core maps of the boot CPU */ | |
861 | |||
862 | if (smp_sanity_check(max_cpus) < 0) { | |
863 | printk(KERN_INFO "SMP disabled\n"); | |
864 | disable_smp(); | |
865 | return; | |
866 | } | |
867 | |||
868 | |||
869 | /* | |
870 | * Switch from PIC to APIC mode. | |
871 | */ | |
872 | setup_local_APIC(); | |
873 | |||
874 | if (GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_id) { /* the local APIC must report the id recorded as boot_cpu_id */ | |
875 | panic("Boot APIC ID in local APIC unexpected (%d vs %d)", | |
876 | GET_APIC_ID(apic_read(APIC_ID)), boot_cpu_id); | |
877 | /* Or can we switch back to PIC here? */ | |
878 | } | |
879 | |||
880 | /* | |
881 | * Now start the IO-APICs (unless skipped or none were found; nr_ioapics is then forced to 0) | |
882 | */ | |
883 | if (!skip_ioapic_setup && nr_ioapics) | |
884 | setup_IO_APIC(); | |
885 | else | |
886 | nr_ioapics = 0; | |
887 | |||
888 | /* | |
889 | * Set up local APIC timer on boot CPU. | |
890 | */ | |
891 | |||
892 | setup_boot_APIC_clock(); | |
893 | } | |
894 | |||
895 | /* | ||
896 | * Early setup to make printk work. | ||
897 | */ | ||
898 | void __init smp_prepare_boot_cpu(void) | |
899 | { | |
900 | int self = smp_processor_id(); /* the CPU executing this code */ | |
901 | cpu_set(self, cpu_online_map); /* mark it online ... */ | |
902 | cpu_set(self, cpu_callout_map); /* ... and called out */ | |
903 | per_cpu(cpu_state, self) = CPU_ONLINE; | |
904 | } | |
905 | |||
906 | /* | |
907 | * Entry point to boot a CPU. Returns 0 once @cpu shows up in cpu_online_map, -EINVAL if its APIC id is unusable, -ENOSYS if the CPU has already called in, or the error returned by do_boot_cpu(). | |
908 | */ | |
909 | int __cpuinit __cpu_up(unsigned int cpu) | |
910 | { | |
911 | int apicid = cpu_present_to_apicid(cpu); | |
912 | unsigned long flags; | |
913 | int err; | |
914 | |||
915 | WARN_ON(irqs_disabled()); | |
916 | |||
917 | Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); | |
918 | |||
919 | if (apicid == BAD_APICID || apicid == boot_cpu_id || /* no APIC id, the boot CPU itself, or an id missing from phys_cpu_present_map */ | |
920 | !physid_isset(apicid, phys_cpu_present_map)) { | |
921 | printk("__cpu_up: bad cpu %d\n", cpu); | |
922 | return -EINVAL; | |
923 | } | |
924 | |||
925 | /* | |
926 | * Already booted CPU? | |
927 | */ | |
928 | if (cpu_isset(cpu, cpu_callin_map)) { | |
929 | Dprintk("do_boot_cpu %d Already started\n", cpu); | |
930 | return -ENOSYS; | |
931 | } | |
932 | |||
933 | /* | |
934 | * Save current MTRR state in case it was changed since early boot | |
935 | * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: | |
936 | */ | |
937 | mtrr_save_state(); | |
938 | |||
939 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | |
940 | /* Boot it! */ | |
941 | err = do_boot_cpu(cpu, apicid); | |
942 | if (err < 0) { | |
943 | Dprintk("do_boot_cpu failed %d\n", err); | |
944 | return err; | |
945 | } | |
946 | |||
947 | /* Unleash the CPU! */ | |
948 | Dprintk("waiting for cpu %d\n", cpu); | |
949 | |||
950 | /* | |
951 | * Make sure and check TSC sync (the AP runs check_tsc_sync_target() in start_secondary()): | |
952 | */ | |
953 | local_irq_save(flags); | |
954 | check_tsc_sync_source(cpu); | |
955 | local_irq_restore(flags); | |
956 | |||
957 | while (!cpu_isset(cpu, cpu_online_map)) /* the AP sets this bit near the end of start_secondary(); this loop has no timeout */ | |
958 | cpu_relax(); | |
959 | err = 0; | |
960 | |||
961 | return err; | |
962 | } | |
963 | |||
964 | /* | ||
965 | * Finish the SMP boot. | ||
966 | */ | ||
967 | void __init smp_cpus_done(unsigned int max_cpus) | ||
968 | { | ||
969 | smp_cleanup_boot(); | ||
970 | setup_ioapic_dest(); | ||
971 | check_nmi_watchdog(); | ||
972 | } | ||
973 | |||
974 | #ifdef CONFIG_HOTPLUG_CPU | ||
975 | |||
976 | static void remove_siblinginfo(int cpu) | ||
977 | { | ||
978 | int sibling; | ||
979 | struct cpuinfo_x86 *c = cpu_data; | ||
980 | |||
981 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | ||
982 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
983 | /* | ||
984 | * last thread sibling in this cpu core going down | ||
985 | */ | ||
986 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | ||
987 | c[sibling].booted_cores--; | ||
988 | } | ||
989 | |||
990 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | ||
991 | cpu_clear(cpu, cpu_sibling_map[sibling]); | ||
992 | cpus_clear(cpu_sibling_map[cpu]); | ||
993 | cpus_clear(cpu_core_map[cpu]); | ||
994 | c[cpu].phys_proc_id = 0; | ||
995 | c[cpu].cpu_core_id = 0; | ||
996 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
997 | } | ||
998 | |||
999 | void remove_cpu_from_maps(void) | ||
1000 | { | ||
1001 | int cpu = smp_processor_id(); | ||
1002 | |||
1003 | cpu_clear(cpu, cpu_callout_map); | ||
1004 | cpu_clear(cpu, cpu_callin_map); | ||
1005 | clear_bit(cpu, &cpu_initialized); /* was set by cpu_init() */ | ||
1006 | clear_node_cpumask(cpu); | ||
1007 | } | ||
1008 | |||
1009 | int __cpu_disable(void) | ||
1010 | { | ||
1011 | int cpu = smp_processor_id(); | ||
1012 | |||
1013 | /* | ||
1014 | * Perhaps use cpufreq to drop frequency, but that could go | ||
1015 | * into generic code. | ||
1016 | * | ||
1017 | * We won't take down the boot processor on i386 due to some | ||
1018 | * interrupts only being able to be serviced by the BSP. | ||
1019 | * Especially so if we're not using an IOAPIC -zwane | ||
1020 | */ | ||
1021 | if (cpu == 0) | ||
1022 | return -EBUSY; | ||
1023 | |||
1024 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
1025 | stop_apic_nmi_watchdog(NULL); | ||
1026 | clear_local_APIC(); | ||
1027 | |||
1028 | /* | ||
1029 | * HACK: | ||
1030 | * Allow any queued timer interrupts to get serviced | ||
1031 | * This is only a temporary solution until we cleanup | ||
1032 | * fixup_irqs as we do for IA64. | ||
1033 | */ | ||
1034 | local_irq_enable(); | ||
1035 | mdelay(1); | ||
1036 | |||
1037 | local_irq_disable(); | ||
1038 | remove_siblinginfo(cpu); | ||
1039 | |||
1040 | spin_lock(&vector_lock); | ||
1041 | /* It's now safe to remove this processor from the online map */ | ||
1042 | cpu_clear(cpu, cpu_online_map); | ||
1043 | spin_unlock(&vector_lock); | ||
1044 | remove_cpu_from_maps(); | ||
1045 | fixup_irqs(cpu_online_map); | ||
1046 | return 0; | ||
1047 | } | ||
1048 | |||
1049 | void __cpu_die(unsigned int cpu) | ||
1050 | { | ||
1051 | /* We don't do anything here: idle task is faking death itself. */ | ||
1052 | unsigned int i; | ||
1053 | |||
1054 | for (i = 0; i < 10; i++) { | ||
1055 | /* They ack this in play_dead by setting CPU_DEAD */ | ||
1056 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | ||
1057 | printk ("CPU %d is now offline\n", cpu); | ||
1058 | if (1 == num_online_cpus()) | ||
1059 | alternatives_smp_switch(0); | ||
1060 | return; | ||
1061 | } | ||
1062 | msleep(100); | ||
1063 | } | ||
1064 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | ||
1065 | } | ||
1066 | |||
1067 | static __init int setup_additional_cpus(char *s) | ||
1068 | { | ||
1069 | return s && get_option(&s, &additional_cpus) ? 0 : -EINVAL; | ||
1070 | } | ||
1071 | early_param("additional_cpus", setup_additional_cpus); | ||
1072 | |||
1073 | #else /* ... !CONFIG_HOTPLUG_CPU */ | ||
1074 | |||
1075 | int __cpu_disable(void) | ||
1076 | { | ||
1077 | return -ENOSYS; | ||
1078 | } | ||
1079 | |||
/*
 * Variant built when CONFIG_HOTPLUG_CPU is off.  __cpu_disable() always
 * says "no" in this configuration, so getting here is a bug.
 */
void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
1085 | #endif /* CONFIG_HOTPLUG_CPU */ | ||