Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r--  arch/x86/kernel/smpboot.c  325
1 file changed, 176 insertions, 149 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8b3bfc4dd708..9fd3137230d4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,8 +62,9 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mtrr.h>
-#include <asm/vmi.h>
+#include <asm/mwait.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
 #include <linux/mc146818rtc.h>
@@ -71,10 +72,6 @@
 #include <asm/smpboot_hooks.h>
 #include <asm/i8259.h>
 
-#ifdef CONFIG_X86_32
-u8 apicid_2_node[MAX_APICID];
-#endif
-
 /* State of each CPU */
 DEFINE_PER_CPU(int, cpu_state) = { 0 };
 
@@ -97,12 +94,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
  */
 static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
 
-void cpu_hotplug_driver_lock()
+void cpu_hotplug_driver_lock(void)
 {
 	mutex_lock(&x86_cpu_hotplug_driver_mutex);
 }
 
-void cpu_hotplug_driver_unlock()
+void cpu_hotplug_driver_unlock(void)
 {
 	mutex_unlock(&x86_cpu_hotplug_driver_mutex);
 }
@@ -130,68 +127,14 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
 DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
 
+DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map);
+
 /* Per CPU bogomips and other parameters */
 DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
 EXPORT_PER_CPU_SYMBOL(cpu_info);
 
 atomic_t init_deasserted;
 
-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_32)
-/* which node each logical CPU is on */
-int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = 0 };
-EXPORT_SYMBOL(cpu_to_node_map);
-
-/* set up a mapping between cpu and node. */
-static void map_cpu_to_node(int cpu, int node)
-{
-	printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node);
-	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
-	cpu_to_node_map[cpu] = node;
-}
-
-/* undo a mapping between cpu and node. */
-static void unmap_cpu_to_node(int cpu)
-{
-	int node;
-
-	printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu);
-	for (node = 0; node < MAX_NUMNODES; node++)
-		cpumask_clear_cpu(cpu, node_to_cpumask_map[node]);
-	cpu_to_node_map[cpu] = 0;
-}
-#else /* !(CONFIG_NUMA && CONFIG_X86_32) */
-#define map_cpu_to_node(cpu, node)	({})
-#define unmap_cpu_to_node(cpu)	({})
-#endif
-
-#ifdef CONFIG_X86_32
-static int boot_cpu_logical_apicid;
-
-u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
-					{ [0 ... NR_CPUS-1] = BAD_APICID };
-
-static void map_cpu_to_logical_apicid(void)
-{
-	int cpu = smp_processor_id();
-	int apicid = logical_smp_processor_id();
-	int node = apic->apicid_to_node(apicid);
-
-	if (!node_online(node))
-		node = first_online_node;
-
-	cpu_2_logical_apicid[cpu] = apicid;
-	map_cpu_to_node(cpu, node);
-}
-
-void numa_remove_cpu(int cpu)
-{
-	cpu_2_logical_apicid[cpu] = BAD_APICID;
-	unmap_cpu_to_node(cpu);
-}
-#else
-#define map_cpu_to_logical_apicid()  do {} while (0)
-#endif
-
 /*
  * Report back to the Boot Processor.
  * Running on AP.
@@ -259,7 +202,6 @@ static void __cpuinit smp_callin(void)
 	apic->smp_callin_clear_local_apic();
 	setup_local_APIC();
 	end_local_APIC_setup();
-	map_cpu_to_logical_apicid();
 
 	/*
 	 * Need to setup vector mappings before we enable interrupts.
@@ -281,6 +223,13 @@ static void __cpuinit smp_callin(void)
 	 */
 	smp_store_cpu_info(cpuid);
 
+	/*
+	 * This must be done before setting cpu_online_mask
+	 * or calling notify_cpu_starting.
+	 */
+	set_cpu_sibling_map(raw_smp_processor_id());
+	wmb();
+
 	notify_cpu_starting(cpuid);
 
 	/*
@@ -299,23 +248,16 @@ notrace static void __cpuinit start_secondary(void *unused)
 	 * fragile that we want to limit the things done here to the
 	 * most necessary things.
 	 */
+	cpu_init();
+	preempt_disable();
+	smp_callin();
 
 #ifdef CONFIG_X86_32
-	/*
-	 * Switch away from the trampoline page-table
-	 *
-	 * Do this before cpu_init() because it needs to access per-cpu
-	 * data which may not be mapped in the trampoline page-table.
-	 */
+	/* switch away from the initial page table */
 	load_cr3(swapper_pg_dir);
 	__flush_tlb_all();
 #endif
 
-	vmi_bringup();
-	cpu_init();
-	preempt_disable();
-	smp_callin();
-
 	/* otherwise gcc will move up smp_processor_id before the cpu_init */
 	barrier();
 	/*
@@ -323,16 +265,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	 */
 	check_tsc_sync_target();
 
-	if (nmi_watchdog == NMI_IO_APIC) {
-		legacy_pic->chip->mask(0);
-		enable_NMI_through_LVT0();
-		legacy_pic->chip->unmask(0);
-	}
-
-	/* This must be done before setting cpu_online_mask */
-	set_cpu_sibling_map(raw_smp_processor_id());
-	wmb();
-
 	/*
 	 * We need to hold call_lock, so there is no inconsistency
 	 * between the time smp_call_function() determines number of
@@ -353,6 +285,19 @@ notrace static void __cpuinit start_secondary(void *unused)
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
 	x86_platform.nmi_init();
 
+	/*
+	 * Wait until the cpu which brought this one up marked it
+	 * online before enabling interrupts. If we don't do that then
+	 * we can end up waking up the softirq thread before this cpu
+	 * reached the active state, which makes the scheduler unhappy
+	 * and schedule the softirq thread on the wrong cpu. This is
+	 * only observable with forced threaded interrupts, but in
+	 * theory it could also happen w/o them. It's just way harder
+	 * to achieve.
+	 */
+	while (!cpumask_test_cpu(smp_processor_id(), cpu_active_mask))
+		cpu_relax();
+
 	/* enable local interrupts */
 	local_irq_enable();
 
@@ -365,23 +310,6 @@ notrace static void __cpuinit start_secondary(void *unused)
 	cpu_idle();
 }
 
-#ifdef CONFIG_CPUMASK_OFFSTACK
-/* In this case, llc_shared_map is a pointer to a cpumask. */
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
-				    const struct cpuinfo_x86 *src)
-{
-	struct cpumask *llc = dst->llc_shared_map;
-	*dst = *src;
-	dst->llc_shared_map = llc;
-}
-#else
-static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst,
-				    const struct cpuinfo_x86 *src)
-{
-	*dst = *src;
-}
-#endif /* CONFIG_CPUMASK_OFFSTACK */
-
 /*
  * The bootstrap kernel entry code has set these up. Save them for
  * a given CPU
@@ -391,12 +319,22 @@ void __cpuinit smp_store_cpu_info(int id)
 {
 	struct cpuinfo_x86 *c = &cpu_data(id);
 
-	copy_cpuinfo_x86(c, &boot_cpu_data);
+	*c = boot_cpu_data;
 	c->cpu_index = id;
 	if (id != 0)
 		identify_secondary_cpu(c);
 }
 
+static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+{
+	cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
+	cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
+	cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
+	cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
+	cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2));
+	cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1));
+}
+
 
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
@@ -409,23 +347,23 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 		for_each_cpu(i, cpu_sibling_setup_mask) {
 			struct cpuinfo_x86 *o = &cpu_data(i);
 
-			if (c->phys_proc_id == o->phys_proc_id &&
-			    c->cpu_core_id == o->cpu_core_id) {
-				cpumask_set_cpu(i, cpu_sibling_mask(cpu));
-				cpumask_set_cpu(cpu, cpu_sibling_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, c->llc_shared_map);
-				cpumask_set_cpu(cpu, o->llc_shared_map);
+			if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+				if (c->phys_proc_id == o->phys_proc_id &&
+				    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i) &&
+				    c->compute_unit_id == o->compute_unit_id)
+					link_thread_siblings(cpu, i);
+			} else if (c->phys_proc_id == o->phys_proc_id &&
+				   c->cpu_core_id == o->cpu_core_id) {
+				link_thread_siblings(cpu, i);
 			}
 		}
 	} else {
 		cpumask_set_cpu(cpu, cpu_sibling_mask(cpu));
 	}
 
-	cpumask_set_cpu(cpu, c->llc_shared_map);
+	cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu));
 
-	if (current_cpu_data.x86_max_cores == 1) {
+	if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
 		cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
 		c->booted_cores = 1;
 		return;
@@ -434,8 +372,8 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 	for_each_cpu(i, cpu_sibling_setup_mask) {
 		if (per_cpu(cpu_llc_id, cpu) != BAD_APICID &&
 		    per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) {
-			cpumask_set_cpu(i, c->llc_shared_map);
-			cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map);
+			cpumask_set_cpu(i, cpu_llc_shared_mask(cpu));
+			cpumask_set_cpu(cpu, cpu_llc_shared_mask(i));
 		}
 		if (c->phys_proc_id == cpu_data(i).phys_proc_id) {
 			cpumask_set_cpu(i, cpu_core_mask(cpu));
@@ -474,7 +412,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
 	    !(cpu_has(c, X86_FEATURE_AMD_DCM)))
 		return cpu_core_mask(cpu);
 	else
-		return c->llc_shared_map;
+		return cpu_llc_shared_mask(cpu);
 }
 
 static void impress_friends(void)
@@ -636,7 +574,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 	 * target processor state.
 	 */
 	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
-			 (unsigned long)stack_start.sp);
+			 stack_start);
 
 	/*
 	 * Run STARTUP IPI loop.
@@ -742,7 +680,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
 		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
 	};
 
-	INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);
+	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
 
 	alternatives_smp_switch(1);
 
@@ -774,7 +712,6 @@ do_rest:
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
-	initial_page_table = __pa(&trampoline_pg_dir);
 #else
 	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
 	initial_gs = per_cpu_offset(cpu);
@@ -784,10 +721,10 @@ do_rest:
 #endif
 	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
 	initial_code = (unsigned long)start_secondary;
-	stack_start.sp = (void *) c_idle.idle->thread.sp;
+	stack_start = c_idle.idle->thread.sp;
 
 	/* start_ip had better be page-aligned! */
-	start_ip = setup_trampoline();
+	start_ip = trampoline_address();
 
 	/* So we see what's up */
 	announce_cpu(cpu, apicid);
@@ -797,6 +734,8 @@ do_rest:
 	 * the targeted processor.
 	 */
 
+	printk(KERN_DEBUG "smpboot cpu %d: start_ip = %lx\n", cpu, start_ip);
+
 	atomic_set(&init_deasserted, 0);
 
 	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
@@ -850,8 +789,8 @@ do_rest:
 		pr_debug("CPU%d: has booted.\n", cpu);
 	else {
 		boot_error = 1;
-		if (*((volatile unsigned char *)trampoline_base)
-				== 0xA5)
+		if (*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status)
+				== 0xA5A5A5A5)
 			/* trampoline started but...? */
 			pr_err("CPU%d: Stuck ??\n", cpu);
 		else
@@ -877,7 +816,7 @@ do_rest:
 	}
 
 	/* mark "stuck" area as not stuck */
-	*((volatile unsigned long *)trampoline_base) = 0;
+	*(volatile u32 *)TRAMPOLINE_SYM(trampoline_status) = 0;
 
 	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
 		/*
@@ -923,7 +862,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
 	err = do_boot_cpu(apicid, cpu);
-
 	if (err) {
 		pr_debug("do_boot_cpu failed %d\n", err);
 		return -EIO;
@@ -945,6 +883,14 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 	return 0;
 }
 
+/**
+ * arch_disable_smp_support() - disables SMP support for x86 at runtime
+ */
+void arch_disable_smp_support(void)
+{
+	disable_ioapic_support();
+}
+
 /*
  * Fall back to non SMP mode after errors.
  *
@@ -960,7 +906,6 @@ static __init void disable_smp(void)
 		physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 	else
 		physid_set_mask_of_physid(0, &phys_cpu_present_map);
-	map_cpu_to_logical_apicid();
 	cpumask_set_cpu(0, cpu_sibling_mask(0));
 	cpumask_set_cpu(0, cpu_core_mask(0));
 }
@@ -1045,7 +990,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
 			"(tell your hw vendor)\n");
 		}
 		smpboot_clear_io_apic();
-		arch_disable_smp_support();
+		disable_ioapic_support();
 		return -1;
 	}
 
@@ -1058,11 +1003,9 @@ static int __init smp_sanity_check(unsigned max_cpus)
 		printk(KERN_INFO "SMP mode deactivated.\n");
 		smpboot_clear_io_apic();
 
-		localise_nmi_watchdog();
-
 		connect_bsp_APIC();
 		setup_local_APIC();
-		end_local_APIC_setup();
+		bsp_end_local_APIC_setup();
 		return -1;
 	}
 
@@ -1091,26 +1034,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 
 	preempt_disable();
 	smp_cpu_index_default();
-	current_cpu_data = boot_cpu_data;
-	cpumask_copy(cpu_callin_mask, cpumask_of(0));
-	mb();
+
 	/*
 	 * Setup boot CPU information
 	 */
 	smp_store_cpu_info(0); /* Final full version of the data */
-#ifdef CONFIG_X86_32
-	boot_cpu_logical_apicid = logical_smp_processor_id();
-#endif
+	cpumask_copy(cpu_callin_mask, cpumask_of(0));
+	mb();
+
 	current_thread_info()->cpu = 0; /* needed? */
 	for_each_possible_cpu(i) {
 		zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
 		zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
-		zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL);
+		zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
 	}
 	set_cpu_sibling_map(0);
 
-	enable_IR_x2apic();
-	default_setup_apic_routing();
 
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1057,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 		goto out;
 	}
 
+	default_setup_apic_routing();
+
 	preempt_disable();
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1139,9 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
 	if (!skip_ioapic_setup && nr_ioapics)
 		enable_IO_APIC();
 
-	end_local_APIC_setup();
-
-	map_cpu_to_logical_apicid();
+	bsp_end_local_APIC_setup();
 
 	if (apic->setup_portio_remap)
 		apic->setup_portio_remap();
@@ -1163,6 +1102,20 @@ out:
 	preempt_enable();
 }
 
+void arch_disable_nonboot_cpus_begin(void)
+{
+	/*
+	 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
+	 * In the suspend path, we will be back in the SMP mode shortly anyways.
+	 */
+	skip_smp_alternatives = true;
+}
+
+void arch_disable_nonboot_cpus_end(void)
+{
+	skip_smp_alternatives = false;
+}
+
 void arch_enable_nonboot_cpus_begin(void)
 {
 	set_mtrr_aps_delayed_init();
@@ -1193,7 +1146,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
 #ifdef CONFIG_X86_IO_APIC
 	setup_ioapic_dest();
 #endif
-	check_nmi_watchdog();
 	mtrr_aps_init();
 }
 
@@ -1338,8 +1290,6 @@ int native_cpu_disable(void)
 	if (cpu == 0)
 		return -EBUSY;
 
-	if (nmi_watchdog == NMI_LOCAL_APIC)
-		stop_apic_nmi_watchdog(NULL);
 	clear_local_APIC();
 
 	cpu_disable_common();
@@ -1370,12 +1320,11 @@ void play_dead_common(void)
 {
 	idle_task_exit();
 	reset_lazy_tlbstate();
-	irq_ctx_exit(raw_smp_processor_id());
-	c1e_remove_cpu(raw_smp_processor_id());
+	amd_e400_remove_cpu(raw_smp_processor_id());
 
 	mb();
 	/* Ack it */
-	__get_cpu_var(cpu_state) = CPU_DEAD;
+	__this_cpu_write(cpu_state, CPU_DEAD);
 
 	/*
 	 * With physical CPU hotplug, we should halt the cpu
@@ -1383,11 +1332,89 @@ void play_dead_common(void)
 	local_irq_disable();
 }
 
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int highest_cstate = 0;
+	unsigned int highest_subcstate = 0;
+	int i;
+	void *mwait_ptr;
+	struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
+
+	if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
+		return;
+	if (!this_cpu_has(X86_FEATURE_CLFLSH))
+		return;
+	if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
+		return;
+
+	eax = CPUID_MWAIT_LEAF;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	/*
+	 * eax will be 0 if EDX enumeration is not valid.
+	 * Initialized below to cstate, sub_cstate value when EDX is valid.
+	 */
+	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+		eax = 0;
+	} else {
+		edx >>= MWAIT_SUBSTATE_SIZE;
+		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+			if (edx & MWAIT_SUBSTATE_MASK) {
+				highest_cstate = i;
+				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+			}
+		}
+		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+			(highest_subcstate - 1);
+	}
+
+	/*
+	 * This should be a memory location in a cache line which is
+	 * unlikely to be touched by other processors. The actual
+	 * content is immaterial as it is not actually modified in any way.
+	 */
+	mwait_ptr = &current_thread_info()->flags;
+
+	wbinvd();
+
+	while (1) {
+		/*
+		 * The CLFLUSH is a workaround for erratum AAI65 for
+		 * the Xeon 7400 series. It's not clear it is actually
+		 * needed, but it should be harmless in either case.
+		 * The WBINVD is insufficient due to the spurious-wakeup
+		 * case where we return around the loop.
+		 */
+		clflush(mwait_ptr);
+		__monitor(mwait_ptr, 0, 0);
+		mb();
+		__mwait(eax, 0);
+	}
+}
+
+static inline void hlt_play_dead(void)
+{
+	if (__this_cpu_read(cpu_info.x86) >= 4)
+		wbinvd();
+
+	while (1) {
+		native_halt();
+	}
+}
+
 void native_play_dead(void)
 {
 	play_dead_common();
 	tboot_shutdown(TB_SHUTDOWN_WFS);
-	wbinvd_halt();
+
+	mwait_play_dead();	/* Only returns on failure */
+	hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */