Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r--  arch/x86/kernel/smpboot.c | 134
1 file changed, 107 insertions, 27 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8b3bfc4dd70..6af118511b4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,7 +62,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mtrr.h>
-#include <asm/vmi.h>
+#include <asm/mwait.h>
 #include <asm/apic.h>
 #include <asm/setup.h>
 #include <asm/uv/uv.h>
@@ -299,23 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused)
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
+	cpu_init();
+	preempt_disable();
+	smp_callin();
 
 #ifdef CONFIG_X86_32
-	/*
-	 * Switch away from the trampoline page-table
-	 *
-	 * Do this before cpu_init() because it needs to access per-cpu
-	 * data which may not be mapped in the trampoline page-table.
-	 */
+	/* switch away from the initial page table */
	load_cr3(swapper_pg_dir);
	__flush_tlb_all();
 #endif
 
-	vmi_bringup();
-	cpu_init();
-	preempt_disable();
-	smp_callin();
-
	/* otherwise gcc will move up smp_processor_id before the cpu_init */
	barrier();
	/*
@@ -324,9 +317,9 @@ notrace static void __cpuinit start_secondary(void *unused)
	check_tsc_sync_target();
 
	if (nmi_watchdog == NMI_IO_APIC) {
-		legacy_pic->chip->mask(0);
+		legacy_pic->mask(0);
		enable_NMI_through_LVT0();
-		legacy_pic->chip->unmask(0);
+		legacy_pic->unmask(0);
	}
 
	/* This must be done before setting cpu_online_mask */
@@ -397,6 +390,19 @@ void __cpuinit smp_store_cpu_info(int id)
	identify_secondary_cpu(c);
 }
 
+static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+{
+	struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
+	struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
+
+	cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
+	cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
+	cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
+	cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
+	cpumask_set_cpu(cpu1, c2->llc_shared_map);
+	cpumask_set_cpu(cpu2, c1->llc_shared_map);
+}
+
 
 void __cpuinit set_cpu_sibling_map(int cpu)
 {
@@ -409,14 +415,13 @@ void __cpuinit set_cpu_sibling_map(int cpu)
		for_each_cpu(i, cpu_sibling_setup_mask) {
			struct cpuinfo_x86 *o = &cpu_data(i);
 
-			if (c->phys_proc_id == o->phys_proc_id &&
-			    c->cpu_core_id == o->cpu_core_id) {
-				cpumask_set_cpu(i, cpu_sibling_mask(cpu));
-				cpumask_set_cpu(cpu, cpu_sibling_mask(i));
-				cpumask_set_cpu(i, cpu_core_mask(cpu));
-				cpumask_set_cpu(cpu, cpu_core_mask(i));
-				cpumask_set_cpu(i, c->llc_shared_map);
-				cpumask_set_cpu(cpu, o->llc_shared_map);
+			if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+				if (c->phys_proc_id == o->phys_proc_id &&
+				    c->compute_unit_id == o->compute_unit_id)
+					link_thread_siblings(cpu, i);
+			} else if (c->phys_proc_id == o->phys_proc_id &&
+				   c->cpu_core_id == o->cpu_core_id) {
+				link_thread_siblings(cpu, i);
			}
		}
	} else {
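With AMD's topology extensions (X86_FEATURE_TOPOEXT), two logical CPUs in the same physical package are linked as thread siblings when they share a compute unit; without the feature, the older package-id/core-id comparison is used. Below is a minimal, hypothetical stand-alone sketch of just that predicate, with a simplified struct standing in for struct cpuinfo_x86 (the field names come from the hunk above, everything else is illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the struct cpuinfo_x86 fields used above. */
struct cpu_topo {
	bool has_topoext;              /* X86_FEATURE_TOPOEXT present */
	unsigned int phys_proc_id;     /* physical package id */
	unsigned int cpu_core_id;      /* core id within the package */
	unsigned int compute_unit_id;  /* AMD compute unit id */
};

/* Mirrors the test that decides whether link_thread_siblings() is called. */
static bool are_thread_siblings(const struct cpu_topo *c, const struct cpu_topo *o)
{
	if (c->has_topoext)
		return c->phys_proc_id == o->phys_proc_id &&
		       c->compute_unit_id == o->compute_unit_id;

	return c->phys_proc_id == o->phys_proc_id &&
	       c->cpu_core_id == o->cpu_core_id;
}

int main(void)
{
	/* Two threads of one compute unit in package 0 (hypothetical values). */
	struct cpu_topo a = { true, 0, 0, 3 };
	struct cpu_topo b = { true, 0, 1, 3 };

	printf("siblings: %d\n", are_thread_siblings(&a, &b));	/* prints 1 */
	return 0;
}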
@@ -774,7 +779,6 @@ do_rest:
 #ifdef CONFIG_X86_32
	/* Stack for startup_32 can be just as for start_secondary onwards */
	irq_ctx_init(cpu);
-	initial_page_table = __pa(&trampoline_pg_dir);
 #else
	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
	initial_gs = per_cpu_offset(cpu);
@@ -923,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
 
	err = do_boot_cpu(apicid, cpu);
-
	if (err) {
		pr_debug("do_boot_cpu failed %d\n", err);
		return -EIO;
@@ -1109,8 +1112,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
	}
	set_cpu_sibling_map(0);
 
-	enable_IR_x2apic();
-	default_setup_apic_routing();
 
	if (smp_sanity_check(max_cpus) < 0) {
		printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1119,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
		goto out;
	}
 
+	default_setup_apic_routing();
+
	preempt_disable();
	if (read_apic_id() != boot_cpu_physical_apicid) {
		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1383,11 +1386,88 @@ void play_dead_common(void)
	local_irq_disable();
 }
 
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int highest_cstate = 0;
+	unsigned int highest_subcstate = 0;
+	int i;
+	void *mwait_ptr;
+
+	if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+		return;
+	if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+		return;
+	if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+		return;
+
+	eax = CPUID_MWAIT_LEAF;
+	ecx = 0;
+	native_cpuid(&eax, &ebx, &ecx, &edx);
+
+	/*
+	 * eax will be 0 if EDX enumeration is not valid.
+	 * Initialized below to cstate, sub_cstate value when EDX is valid.
+	 */
+	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+		eax = 0;
+	} else {
+		edx >>= MWAIT_SUBSTATE_SIZE;
+		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+			if (edx & MWAIT_SUBSTATE_MASK) {
+				highest_cstate = i;
+				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+			}
+		}
+		eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+			(highest_subcstate - 1);
+	}
+
+	/*
+	 * This should be a memory location in a cache line which is
+	 * unlikely to be touched by other processors.  The actual
+	 * content is immaterial as it is not actually modified in any way.
+	 */
+	mwait_ptr = &current_thread_info()->flags;
+
+	wbinvd();
+
+	while (1) {
+		/*
+		 * The CLFLUSH is a workaround for erratum AAI65 for
+		 * the Xeon 7400 series.  It's not clear it is actually
+		 * needed, but it should be harmless in either case.
+		 * The WBINVD is insufficient due to the spurious-wakeup
+		 * case where we return around the loop.
+		 */
+		clflush(mwait_ptr);
+		__monitor(mwait_ptr, 0, 0);
+		mb();
+		__mwait(eax, 0);
+	}
+}
+
+static inline void hlt_play_dead(void)
+{
+	if (current_cpu_data.x86 >= 4)
+		wbinvd();
+
+	while (1) {
+		native_halt();
+	}
+}
+
 void native_play_dead(void)
 {
	play_dead_common();
	tboot_shutdown(TB_SHUTDOWN_WFS);
-	wbinvd_halt();
+
+	mwait_play_dead();	/* Only returns on failure */
+	hlt_play_dead();
 }
 
 #else /* ... !CONFIG_HOTPLUG_CPU */
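The eax hint that mwait_play_dead() hands to __mwait() is built from CPUID leaf 5 (the MONITOR/MWAIT leaf): the deepest C-state enumerated in EDX goes into the upper nibble and its highest sub-state, minus one, into the lower nibble. The following hypothetical user-space sketch repeats that enumeration and prints the resulting hint; it assumes the MWAIT_* constants match their asm/mwait.h values and uses GCC's <cpuid.h> helper in place of the kernel's native_cpuid().

#include <cpuid.h>
#include <stdio.h>

/* Values assumed to mirror asm/mwait.h, copied here so the sketch is standalone. */
#define CPUID_MWAIT_LEAF			5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED		0x1
#define MWAIT_SUBSTATE_SIZE			4
#define MWAIT_SUBSTATE_MASK			0xf

int main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int highest_cstate = 0, highest_subcstate = 0;
	unsigned int hint;
	int i;

	/* __get_cpuid() fails if the CPU does not implement leaf 5, roughly
	 * matching the cpuid_level < CPUID_MWAIT_LEAF bail-out in the patch. */
	if (!__get_cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 5 not available\n");
		return 1;
	}

	if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
		hint = 0;	/* EDX enumeration not valid; same fallback as the patch */
	} else {
		/* Skip the C0 nibble, then remember the deepest state with sub-states. */
		edx >>= MWAIT_SUBSTATE_SIZE;
		for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
			if (edx & MWAIT_SUBSTATE_MASK) {
				highest_cstate = i;
				highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
			}
		}
		hint = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
		       (highest_subcstate - 1);
	}

	printf("MWAIT hint for deepest C-state: 0x%02x\n", hint);
	return 0;
}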