diff options
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
| -rw-r--r-- | arch/x86/kernel/smpboot.c | 156 |
1 files changed, 128 insertions, 28 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c80a33bc528b..ed0fe385289d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
| @@ -68,6 +68,8 @@ | |||
| 68 | #include <asm/mwait.h> | 68 | #include <asm/mwait.h> |
| 69 | #include <asm/apic.h> | 69 | #include <asm/apic.h> |
| 70 | #include <asm/io_apic.h> | 70 | #include <asm/io_apic.h> |
| 71 | #include <asm/i387.h> | ||
| 72 | #include <asm/fpu-internal.h> | ||
| 71 | #include <asm/setup.h> | 73 | #include <asm/setup.h> |
| 72 | #include <asm/uv/uv.h> | 74 | #include <asm/uv/uv.h> |
| 73 | #include <linux/mc146818rtc.h> | 75 | #include <linux/mc146818rtc.h> |
| @@ -125,8 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); | |||
| 125 | atomic_t init_deasserted; | 127 | atomic_t init_deasserted; |
| 126 | 128 | ||
| 127 | /* | 129 | /* |
| 128 | * Report back to the Boot Processor. | 130 | * Report back to the Boot Processor during boot time or to the caller processor |
| 129 | * Running on AP. | 131 | * during CPU online. |
| 130 | */ | 132 | */ |
| 131 | static void __cpuinit smp_callin(void) | 133 | static void __cpuinit smp_callin(void) |
| 132 | { | 134 | { |
| @@ -138,15 +140,17 @@ static void __cpuinit smp_callin(void) | |||
| 138 | * we may get here before an INIT-deassert IPI reaches | 140 | * we may get here before an INIT-deassert IPI reaches |
| 139 | * our local APIC. We have to wait for the IPI or we'll | 141 | * our local APIC. We have to wait for the IPI or we'll |
| 140 | * lock up on an APIC access. | 142 | * lock up on an APIC access. |
| 143 | * | ||
| 144 | * Since CPU0 is not wakened up by INIT, it doesn't wait for the IPI. | ||
| 141 | */ | 145 | */ |
| 142 | if (apic->wait_for_init_deassert) | 146 | cpuid = smp_processor_id(); |
| 147 | if (apic->wait_for_init_deassert && cpuid != 0) | ||
| 143 | apic->wait_for_init_deassert(&init_deasserted); | 148 | apic->wait_for_init_deassert(&init_deasserted); |
| 144 | 149 | ||
| 145 | /* | 150 | /* |
| 146 | * (This works even if the APIC is not enabled.) | 151 | * (This works even if the APIC is not enabled.) |
| 147 | */ | 152 | */ |
| 148 | phys_id = read_apic_id(); | 153 | phys_id = read_apic_id(); |
| 149 | cpuid = smp_processor_id(); | ||
| 150 | if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { | 154 | if (cpumask_test_cpu(cpuid, cpu_callin_mask)) { |
| 151 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, | 155 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, |
| 152 | phys_id, cpuid); | 156 | phys_id, cpuid); |
| @@ -228,6 +232,8 @@ static void __cpuinit smp_callin(void) | |||
| 228 | cpumask_set_cpu(cpuid, cpu_callin_mask); | 232 | cpumask_set_cpu(cpuid, cpu_callin_mask); |
| 229 | } | 233 | } |
| 230 | 234 | ||
| 235 | static int cpu0_logical_apicid; | ||
| 236 | static int enable_start_cpu0; | ||
| 231 | /* | 237 | /* |
| 232 | * Activate a secondary processor. | 238 | * Activate a secondary processor. |
| 233 | */ | 239 | */ |
| @@ -243,6 +249,8 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 243 | preempt_disable(); | 249 | preempt_disable(); |
| 244 | smp_callin(); | 250 | smp_callin(); |
| 245 | 251 | ||
| 252 | enable_start_cpu0 = 0; | ||
| 253 | |||
| 246 | #ifdef CONFIG_X86_32 | 254 | #ifdef CONFIG_X86_32 |
| 247 | /* switch away from the initial page table */ | 255 | /* switch away from the initial page table */ |
| 248 | load_cr3(swapper_pg_dir); | 256 | load_cr3(swapper_pg_dir); |
| @@ -279,19 +287,30 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
| 279 | cpu_idle(); | 287 | cpu_idle(); |
| 280 | } | 288 | } |
| 281 | 289 | ||
| 290 | void __init smp_store_boot_cpu_info(void) | ||
| 291 | { | ||
| 292 | int id = 0; /* CPU 0 */ | ||
| 293 | struct cpuinfo_x86 *c = &cpu_data(id); | ||
| 294 | |||
| 295 | *c = boot_cpu_data; | ||
| 296 | c->cpu_index = id; | ||
| 297 | } | ||
| 298 | |||
| 282 | /* | 299 | /* |
| 283 | * The bootstrap kernel entry code has set these up. Save them for | 300 | * The bootstrap kernel entry code has set these up. Save them for |
| 284 | * a given CPU | 301 | * a given CPU |
| 285 | */ | 302 | */ |
| 286 | |||
| 287 | void __cpuinit smp_store_cpu_info(int id) | 303 | void __cpuinit smp_store_cpu_info(int id) |
| 288 | { | 304 | { |
| 289 | struct cpuinfo_x86 *c = &cpu_data(id); | 305 | struct cpuinfo_x86 *c = &cpu_data(id); |
| 290 | 306 | ||
| 291 | *c = boot_cpu_data; | 307 | *c = boot_cpu_data; |
| 292 | c->cpu_index = id; | 308 | c->cpu_index = id; |
| 293 | if (id != 0) | 309 | /* |
| 294 | identify_secondary_cpu(c); | 310 | * During boot time, CPU0 has this setup already. Save the info when |
| 311 | * bringing up AP or offlined CPU0. | ||
| 312 | */ | ||
| 313 | identify_secondary_cpu(c); | ||
| 295 | } | 314 | } |
| 296 | 315 | ||
| 297 | static bool __cpuinit | 316 | static bool __cpuinit |
| @@ -313,7 +332,7 @@ do { \ | |||
| 313 | 332 | ||
| 314 | static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) | 333 | static bool __cpuinit match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) |
| 315 | { | 334 | { |
| 316 | if (cpu_has(c, X86_FEATURE_TOPOEXT)) { | 335 | if (cpu_has_topoext) { |
| 317 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; | 336 | int cpu1 = c->cpu_index, cpu2 = o->cpu_index; |
| 318 | 337 | ||
| 319 | if (c->phys_proc_id == o->phys_proc_id && | 338 | if (c->phys_proc_id == o->phys_proc_id && |
| @@ -481,7 +500,7 @@ void __inquire_remote_apic(int apicid) | |||
| 481 | * won't ... remember to clear down the APIC, etc later. | 500 | * won't ... remember to clear down the APIC, etc later. |
| 482 | */ | 501 | */ |
| 483 | int __cpuinit | 502 | int __cpuinit |
| 484 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | 503 | wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip) |
| 485 | { | 504 | { |
| 486 | unsigned long send_status, accept_status = 0; | 505 | unsigned long send_status, accept_status = 0; |
| 487 | int maxlvt; | 506 | int maxlvt; |
| @@ -489,7 +508,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
| 489 | /* Target chip */ | 508 | /* Target chip */ |
| 490 | /* Boot on the stack */ | 509 | /* Boot on the stack */ |
| 491 | /* Kick the second */ | 510 | /* Kick the second */ |
| 492 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); | 511 | apic_icr_write(APIC_DM_NMI | apic->dest_logical, apicid); |
| 493 | 512 | ||
| 494 | pr_debug("Waiting for send to finish...\n"); | 513 | pr_debug("Waiting for send to finish...\n"); |
| 495 | send_status = safe_apic_wait_icr_idle(); | 514 | send_status = safe_apic_wait_icr_idle(); |
| @@ -649,6 +668,63 @@ static void __cpuinit announce_cpu(int cpu, int apicid) | |||
| 649 | node, cpu, apicid); | 668 | node, cpu, apicid); |
| 650 | } | 669 | } |
| 651 | 670 | ||
| 671 | static int wakeup_cpu0_nmi(unsigned int cmd, struct pt_regs *regs) | ||
| 672 | { | ||
| 673 | int cpu; | ||
| 674 | |||
| 675 | cpu = smp_processor_id(); | ||
| 676 | if (cpu == 0 && !cpu_online(cpu) && enable_start_cpu0) | ||
| 677 | return NMI_HANDLED; | ||
| 678 | |||
| 679 | return NMI_DONE; | ||
| 680 | } | ||
| 681 | |||
| 682 | /* | ||
| 683 | * Wake up AP by INIT, INIT, STARTUP sequence. | ||
| 684 | * | ||
| 685 | * Instead of waiting for STARTUP after INITs, BSP will execute the BIOS | ||
| 686 | * boot-strap code which is not a desired behavior for waking up BSP. To | ||
| 687 | * void the boot-strap code, wake up CPU0 by NMI instead. | ||
| 688 | * | ||
| 689 | * This works to wake up soft offlined CPU0 only. If CPU0 is hard offlined | ||
| 690 | * (i.e. physically hot removed and then hot added), NMI won't wake it up. | ||
| 691 | * We'll change this code in the future to wake up hard offlined CPU0 if | ||
| 692 | * real platform and request are available. | ||
| 693 | */ | ||
| 694 | static int __cpuinit | ||
| 695 | wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, | ||
| 696 | int *cpu0_nmi_registered) | ||
| 697 | { | ||
| 698 | int id; | ||
| 699 | int boot_error; | ||
| 700 | |||
| 701 | /* | ||
| 702 | * Wake up AP by INIT, INIT, STARTUP sequence. | ||
| 703 | */ | ||
| 704 | if (cpu) | ||
| 705 | return wakeup_secondary_cpu_via_init(apicid, start_ip); | ||
| 706 | |||
| 707 | /* | ||
| 708 | * Wake up BSP by nmi. | ||
| 709 | * | ||
| 710 | * Register a NMI handler to help wake up CPU0. | ||
| 711 | */ | ||
| 712 | boot_error = register_nmi_handler(NMI_LOCAL, | ||
| 713 | wakeup_cpu0_nmi, 0, "wake_cpu0"); | ||
| 714 | |||
| 715 | if (!boot_error) { | ||
| 716 | enable_start_cpu0 = 1; | ||
| 717 | *cpu0_nmi_registered = 1; | ||
| 718 | if (apic->dest_logical == APIC_DEST_LOGICAL) | ||
| 719 | id = cpu0_logical_apicid; | ||
| 720 | else | ||
| 721 | id = apicid; | ||
| 722 | boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); | ||
| 723 | } | ||
| 724 | |||
| 725 | return boot_error; | ||
| 726 | } | ||
| 727 | |||
| 652 | /* | 728 | /* |
| 653 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 729 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
| 654 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 730 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
| @@ -664,6 +740,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
| 664 | 740 | ||
| 665 | unsigned long boot_error = 0; | 741 | unsigned long boot_error = 0; |
| 666 | int timeout; | 742 | int timeout; |
| 743 | int cpu0_nmi_registered = 0; | ||
| 667 | 744 | ||
| 668 | /* Just in case we booted with a single CPU. */ | 745 | /* Just in case we booted with a single CPU. */ |
| 669 | alternatives_enable_smp(); | 746 | alternatives_enable_smp(); |
| @@ -711,13 +788,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
| 711 | } | 788 | } |
| 712 | 789 | ||
| 713 | /* | 790 | /* |
| 714 | * Kick the secondary CPU. Use the method in the APIC driver | 791 | * Wake up a CPU in difference cases: |
| 715 | * if it's defined - or use an INIT boot APIC message otherwise: | 792 | * - Use the method in the APIC driver if it's defined |
| 793 | * Otherwise, | ||
| 794 | * - Use an INIT boot APIC message for APs or NMI for BSP. | ||
| 716 | */ | 795 | */ |
| 717 | if (apic->wakeup_secondary_cpu) | 796 | if (apic->wakeup_secondary_cpu) |
| 718 | boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); | 797 | boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); |
| 719 | else | 798 | else |
| 720 | boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); | 799 | boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid, |
| 800 | &cpu0_nmi_registered); | ||
| 721 | 801 | ||
| 722 | if (!boot_error) { | 802 | if (!boot_error) { |
| 723 | /* | 803 | /* |
| @@ -782,6 +862,13 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
| 782 | */ | 862 | */ |
| 783 | smpboot_restore_warm_reset_vector(); | 863 | smpboot_restore_warm_reset_vector(); |
| 784 | } | 864 | } |
| 865 | /* | ||
| 866 | * Clean up the nmi handler. Do this after the callin and callout sync | ||
| 867 | * to avoid impact of possible long unregister time. | ||
| 868 | */ | ||
| 869 | if (cpu0_nmi_registered) | ||
| 870 | unregister_nmi_handler(NMI_LOCAL, "wake_cpu0"); | ||
| 871 | |||
| 785 | return boot_error; | 872 | return boot_error; |
| 786 | } | 873 | } |
| 787 | 874 | ||
| @@ -795,7 +882,7 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
| 795 | 882 | ||
| 796 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 883 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
| 797 | 884 | ||
| 798 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 885 | if (apicid == BAD_APICID || |
| 799 | !physid_isset(apicid, phys_cpu_present_map) || | 886 | !physid_isset(apicid, phys_cpu_present_map) || |
| 800 | !apic->apic_id_valid(apicid)) { | 887 | !apic->apic_id_valid(apicid)) { |
| 801 | pr_err("%s: bad cpu %d\n", __func__, cpu); | 888 | pr_err("%s: bad cpu %d\n", __func__, cpu); |
| @@ -818,6 +905,9 @@ int __cpuinit native_cpu_up(unsigned int cpu, struct task_struct *tidle) | |||
| 818 | 905 | ||
| 819 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 906 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
| 820 | 907 | ||
| 908 | /* the FPU context is blank, nobody can own it */ | ||
| 909 | __cpu_disable_lazy_restore(cpu); | ||
| 910 | |||
| 821 | err = do_boot_cpu(apicid, cpu, tidle); | 911 | err = do_boot_cpu(apicid, cpu, tidle); |
| 822 | if (err) { | 912 | if (err) { |
| 823 | pr_debug("do_boot_cpu failed %d\n", err); | 913 | pr_debug("do_boot_cpu failed %d\n", err); |
| @@ -990,7 +1080,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 990 | /* | 1080 | /* |
| 991 | * Setup boot CPU information | 1081 | * Setup boot CPU information |
| 992 | */ | 1082 | */ |
| 993 | smp_store_cpu_info(0); /* Final full version of the data */ | 1083 | smp_store_boot_cpu_info(); /* Final full version of the data */ |
| 994 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); | 1084 | cpumask_copy(cpu_callin_mask, cpumask_of(0)); |
| 995 | mb(); | 1085 | mb(); |
| 996 | 1086 | ||
| @@ -1026,6 +1116,11 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
| 1026 | */ | 1116 | */ |
| 1027 | setup_local_APIC(); | 1117 | setup_local_APIC(); |
| 1028 | 1118 | ||
| 1119 | if (x2apic_mode) | ||
| 1120 | cpu0_logical_apicid = apic_read(APIC_LDR); | ||
| 1121 | else | ||
| 1122 | cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); | ||
| 1123 | |||
| 1029 | /* | 1124 | /* |
| 1030 | * Enable IO APIC before setting up error vector | 1125 | * Enable IO APIC before setting up error vector |
| 1031 | */ | 1126 | */ |
| @@ -1214,19 +1309,6 @@ void cpu_disable_common(void) | |||
| 1214 | 1309 | ||
| 1215 | int native_cpu_disable(void) | 1310 | int native_cpu_disable(void) |
| 1216 | { | 1311 | { |
| 1217 | int cpu = smp_processor_id(); | ||
| 1218 | |||
| 1219 | /* | ||
| 1220 | * Perhaps use cpufreq to drop frequency, but that could go | ||
| 1221 | * into generic code. | ||
| 1222 | * | ||
| 1223 | * We won't take down the boot processor on i386 due to some | ||
| 1224 | * interrupts only being able to be serviced by the BSP. | ||
| 1225 | * Especially so if we're not using an IOAPIC -zwane | ||
| 1226 | */ | ||
| 1227 | if (cpu == 0) | ||
| 1228 | return -EBUSY; | ||
| 1229 | |||
| 1230 | clear_local_APIC(); | 1312 | clear_local_APIC(); |
| 1231 | 1313 | ||
| 1232 | cpu_disable_common(); | 1314 | cpu_disable_common(); |
| @@ -1266,6 +1348,14 @@ void play_dead_common(void) | |||
| 1266 | local_irq_disable(); | 1348 | local_irq_disable(); |
| 1267 | } | 1349 | } |
| 1268 | 1350 | ||
| 1351 | static bool wakeup_cpu0(void) | ||
| 1352 | { | ||
| 1353 | if (smp_processor_id() == 0 && enable_start_cpu0) | ||
| 1354 | return true; | ||
| 1355 | |||
| 1356 | return false; | ||
| 1357 | } | ||
| 1358 | |||
| 1269 | /* | 1359 | /* |
| 1270 | * We need to flush the caches before going to sleep, lest we have | 1360 | * We need to flush the caches before going to sleep, lest we have |
| 1271 | * dirty data in our caches when we come back up. | 1361 | * dirty data in our caches when we come back up. |
| @@ -1329,6 +1419,11 @@ static inline void mwait_play_dead(void) | |||
| 1329 | __monitor(mwait_ptr, 0, 0); | 1419 | __monitor(mwait_ptr, 0, 0); |
| 1330 | mb(); | 1420 | mb(); |
| 1331 | __mwait(eax, 0); | 1421 | __mwait(eax, 0); |
| 1422 | /* | ||
| 1423 | * If NMI wants to wake up CPU0, start CPU0. | ||
| 1424 | */ | ||
| 1425 | if (wakeup_cpu0()) | ||
| 1426 | start_cpu0(); | ||
| 1332 | } | 1427 | } |
| 1333 | } | 1428 | } |
| 1334 | 1429 | ||
| @@ -1339,6 +1434,11 @@ static inline void hlt_play_dead(void) | |||
| 1339 | 1434 | ||
| 1340 | while (1) { | 1435 | while (1) { |
| 1341 | native_halt(); | 1436 | native_halt(); |
| 1437 | /* | ||
| 1438 | * If NMI wants to wake up CPU0, start CPU0. | ||
| 1439 | */ | ||
| 1440 | if (wakeup_cpu0()) | ||
| 1441 | start_cpu0(); | ||
| 1342 | } | 1442 | } |
| 1343 | } | 1443 | } |
| 1344 | 1444 | ||
