about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
context:
space:
mode:
author: Hugh Dickins <hugh@veritas.com> 2008-05-13 09:26:57 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-05-13 13:36:12 -0400
commit: 61165d7a035f6571c7576e7f51e7230157724c8d (patch)
tree: 2a51d4df60fa73aa84114b7f3add4d2774177076 /arch
parent: 1dbd6608191cff854ab467e9880b7aeb2385ca39 (diff)
x86: fix app crashes after SMP resume
After resume on a 2cpu laptop, kernel builds collapse with a sed hang, sh or make segfault (often on 20295564), real-time signal to cc1 etc.

Several hurdles to jump, but a manually-assisted bisect led to -rc1's d2bcbad5f3ad38a1c09861bca7e252dde7bb8259 x86: do not zap_low_mappings in __smp_prepare_cpus. Though the low mappings were removed at bootup, they were left behind (with Global flags helping to keep them in TLB) after resume or cpu online, causing the crashes seen.

Reinstate zap_low_mappings (with local __flush_tlb_all) for each cpu_up on x86_32. This used to be serialized by smp_commenced_mask: that's now gone, but a low_mappings flag will do. No need for native_smp_cpus_done to repeat the zap: let mem_init zap BSP's low mappings just like on UP.

(In passing, fix error code from native_cpu_up: do_boot_cpu returns a variety of diagnostic values, Dprintk what it says but convert to -EIO. And save_pg_dir separately before zap_low_mappings: doesn't matter now, but zapping twice in succession wiped out resume's swsusp_pg_dir.)

That worked well on the duo and one quad, but wouldn't boot 3rd or 4th cpu on P4 Xeon, oopsing just after unlock_ipi_call_lock. The TLB flush IPI now being sent reveals a long-standing bug: the booting cpu has its APIC readied in smp_callin at the top of start_secondary, but isn't put into the cpu_online_map until just before that unlock_ipi_call_lock.

So native_smp_call_function_mask to online cpus would send_IPI_allbutself, including the cpu just coming up, though it has been excluded from the count to wait for: by the time it handles the IPI, the call data on native_smp_call_function_mask's stack may well have been overwritten.

So fall back to send_IPI_mask while cpu_online_map does not match cpu_callout_map: perhaps there's a better APICological fix to be made at the start_secondary end, but I wouldn't know that.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
-rw-r--r-- arch/x86/kernel/smp.c 3
-rw-r--r-- arch/x86/kernel/smpboot.c 24
-rw-r--r-- arch/x86/mm/init_32.c 12
3 files changed, 20 insertions, 19 deletions
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 8f75893a6467..0cb7aadc87cd 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -231,7 +231,8 @@ native_smp_call_function_mask(cpumask_t mask,
231 wmb(); 231 wmb();
232 232
233 /* Send a message to other CPUs */ 233 /* Send a message to other CPUs */
234 if (cpus_equal(mask, allbutself)) 234 if (cpus_equal(mask, allbutself) &&
235 cpus_equal(cpu_online_map, cpu_callout_map))
235 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 236 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
236 else 237 else
237 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 238 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6b087ab6cd8f..38988491c622 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -86,6 +86,7 @@ void *x86_bios_cpu_apicid_early_ptr;
86 86
87#ifdef CONFIG_X86_32 87#ifdef CONFIG_X86_32
88u8 apicid_2_node[MAX_APICID]; 88u8 apicid_2_node[MAX_APICID];
89static int low_mappings;
89#endif 90#endif
90 91
91/* State of each CPU */ 92/* State of each CPU */
@@ -326,6 +327,12 @@ static void __cpuinit start_secondary(void *unused)
326 enable_8259A_irq(0); 327 enable_8259A_irq(0);
327 } 328 }
328 329
330#ifdef CONFIG_X86_32
331 while (low_mappings)
332 cpu_relax();
333 __flush_tlb_all();
334#endif
335
329 /* This must be done before setting cpu_online_map */ 336 /* This must be done before setting cpu_online_map */
330 set_cpu_sibling_map(raw_smp_processor_id()); 337 set_cpu_sibling_map(raw_smp_processor_id());
331 wmb(); 338 wmb();
@@ -1040,14 +1047,20 @@ int __cpuinit native_cpu_up(unsigned int cpu)
1040#ifdef CONFIG_X86_32 1047#ifdef CONFIG_X86_32
1041 /* init low mem mapping */ 1048 /* init low mem mapping */
1042 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, 1049 clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
1043 min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); 1050 min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
1044 flush_tlb_all(); 1051 flush_tlb_all();
1045#endif 1052 low_mappings = 1;
1046 1053
1047 err = do_boot_cpu(apicid, cpu); 1054 err = do_boot_cpu(apicid, cpu);
1048 if (err < 0) { 1055
1056 zap_low_mappings();
1057 low_mappings = 0;
1058#else
1059 err = do_boot_cpu(apicid, cpu);
1060#endif
1061 if (err) {
1049 Dprintk("do_boot_cpu failed %d\n", err); 1062 Dprintk("do_boot_cpu failed %d\n", err);
1050 return err; 1063 return -EIO;
1051 } 1064 }
1052 1065
1053 /* 1066 /*
@@ -1259,9 +1272,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1259 setup_ioapic_dest(); 1272 setup_ioapic_dest();
1260#endif 1273#endif
1261 check_nmi_watchdog(); 1274 check_nmi_watchdog();
1262#ifdef CONFIG_X86_32
1263 zap_low_mappings();
1264#endif
1265} 1275}
1266 1276
1267#ifdef CONFIG_HOTPLUG_CPU 1277#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index de236e419cb5..ec30d10154b6 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -438,8 +438,6 @@ void zap_low_mappings(void)
438{ 438{
439 int i; 439 int i;
440 440
441 save_pg_dir();
442
443 /* 441 /*
444 * Zap initial low-memory mappings. 442 * Zap initial low-memory mappings.
445 * 443 *
@@ -663,16 +661,8 @@ void __init mem_init(void)
663 test_wp_bit(); 661 test_wp_bit();
664 662
665 cpa_init(); 663 cpa_init();
666 664 save_pg_dir();
667 /*
668 * Subtle. SMP is doing it's boot stuff late (because it has to
669 * fork idle threads) - but it also needs low mappings for the
670 * protected-mode entry to work. We zap these entries only after
671 * the WP-bit has been tested.
672 */
673#ifndef CONFIG_SMP
674 zap_low_mappings(); 665 zap_low_mappings();
675#endif
676} 666}
677 667
678#ifdef CONFIG_MEMORY_HOTPLUG 668#ifdef CONFIG_MEMORY_HOTPLUG