aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Redfearn <matt.redfearn@imgtec.com>2017-09-27 05:13:25 -0400
committerJames Hogan <jhogan@kernel.org>2017-10-31 20:09:17 -0400
commit9e8c399a88f0b87e41a894911475ed2a8f8dff9e (patch)
tree34481f4c214ed18b27b0f0ed8867e010f2a83dd2
parent6a2932a463d526e362a6b4e112be226f1d18d088 (diff)
MIPS: SMP: Fix deadlock & online race
Commit 6f542ebeaee0 ("MIPS: Fix race on setting and getting cpu_online_mask") effectively reverted commit 8f46cca1e6c06 ("MIPS: SMP: Fix possibility of deadlock when bringing CPUs online") and thus has reinstated the possibility of deadlock. The commit was based on testing of kernel v4.4, where the CPU hotplug core code issued a BUG() if the starting CPU is not marked online when the boot CPU returns from __cpu_up. The commit fixes this race (in v4.4), but re-introduces the deadlock situation. As noted in the commit message, upstream differs in this area. Commit 8df3e07e7f21f ("cpu/hotplug: Let upcoming cpu bring itself fully up") adds a completion event in the CPU hotplug core code, making this race impossible. However, people were unhappy with relying on the core code to do the right thing. To address the issues both commits were trying to fix, add a second completion event in the MIPS smp hotplug path. It removes the possibility of a race, since the MIPS smp hotplug code now synchronises both the boot and secondary CPUs before they return to the hotplug core code. It also addresses the deadlock by ensuring that the secondary CPU is not marked online before it's counters are synchronised. This fix should also be backported to fix the race condition introduced by the backport of commit 8f46cca1e6c06 ("MIPS: SMP: Fix possibility of deadlock when bringing CPUs online"), through really that race only existed before commit 8df3e07e7f21f ("cpu/hotplug: Let upcoming cpu bring itself fully up"). Signed-off-by: Matt Redfearn <matt.redfearn@imgtec.com> Fixes: 6f542ebeaee0 ("MIPS: Fix race on setting and getting cpu_online_mask") CC: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com> Cc: <stable@vger.kernel.org> # v4.1+: 8f46cca1e6c0: "MIPS: SMP: Fix possibility of deadlock when bringing CPUs online" Cc: <stable@vger.kernel.org> # v4.1+: a00eeede507c: "MIPS: SMP: Use a completion event to signal CPU up" Cc: <stable@vger.kernel.org> # v4.1+: 6f542ebeaee0: "MIPS: Fix race on setting and getting cpu_online_mask" Cc: <stable@vger.kernel.org> # v4.1+ Patchwork: https://patchwork.linux-mips.org/patch/17376/ Signed-off-by: James Hogan <jhogan@kernel.org>
-rw-r--r--arch/mips/kernel/smp.c22
1 files changed, 16 insertions, 6 deletions
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 3cd2f70d1c18..88be966d3e61 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -66,6 +66,7 @@ EXPORT_SYMBOL(cpu_sibling_map);
66cpumask_t cpu_core_map[NR_CPUS] __read_mostly; 66cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
67EXPORT_SYMBOL(cpu_core_map); 67EXPORT_SYMBOL(cpu_core_map);
68 68
69static DECLARE_COMPLETION(cpu_starting);
69static DECLARE_COMPLETION(cpu_running); 70static DECLARE_COMPLETION(cpu_running);
70 71
71/* 72/*
@@ -374,6 +375,12 @@ asmlinkage void start_secondary(void)
374 cpumask_set_cpu(cpu, &cpu_coherent_mask); 375 cpumask_set_cpu(cpu, &cpu_coherent_mask);
375 notify_cpu_starting(cpu); 376 notify_cpu_starting(cpu);
376 377
378 /* Notify boot CPU that we're starting & ready to sync counters */
379 complete(&cpu_starting);
380
381 synchronise_count_slave(cpu);
382
383 /* The CPU is running and counters synchronised, now mark it online */
377 set_cpu_online(cpu, true); 384 set_cpu_online(cpu, true);
378 385
379 set_cpu_sibling_map(cpu); 386 set_cpu_sibling_map(cpu);
@@ -381,8 +388,11 @@ asmlinkage void start_secondary(void)
381 388
382 calculate_cpu_foreign_map(); 389 calculate_cpu_foreign_map();
383 390
391 /*
392 * Notify boot CPU that we're up & online and it can safely return
393 * from __cpu_up
394 */
384 complete(&cpu_running); 395 complete(&cpu_running);
385 synchronise_count_slave(cpu);
386 396
387 /* 397 /*
388 * irq will be enabled in ->smp_finish(), enabling it too early 398 * irq will be enabled in ->smp_finish(), enabling it too early
@@ -445,17 +455,17 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
445 if (err) 455 if (err)
446 return err; 456 return err;
447 457
448 /* 458 /* Wait for CPU to start and be ready to sync counters */
449 * We must check for timeout here, as the CPU will not be marked 459 if (!wait_for_completion_timeout(&cpu_starting,
450 * online until the counters are synchronised.
451 */
452 if (!wait_for_completion_timeout(&cpu_running,
453 msecs_to_jiffies(1000))) { 460 msecs_to_jiffies(1000))) {
454 pr_crit("CPU%u: failed to start\n", cpu); 461 pr_crit("CPU%u: failed to start\n", cpu);
455 return -EIO; 462 return -EIO;
456 } 463 }
457 464
458 synchronise_count_master(cpu); 465 synchronise_count_master(cpu);
466
467 /* Wait for CPU to finish startup & mark itself online before return */
468 wait_for_completion(&cpu_running);
459 return 0; 469 return 0;
460} 470}
461 471