aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRik van Riel <riel@redhat.com>2013-11-01 10:41:46 -0400
committerIngo Molnar <mingo@kernel.org>2013-11-11 06:43:38 -0500
commit7053ea1a34fa8567cb5e3c39e04ace4c5d0fbeaa (patch)
treeed5866f5a2449e6d7b918ce92858f891c8bc1450
parent37dc6b50cee97954c4e6edcd5b1fa614b76038ee (diff)
stop_machine: Fix race between stop_two_cpus() and stop_cpus()
There is a race between stop_two_cpus, and the global stop_cpus. It is possible for two CPUs to get their stopper functions queued "backwards" from one another, resulting in the stopper threads getting stuck, and the system hanging. This can happen because queuing up stoppers is not synchronized.

This patch adds synchronization between stop_cpus (a rare operation), and stop_two_cpus.

Reported-and-Tested-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/20131101104146.03d1e043@annuminas.surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--kernel/stop_machine.c15
1 file changed, 13 insertions, 2 deletions
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c530bc5be7cf..84571e09c907 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,6 +20,7 @@
20#include <linux/kallsyms.h> 20#include <linux/kallsyms.h>
21#include <linux/smpboot.h> 21#include <linux/smpboot.h>
22#include <linux/atomic.h> 22#include <linux/atomic.h>
23#include <linux/lglock.h>
23 24
24/* 25/*
25 * Structure to determine completion condition and record errors. May 26 * Structure to determine completion condition and record errors. May
@@ -43,6 +44,14 @@ static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
43static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task); 44static DEFINE_PER_CPU(struct task_struct *, cpu_stopper_task);
44static bool stop_machine_initialized = false; 45static bool stop_machine_initialized = false;
45 46
47/*
48 * Avoids a race between stop_two_cpus and global stop_cpus, where
49 * the stoppers could get queued up in reverse order, leading to
50 * system deadlock. Using an lglock means stop_two_cpus remains
51 * relatively cheap.
52 */
53DEFINE_STATIC_LGLOCK(stop_cpus_lock);
54
46static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) 55static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
47{ 56{
48 memset(done, 0, sizeof(*done)); 57 memset(done, 0, sizeof(*done));
@@ -276,6 +285,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
276 return -ENOENT; 285 return -ENOENT;
277 } 286 }
278 287
288 lg_local_lock(&stop_cpus_lock);
279 /* 289 /*
280 * Queuing needs to be done by the lowest numbered CPU, to ensure 290 * Queuing needs to be done by the lowest numbered CPU, to ensure
281 * that works are always queued in the same order on every CPU. 291 * that works are always queued in the same order on every CPU.
@@ -284,6 +294,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *
284 smp_call_function_single(min(cpu1, cpu2), 294 smp_call_function_single(min(cpu1, cpu2),
285 &irq_cpu_stop_queue_work, 295 &irq_cpu_stop_queue_work,
286 &call_args, 0); 296 &call_args, 0);
297 lg_local_unlock(&stop_cpus_lock);
287 preempt_enable(); 298 preempt_enable();
288 299
289 wait_for_completion(&done.completion); 300 wait_for_completion(&done.completion);
@@ -335,10 +346,10 @@ static void queue_stop_cpus_work(const struct cpumask *cpumask,
335 * preempted by a stopper which might wait for other stoppers 346 * preempted by a stopper which might wait for other stoppers
336 * to enter @fn which can lead to deadlock. 347 * to enter @fn which can lead to deadlock.
337 */ 348 */
338 preempt_disable(); 349 lg_global_lock(&stop_cpus_lock);
339 for_each_cpu(cpu, cpumask) 350 for_each_cpu(cpu, cpumask)
340 cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu)); 351 cpu_stop_queue_work(cpu, &per_cpu(stop_cpus_work, cpu));
341 preempt_enable(); 352 lg_global_unlock(&stop_cpus_lock);
342} 353}
343 354
344static int __stop_cpus(const struct cpumask *cpumask, 355static int __stop_cpus(const struct cpumask *cpumask,