commit     e6253970413d99f416f7de8bd516e5f1834d8216
tree       af4c1099804d8cef54d7618a7a49d7e878facf12
parent     87709e28dc7c669af1126aa7352ff6f7b035412d
author     Oleg Nesterov <oleg@redhat.com>    2015-11-21 13:11:48 -0500
committer  Ingo Molnar <mingo@kernel.org>     2016-09-22 09:25:55 -0400
stop_machine: Remove stop_cpus_lock and lg_double_lock/unlock()
stop_two_cpus() and stop_cpus() use stop_cpus_lock to avoid deadlock: we need
to ensure that the stopper functions can't be queued "backwards" from one
another. This doesn't look nice; if we are going to rely on an lglock anyway,
then we do not really need stopper->lock at all: cpu_stop_queue_work() could
simply take lg_local_lock() under local_irq_save().
OTOH, it would be even better to avoid lglocks in stop_machine.c altogether
and remove lg_double_lock()/lg_double_unlock(). This patch adds a
"bool stop_cpus_in_progress" flag, set and cleared by queue_stop_cpus_work(),
and changes cpu_stop_queue_two_works() to busy-wait until it is cleared.
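
Condensed from the diff below (plus the surrounding, unchanged lines of
kernel/stop_machine.c), the two queueing paths end up looking roughly like
this; the stopper->enabled check and the long comment are omitted:

	/* stop_cpus() path: the flag brackets the whole queueing loop */
	static bool queue_stop_cpus_work(const struct cpumask *cpumask,
					 cpu_stop_fn_t fn, void *arg,
					 struct cpu_stop_done *done)
	{
		struct cpu_stop_work *work;
		unsigned int cpu;
		bool queued = false;

		preempt_disable();
		stop_cpus_in_progress = true;
		for_each_cpu(cpu, cpumask) {
			work = &per_cpu(cpu_stopper.stop_work, cpu);
			work->fn = fn;
			work->arg = arg;
			work->done = done;
			if (cpu_stop_queue_work(cpu, work))	/* takes stopper->lock */
				queued = true;
		}
		stop_cpus_in_progress = false;
		preempt_enable();

		return queued;
	}

	/* stop_two_cpus() path: check the flag under both stopper locks */
	static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
					    int cpu2, struct cpu_stop_work *work2)
	{
		struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
		struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
		int err;
	retry:
		spin_lock_irq(&stopper1->lock);
		spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);

		err = -EDEADLK;
		if (unlikely(stop_cpus_in_progress))
			goto unlock;

		err = 0;
		__cpu_stop_queue_work(stopper1, work1);
		__cpu_stop_queue_work(stopper2, work2);
	unlock:
		spin_unlock(&stopper2->lock);
		spin_unlock_irq(&stopper1->lock);

		if (unlikely(err == -EDEADLK)) {
			/* let __stop_cpus() finish queueing, then try again */
			while (stop_cpus_in_progress)
				cpu_relax();
			goto retry;
		}
		return err;
	}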
queue_stop_cpus_work() sets stop_cpus_in_progress = true locklessly, but once
it has queued a work on CPU1 the flag must be visible to
stop_two_cpus(CPU1, CPU2), which checks it under that same stopper lock. And
since stop_two_cpus() holds the second lock too, queue_stop_cpus_work() cannot
have cleared stop_cpus_in_progress while it still has a work to queue on CPU2:
it needs to take that second lock to do so.
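
For example, if __stop_cpus() has already queued its work on CPU1 but not yet
on CPU2, the racing stop_two_cpus(CPU1, CPU2) is guaranteed to observe the
flag and back off (an illustrative interleaving, not part of the patch):

	queue_stop_cpus_work()			cpu_stop_queue_two_works(CPU1, CPU2)
	----------------------			------------------------------------
	stop_cpus_in_progress = true
	lock stopper1->lock
	queue stop_work on CPU1
	unlock stopper1->lock
						lock stopper1->lock
						lock stopper2->lock
						(the unlock/lock pair on
						 stopper1->lock makes the earlier
						 store visible here)
						sees stop_cpus_in_progress == true
						unlock both, spin, goto retry
	lock stopper2->lock
	queue stop_work on CPU2
	unlock stopper2->lock
	stop_cpus_in_progress = false
						retry succeeds, both works are
						queued in the normal order

So the works can never be queued "backwards" with respect to a concurrent
stop_cpus().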
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20151121181148.GA433@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
 include/linux/lglock.h  |  5
 kernel/locking/lglock.c | 22
 kernel/stop_machine.c   | 42
 3 files changed, 26 insertions(+), 43 deletions(-)
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index c92ebd100d9b..0081f000e34b 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -52,15 +52,10 @@ struct lglock {
 	static struct lglock name = { .lock = &name ## _lock }
 
 void lg_lock_init(struct lglock *lg, char *name);
-
 void lg_local_lock(struct lglock *lg);
 void lg_local_unlock(struct lglock *lg);
 void lg_local_lock_cpu(struct lglock *lg, int cpu);
 void lg_local_unlock_cpu(struct lglock *lg, int cpu);
-
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2);
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2);
-
 void lg_global_lock(struct lglock *lg);
 void lg_global_unlock(struct lglock *lg);
 
diff --git a/kernel/locking/lglock.c b/kernel/locking/lglock.c
index 951cfcd10b4a..86ae2aebf004 100644
--- a/kernel/locking/lglock.c
+++ b/kernel/locking/lglock.c
@@ -60,28 +60,6 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
 }
 EXPORT_SYMBOL(lg_local_unlock_cpu);
 
-void lg_double_lock(struct lglock *lg, int cpu1, int cpu2)
-{
-	BUG_ON(cpu1 == cpu2);
-
-	/* lock in cpu order, just like lg_global_lock */
-	if (cpu2 < cpu1)
-		swap(cpu1, cpu2);
-
-	preempt_disable();
-	lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_lock(per_cpu_ptr(lg->lock, cpu2));
-}
-
-void lg_double_unlock(struct lglock *lg, int cpu1, int cpu2)
-{
-	lock_release(&lg->lock_dep_map, 1, _RET_IP_);
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu1));
-	arch_spin_unlock(per_cpu_ptr(lg->lock, cpu2));
-	preempt_enable();
-}
-
 void lg_global_lock(struct lglock *lg)
 {
 	int i;
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 4a1ca5f6da7e..ae6f41fb9cba 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -20,7 +20,6 @@
 #include <linux/kallsyms.h>
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
-#include <linux/lglock.h>
 #include <linux/nmi.h>
 
 /*
@@ -47,13 +46,9 @@ struct cpu_stopper {
 static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
 static bool stop_machine_initialized = false;
 
-/*
- * Avoids a race between stop_two_cpus and global stop_cpus, where
- * the stoppers could get queued up in reverse order, leading to
- * system deadlock. Using an lglock means stop_two_cpus remains
- * relatively cheap.
- */
-DEFINE_STATIC_LGLOCK(stop_cpus_lock);
+/* static data for stop_cpus */
+static DEFINE_MUTEX(stop_cpus_mutex);
+static bool stop_cpus_in_progress;
 
 static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
 {
@@ -230,14 +225,26 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 	struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1);
 	struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2);
 	int err;
-
-	lg_double_lock(&stop_cpus_lock, cpu1, cpu2);
+retry:
 	spin_lock_irq(&stopper1->lock);
 	spin_lock_nested(&stopper2->lock, SINGLE_DEPTH_NESTING);
 
 	err = -ENOENT;
 	if (!stopper1->enabled || !stopper2->enabled)
 		goto unlock;
+	/*
+	 * Ensure that if we race with __stop_cpus() the stoppers won't get
+	 * queued up in reverse order leading to system deadlock.
+	 *
+	 * We can't miss stop_cpus_in_progress if queue_stop_cpus_work() has
+	 * queued a work on cpu1 but not on cpu2, we hold both locks.
+	 *
+	 * It can be falsely true but it is safe to spin until it is cleared,
+	 * queue_stop_cpus_work() does everything under preempt_disable().
+	 */
+	err = -EDEADLK;
+	if (unlikely(stop_cpus_in_progress))
+		goto unlock;
 
 	err = 0;
 	__cpu_stop_queue_work(stopper1, work1);
@@ -245,8 +252,12 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1,
 unlock:
 	spin_unlock(&stopper2->lock);
 	spin_unlock_irq(&stopper1->lock);
-	lg_double_unlock(&stop_cpus_lock, cpu1, cpu2);
 
+	if (unlikely(err == -EDEADLK)) {
+		while (stop_cpus_in_progress)
+			cpu_relax();
+		goto retry;
+	}
 	return err;
 }
 /**
@@ -316,9 +327,6 @@ bool stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 	return cpu_stop_queue_work(cpu, work_buf);
 }
 
-/* static data for stop_cpus */
-static DEFINE_MUTEX(stop_cpus_mutex);
-
 static bool queue_stop_cpus_work(const struct cpumask *cpumask,
 				 cpu_stop_fn_t fn, void *arg,
 				 struct cpu_stop_done *done)
@@ -332,7 +340,8 @@ static bool queue_stop_cpus_work(const struct cpumask *cpumask,
 	 * preempted by a stopper which might wait for other stoppers
 	 * to enter @fn which can lead to deadlock.
 	 */
-	lg_global_lock(&stop_cpus_lock);
+	preempt_disable();
+	stop_cpus_in_progress = true;
 	for_each_cpu(cpu, cpumask) {
 		work = &per_cpu(cpu_stopper.stop_work, cpu);
 		work->fn = fn;
@@ -341,7 +350,8 @@ static bool queue_stop_cpus_work(const struct cpumask *cpumask,
 		if (cpu_stop_queue_work(cpu, work))
 			queued = true;
 	}
-	lg_global_unlock(&stop_cpus_lock);
+	stop_cpus_in_progress = false;
+	preempt_enable();
 
 	return queued;
 }