author     Peter Zijlstra <peterz@infradead.org>    2018-07-13 06:42:08 -0400
committer  Ingo Molnar <mingo@kernel.org>           2018-07-15 17:51:19 -0400
commit     be45bf5395e0886a93fc816bbe41a008ec2e42e2 (patch)
tree       ad459f7d60364e75743ef18066dffc6a31088f08 /kernel/watchdog.c
parent     fdf2ceb7f58c28639a36f8895006828582a1f5b2 (diff)
watchdog/softlockup: Fix cpu_stop_queue_work() double-queue bug
When scheduling is delayed for longer than the softlockup interrupt
period, it is possible to double-queue the cpu_stop_work, causing list
corruption.
Cure this by adding a completion to track the cpu_stop_work's
progress.
Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Rong Chen <rong.a.chen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180713104208.GW2494@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
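
For readers unfamiliar with the pattern, the sketch below illustrates the same gating idea outside the kernel. It is a userspace analogue, not kernel code: fake_completion, tick() and slow_worker() are invented stand-ins for struct completion, watchdog_timer_fn() and softlockup_fn(), built on POSIX threads. It demonstrates the point of the patch: a periodic tick only queues new work once the previous work has signalled completion, and teardown waits for any in-flight work before returning.

/*
 * Userspace sketch of the gating pattern this patch introduces; NOT kernel
 * code.  struct fake_completion, tick() and slow_worker() are made-up
 * stand-ins for struct completion, watchdog_timer_fn() and softlockup_fn().
 * Build with:  cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Poor man's struct completion: a flag guarded by a mutex and condvar. */
struct fake_completion {
        pthread_mutex_t lock;
        pthread_cond_t  cond;
        bool            done;
};

static struct fake_completion work_done = {
        .lock = PTHREAD_MUTEX_INITIALIZER,
        .cond = PTHREAD_COND_INITIALIZER,
};

static bool fake_completion_done(struct fake_completion *c)
{
        pthread_mutex_lock(&c->lock);
        bool done = c->done;
        pthread_mutex_unlock(&c->lock);
        return done;
}

static void fake_reinit_completion(struct fake_completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = false;
        pthread_mutex_unlock(&c->lock);
}

static void fake_complete(struct fake_completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = true;
        pthread_cond_broadcast(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void fake_wait_for_completion(struct fake_completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

/* Like softlockup_fn(): do the work, then signal that it finished. */
static void *slow_worker(void *arg)
{
        (void)arg;
        sleep(2);                       /* the work outlives one timer period */
        fake_complete(&work_done);
        return NULL;
}

/*
 * Like watchdog_timer_fn(): only queue new work once the previous work has
 * completed.  Without the completion check, a delayed worker would be
 * queued a second time -- the double-queue this patch prevents.
 */
static void tick(void)
{
        pthread_t t;

        if (!fake_completion_done(&work_done)) {
                printf("previous work still pending, skip this tick\n");
                return;
        }
        fake_reinit_completion(&work_done);
        pthread_create(&t, NULL, slow_worker, NULL);
        pthread_detach(t);
        printf("queued new work\n");
}

int main(void)
{
        /* Like watchdog_enable(): start "completed" so the first tick queues. */
        fake_complete(&work_done);

        for (int i = 0; i < 5; i++) {
                tick();
                sleep(1);               /* timer period shorter than the work */
        }

        /* Like watchdog_disable(): wait for in-flight work before tearing down. */
        fake_wait_for_completion(&work_done);
        return 0;
}

In the patch itself the same roles are played by completion_done(), reinit_completion(), complete() and wait_for_completion() on the per-CPU softlockup_completion, as shown in the diff below.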
Diffstat (limited to 'kernel/watchdog.c')
 kernel/watchdog.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b81f777838d5..5470dce212c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -330,6 +330,9 @@ static void watchdog_interrupt_count(void)
         __this_cpu_inc(hrtimer_interrupts);
 }
 
+static DEFINE_PER_CPU(struct completion, softlockup_completion);
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /*
  * The watchdog thread function - touches the timestamp.
  *
@@ -343,12 +346,11 @@ static int softlockup_fn(void *data)
         __this_cpu_write(soft_lockup_hrtimer_cnt,
                          __this_cpu_read(hrtimer_interrupts));
         __touch_watchdog();
+        complete(this_cpu_ptr(&softlockup_completion));
 
         return 0;
 }
 
-static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -364,9 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
         watchdog_interrupt_count();
 
         /* kick the softlockup detector */
-        stop_one_cpu_nowait(smp_processor_id(),
-                        softlockup_fn, NULL,
-                        this_cpu_ptr(&softlockup_stop_work));
+        if (completion_done(this_cpu_ptr(&softlockup_completion))) {
+                reinit_completion(this_cpu_ptr(&softlockup_completion));
+                stop_one_cpu_nowait(smp_processor_id(),
+                                softlockup_fn, NULL,
+                                this_cpu_ptr(&softlockup_stop_work));
+        }
 
         /* .. and repeat */
         hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -467,9 +472,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 static void watchdog_enable(unsigned int cpu)
 {
         struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
+        struct completion *done = this_cpu_ptr(&softlockup_completion);
 
         WARN_ON_ONCE(cpu != smp_processor_id());
 
+        init_completion(done);
+        complete(done);
+
         /*
          * Start the timer first to prevent the NMI watchdog triggering
          * before the timer has a chance to fire.
@@ -499,6 +508,7 @@ static void watchdog_disable(unsigned int cpu)
          */
         watchdog_nmi_disable(cpu);
         hrtimer_cancel(hrtimer);
+        wait_for_completion(this_cpu_ptr(&softlockup_completion));
 }
 
 static int softlockup_stop_fn(void *data)