summary | refs | log | tree | commit | diff | stats
path: root/kernel/watchdog.c
diff options
context:
space:
mode:
author: Peter Zijlstra <peterz@infradead.org>, 2018-07-13 06:42:08 -0400
committer: Ingo Molnar <mingo@kernel.org>, 2018-07-15 17:51:19 -0400
commit: be45bf5395e0886a93fc816bbe41a008ec2e42e2 (patch)
tree: ad459f7d60364e75743ef18066dffc6a31088f08 /kernel/watchdog.c
parent: fdf2ceb7f58c28639a36f8895006828582a1f5b2 (diff)
watchdog/softlockup: Fix cpu_stop_queue_work() double-queue bug
When scheduling is delayed for longer than the softlockup interrupt
period it is possible to double-queue the cpu_stop_work, causing list
corruption.

Cure this by adding a completion to track the cpu_stop_work's
progress.

Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Rong Chen <rong.a.chen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180713104208.GW2494@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r-- kernel/watchdog.c 20
1 files changed, 15 insertions, 5 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b81f777838d5..5470dce212c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -330,6 +330,9 @@ static void watchdog_interrupt_count(void)
330 __this_cpu_inc(hrtimer_interrupts); 330 __this_cpu_inc(hrtimer_interrupts);
331} 331}
332 332
333static DEFINE_PER_CPU(struct completion, softlockup_completion);
334static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
335
333/* 336/*
334 * The watchdog thread function - touches the timestamp. 337 * The watchdog thread function - touches the timestamp.
335 * 338 *
@@ -343,12 +346,11 @@ static int softlockup_fn(void *data)
343 __this_cpu_write(soft_lockup_hrtimer_cnt, 346 __this_cpu_write(soft_lockup_hrtimer_cnt,
344 __this_cpu_read(hrtimer_interrupts)); 347 __this_cpu_read(hrtimer_interrupts));
345 __touch_watchdog(); 348 __touch_watchdog();
349 complete(this_cpu_ptr(&softlockup_completion));
346 350
347 return 0; 351 return 0;
348} 352}
349 353
350static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
351
352/* watchdog kicker functions */ 354/* watchdog kicker functions */
353static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 355static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
354{ 356{
@@ -364,9 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
364 watchdog_interrupt_count(); 366 watchdog_interrupt_count();
365 367
366 /* kick the softlockup detector */ 368 /* kick the softlockup detector */
367 stop_one_cpu_nowait(smp_processor_id(), 369 if (completion_done(this_cpu_ptr(&softlockup_completion))) {
368 softlockup_fn, NULL, 370 reinit_completion(this_cpu_ptr(&softlockup_completion));
369 this_cpu_ptr(&softlockup_stop_work)); 371 stop_one_cpu_nowait(smp_processor_id(),
372 softlockup_fn, NULL,
373 this_cpu_ptr(&softlockup_stop_work));
374 }
370 375
371 /* .. and repeat */ 376 /* .. and repeat */
372 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period)); 377 hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -467,9 +472,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
467static void watchdog_enable(unsigned int cpu) 472static void watchdog_enable(unsigned int cpu)
468{ 473{
469 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer); 474 struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
475 struct completion *done = this_cpu_ptr(&softlockup_completion);
470 476
471 WARN_ON_ONCE(cpu != smp_processor_id()); 477 WARN_ON_ONCE(cpu != smp_processor_id());
472 478
479 init_completion(done);
480 complete(done);
481
473 /* 482 /*
474 * Start the timer first to prevent the NMI watchdog triggering 483 * Start the timer first to prevent the NMI watchdog triggering
475 * before the timer has a chance to fire. 484 * before the timer has a chance to fire.
@@ -499,6 +508,7 @@ static void watchdog_disable(unsigned int cpu)
499 */ 508 */
500 watchdog_nmi_disable(cpu); 509 watchdog_nmi_disable(cpu);
501 hrtimer_cancel(hrtimer); 510 hrtimer_cancel(hrtimer);
511 wait_for_completion(this_cpu_ptr(&softlockup_completion));
502} 512}
503 513
504static int softlockup_stop_fn(void *data) 514static int softlockup_stop_fn(void *data)