author		Thomas Gleixner <tglx@linutronix.de>	2012-07-16 06:42:38 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2012-08-13 11:01:07 -0400
commit		bcd951cf10f24e341defcd002c15a1f4eea13ddb (patch)
tree		6c300cbbb9e5c23ac52e1d490057ce98e0c2bc69 /kernel/watchdog.c
parent		3e339b5dae24a7065e196eb8d0145ab2f8cc2d2d (diff)
watchdog: Use hotplug thread infrastructure
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20120716103948.563736676@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--	kernel/watchdog.c	263
1 file changed, 89 insertions(+), 174 deletions(-)
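
Note: the patch below retires the watchdog's private CPU-hotplug notifier and hand-rolled kthread management in favour of the generic smpboot per-CPU thread infrastructure. For orientation, the registration pattern it adopts looks roughly like the following sketch; struct smp_hotplug_thread, its callback fields and smpboot_register_percpu_thread() appear in the patch itself, while the my_* names are purely illustrative:

	#include <linux/smpboot.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(struct task_struct *, my_thread);

	/* Asked by the smpboot core: is there work pending for this CPU? */
	static int my_should_run(unsigned int cpu)
	{
		return 0;	/* replace with a real "work pending?" test */
	}

	/* One unit of work; runs in "mythread/N", bound to CPU N. */
	static void my_thread_fn(unsigned int cpu)
	{
	}

	static struct smp_hotplug_thread my_threads = {
		.store			= &my_thread,
		.thread_should_run	= my_should_run,
		.thread_fn		= my_thread_fn,
		.thread_comm		= "mythread/%u",
	};

	static int __init my_init(void)
	{
		/* Creates one thread per online CPU and parks/unparks
		   them automatically across CPU hotplug. */
		return smpboot_register_percpu_thread(&my_threads);
	}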
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 4b1dfba70f7c..9d4c8d5a1f53 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -22,6 +22,7 @@
 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/smpboot.h>
 
 #include <asm/irq_regs.h>
 #include <linux/kvm_para.h>
@@ -29,16 +30,18 @@
 
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+static int __read_mostly watchdog_disabled;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
 static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
 static DEFINE_PER_CPU(bool, softlockup_touch_sync);
 static DEFINE_PER_CPU(bool, soft_watchdog_warn);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
+static DEFINE_PER_CPU(unsigned long, soft_lockup_hrtimer_cnt);
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
-static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
@@ -248,13 +251,15 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	__this_cpu_write(hard_watchdog_warn, false);
 	return;
 }
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
 static void watchdog_interrupt_count(void)
 {
 	__this_cpu_inc(hrtimer_interrupts);
 }
-#else
-static inline void watchdog_interrupt_count(void) { return; }
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+static int watchdog_nmi_enable(unsigned int cpu);
+static void watchdog_nmi_disable(unsigned int cpu);
 
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
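
Note: the hunk above moves watchdog_interrupt_count() and (in the previous hunk) the hrtimer_interrupts counter out of the CONFIG_HARDLOCKUP_DETECTOR-only region, since the softlockup side now reads the counter as well (see watchdog_should_run() below). For context, the hardlockup-side consumer of the same counter elsewhere in this file looks essentially like this (paraphrased, not part of the patch):

	/* A hard lockup is assumed when the hrtimer interrupt count
	 * has not advanced since the last NMI sample on this CPU. */
	static int is_hardlockup(void)
	{
		unsigned long hrint = __this_cpu_read(hrtimer_interrupts);

		if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
			return 1;

		__this_cpu_write(hrtimer_interrupts_saved, hrint);
		return 0;
	}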
@@ -327,49 +332,68 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	return HRTIMER_RESTART;
 }
 
+static void watchdog_set_prio(unsigned int policy, unsigned int prio)
+{
+	struct sched_param param = { .sched_priority = prio };
 
-/*
- * The watchdog thread - touches the timestamp.
- */
-static int watchdog(void *unused)
+	sched_setscheduler(current, policy, &param);
+}
+
+static void watchdog_enable(unsigned int cpu)
 {
-	struct sched_param param = { .sched_priority = 0 };
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-	/* initialize timestamp */
-	__touch_watchdog();
+	if (!watchdog_enabled) {
+		kthread_park(current);
+		return;
+	}
+
+	/* Enable the perf event */
+	watchdog_nmi_enable(cpu);
 
 	/* kick off the timer for the hardlockup detector */
+	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer->function = watchdog_timer_fn;
+
 	/* done here because hrtimer_start can only pin to smp_processor_id() */
 	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
 		      HRTIMER_MODE_REL_PINNED);
 
-	set_current_state(TASK_INTERRUPTIBLE);
-	/*
-	 * Run briefly (kicked by the hrtimer callback function) once every
-	 * get_sample_period() seconds (4 seconds by default) to reset the
-	 * softlockup timestamp. If this gets delayed for more than
-	 * 2*watchdog_thresh seconds then the debug-printout triggers in
-	 * watchdog_timer_fn().
-	 */
-	while (!kthread_should_stop()) {
-		__touch_watchdog();
-		schedule();
+	/* initialize timestamp */
+	watchdog_set_prio(SCHED_FIFO, MAX_RT_PRIO - 1);
+	__touch_watchdog();
+}
 
-		if (kthread_should_stop())
-			break;
+static void watchdog_disable(unsigned int cpu)
+{
+	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
-		set_current_state(TASK_INTERRUPTIBLE);
-	}
-	/*
-	 * Drop the policy/priority elevation during thread exit to avoid a
-	 * scheduling latency spike.
-	 */
-	__set_current_state(TASK_RUNNING);
-	sched_setscheduler(current, SCHED_NORMAL, &param);
-	return 0;
+	watchdog_set_prio(SCHED_NORMAL, 0);
+	hrtimer_cancel(hrtimer);
+	/* disable the perf event */
+	watchdog_nmi_disable(cpu);
 }
 
+static int watchdog_should_run(unsigned int cpu)
+{
+	return __this_cpu_read(hrtimer_interrupts) !=
+		__this_cpu_read(soft_lockup_hrtimer_cnt);
+}
+
+/*
+ * The watchdog thread function - touches the timestamp.
+ *
+ * It only runs once every get_sample_period() seconds (4 seconds by
+ * default) to reset the softlockup timestamp. If this gets delayed
+ * for more than 2*watchdog_thresh seconds then the debug-printout
+ * triggers in watchdog_timer_fn().
+ */
+static void watchdog(unsigned int cpu)
+{
+	__this_cpu_write(soft_lockup_hrtimer_cnt,
+			 __this_cpu_read(hrtimer_interrupts));
+	__touch_watchdog();
+}
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /*
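
Note: watchdog_should_run() and watchdog() are the two halves of the smpboot thread protocol: the generic per-CPU thread loop polls the former and calls the latter only when it returns true. A condensed paraphrase of that loop (smpboot_thread_fn() in kernel/smpboot.c; stop and park handling elided):

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		/* ... kthread_should_stop()/kthread_should_park() checks ... */
		if (!ht->thread_should_run(td->cpu)) {
			preempt_enable();
			schedule();	/* sleep until the hrtimer wakes us */
		} else {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			ht->thread_fn(td->cpu);	/* -> watchdog(cpu) */
		}
	}

Because the hrtimer callback increments hrtimer_interrupts on every tick and watchdog() copies that value into soft_lockup_hrtimer_cnt, the thread runs exactly once per sample period and stays asleep otherwise.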
@@ -379,7 +403,7 @@ static int watchdog(void *unused)
  */
 static unsigned long cpu0_err;
 
-static int watchdog_nmi_enable(int cpu)
+static int watchdog_nmi_enable(unsigned int cpu)
 {
 	struct perf_event_attr *wd_attr;
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
@@ -433,7 +457,7 @@ out:
 	return 0;
 }
 
-static void watchdog_nmi_disable(int cpu)
+static void watchdog_nmi_disable(unsigned int cpu)
 {
 	struct perf_event *event = per_cpu(watchdog_ev, cpu);
 
@@ -447,107 +471,35 @@ static void watchdog_nmi_disable(int cpu)
 	return;
 }
 #else
-static int watchdog_nmi_enable(int cpu) { return 0; }
-static void watchdog_nmi_disable(int cpu) { return; }
+static int watchdog_nmi_enable(unsigned int cpu) { return 0; }
+static void watchdog_nmi_disable(unsigned int cpu) { return; }
 #endif /* CONFIG_HARDLOCKUP_DETECTOR */
 
 /* prepare/enable/disable routines */
-static void watchdog_prepare_cpu(int cpu)
-{
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-	WARN_ON(per_cpu(softlockup_watchdog, cpu));
-	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hrtimer->function = watchdog_timer_fn;
-}
-
-static int watchdog_enable(int cpu)
-{
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-	int err = 0;
-
-	/* enable the perf event */
-	err = watchdog_nmi_enable(cpu);
-
-	/* Regardless of err above, fall through and start softlockup */
-
-	/* create the watchdog thread */
-	if (!p) {
-		struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-		p = kthread_create_on_node(watchdog, NULL, cpu_to_node(cpu), "watchdog/%d", cpu);
-		if (IS_ERR(p)) {
-			pr_err("softlockup watchdog for %i failed\n", cpu);
-			if (!err) {
-				/* if hardlockup hasn't already set this */
-				err = PTR_ERR(p);
-				/* and disable the perf event */
-				watchdog_nmi_disable(cpu);
-			}
-			goto out;
-		}
-		sched_setscheduler(p, SCHED_FIFO, &param);
-		kthread_bind(p, cpu);
-		per_cpu(watchdog_touch_ts, cpu) = 0;
-		per_cpu(softlockup_watchdog, cpu) = p;
-		wake_up_process(p);
-	}
-
-out:
-	return err;
-}
-
-static void watchdog_disable(int cpu)
-{
-	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
-	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);
-
-	/*
-	 * cancel the timer first to stop incrementing the stats
-	 * and waking up the kthread
-	 */
-	hrtimer_cancel(hrtimer);
-
-	/* disable the perf event */
-	watchdog_nmi_disable(cpu);
-
-	/* stop the watchdog thread */
-	if (p) {
-		per_cpu(softlockup_watchdog, cpu) = NULL;
-		kthread_stop(p);
-	}
-}
-
 /* sysctl functions */
 #ifdef CONFIG_SYSCTL
 static void watchdog_enable_all_cpus(void)
 {
-	int cpu;
-
-	watchdog_enabled = 0;
-
-	for_each_online_cpu(cpu)
-		if (!watchdog_enable(cpu))
-			/* if any cpu succeeds, watchdog is considered
-			   enabled for the system */
-			watchdog_enabled = 1;
-
-	if (!watchdog_enabled)
-		pr_err("failed to be enabled on some cpus\n");
+	unsigned int cpu;
 
+	if (watchdog_disabled) {
+		watchdog_disabled = 0;
+		for_each_online_cpu(cpu)
+			kthread_unpark(per_cpu(softlockup_watchdog, cpu));
+	}
 }
 
 static void watchdog_disable_all_cpus(void)
 {
-	int cpu;
-
-	for_each_online_cpu(cpu)
-		watchdog_disable(cpu);
+	unsigned int cpu;
 
-	/* if all watchdogs are disabled, then they are disabled for the system */
-	watchdog_enabled = 0;
+	if (!watchdog_disabled) {
+		watchdog_disabled = 1;
+		for_each_online_cpu(cpu)
+			kthread_park(per_cpu(softlockup_watchdog, cpu));
+	}
 }
 
-
 /*
  * proc handler for /proc/sys/kernel/nmi_watchdog,watchdog_thresh
  */
@@ -557,73 +509,36 @@ int proc_dowatchdog(struct ctl_table *table, int write,
 {
 	int ret;
 
+	if (watchdog_disabled < 0)
+		return -ENODEV;
+
 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (ret || !write)
-		goto out;
+		return ret;
 
 	if (watchdog_enabled && watchdog_thresh)
 		watchdog_enable_all_cpus();
 	else
 		watchdog_disable_all_cpus();
 
-out:
 	return ret;
 }
 #endif /* CONFIG_SYSCTL */
 
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
-	int hotcpu = (unsigned long)hcpu;
-
-	switch (action) {
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		watchdog_prepare_cpu(hotcpu);
-		break;
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		if (watchdog_enabled)
-			watchdog_enable(hotcpu);
-		break;
-#ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		watchdog_disable(hotcpu);
-		break;
-#endif /* CONFIG_HOTPLUG_CPU */
-	}
-
-	/*
-	 * hardlockup and softlockup are not important enough
-	 * to block cpu bring up. Just always succeed and
-	 * rely on printk output to flag problems.
-	 */
-	return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
-	.notifier_call = cpu_callback
+static struct smp_hotplug_thread watchdog_threads = {
+	.store			= &softlockup_watchdog,
+	.thread_should_run	= watchdog_should_run,
+	.thread_fn		= watchdog,
+	.thread_comm		= "watchdog/%u",
+	.setup			= watchdog_enable,
+	.park			= watchdog_disable,
+	.unpark			= watchdog_enable,
 };
 
 void __init lockup_detector_init(void)
 {
-	void *cpu = (void *)(long)smp_processor_id();
-	int err;
-
-	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
-	WARN_ON(notifier_to_errno(err));
-
-	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
-	register_cpu_notifier(&cpu_nfb);
-
-	return;
+	if (smpboot_register_percpu_thread(&watchdog_threads)) {
+		pr_err("Failed to create watchdog threads, disabled\n");
+		watchdog_disabled = -ENODEV;
+	}
 }
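
Note: with this conversion, disabling the watchdog via sysctl no longer destroys the threads; it parks them, and enabling unparks them. The controller side is just kthread_park()/kthread_unpark() on the stored task, which is why watchdog_enable_all_cpus() and watchdog_disable_all_cpus() shrink to a flag check plus a loop over the online CPUs. A hedged sketch of the park contract for a plain kthread (the smpboot core performs the equivalent on behalf of the watchdog/%u threads):

	#include <linux/kthread.h>

	static int my_loop(void *data)
	{
		while (!kthread_should_stop()) {
			if (kthread_should_park())
				kthread_parkme();	/* blocks until kthread_unpark() */
			/* ... one iteration of work, then sleep until woken ... */
		}
		return 0;
	}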