author    Thomas Gleixner <tglx@linutronix.de>    2017-09-12 15:37:11 -0400
committer Ingo Molnar <mingo@kernel.org>          2017-09-14 05:41:06 -0400
commit    d57108d4f6791291e89d980e7f7a3566c32ab188 (patch)
tree      b51bf3c0fb0b6f283a3f72eb16b56825365fab6d
parent    2eb2527f847d1bd8d8fb9db1e8139db5d6eddb36 (diff)
watchdog/core: Get rid of the thread teardown/setup dance
The lockup detector reconfiguration tears down all watchdog threads when
the watchdog is disabled and sets them up again when it's enabled. That's
a pointless exercise. The watchdog threads are not consuming an insane
amount of resources, so it's enough to set them up at init time, keep
them parked when the watchdog is disabled and unpark them when it is
reenabled. The smpboot thread infrastructure takes care of keeping the
force-parked threads in place even across cpu hotplug.

Aside from that, the code implements the park/unpark facility of smp
hotplug threads on its own, which is even more pointless. We have
functionality in the smpboot thread code to do so.

Use the new thread management functions and get rid of the unholy mess.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194147.470370113@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  kernel/watchdog.c | 190
1 file changed, 19 insertions(+), 171 deletions(-)
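For context, the smpboot park/unpark pattern the patch switches to can be
sketched as follows. This is an illustrative sketch only, not code from the
patch: the demo_* names are hypothetical, while struct smp_hotplug_thread,
smpboot_register_percpu_thread_cpumask() and
smpboot_update_cpumask_percpu_thread() are the real <linux/smpboot.h>
interfaces that kernel/watchdog.c builds on after this series (with
watchdog_threads, watchdog_enable()/watchdog_disable() and
watchdog_allowed_mask in place of the demo_* placeholders).

#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smpboot.h>

/* One task pointer per cpu, filled in by the smpboot core. */
static DEFINE_PER_CPU(struct task_struct *, demo_task);
static struct cpumask demo_allowed_mask;

static int demo_should_run(unsigned int cpu)
{
        return 0;       /* no work in this sketch; the thread just sleeps */
}

static void demo_fn(unsigned int cpu)
{
        /* per-cpu work would run here whenever demo_should_run() said so */
}

static struct smp_hotplug_thread demo_threads = {
        .store                  = &demo_task,
        .thread_should_run      = demo_should_run,
        .thread_fn              = demo_fn,
        .thread_comm            = "demo/%u",
};

static int __init demo_init(void)
{
        /*
         * Create the threads exactly once. Threads for cpus that are not
         * in the mask stay parked; nothing is torn down again later.
         */
        cpumask_clear(&demo_allowed_mask);
        return smpboot_register_percpu_thread_cpumask(&demo_threads,
                                                      &demo_allowed_mask);
}

/* Enable/disable is now a cpumask update, not thread setup/teardown. */
static void demo_reconfigure(bool enabled)
{
        if (enabled)
                cpumask_copy(&demo_allowed_mask, cpu_online_mask);
        else
                cpumask_clear(&demo_allowed_mask);
        smpboot_update_cpumask_percpu_thread(&demo_threads,
                                             &demo_allowed_mask);
}

demo_reconfigure() is where the commit's point shows: once the threads
exist, enabling and disabling the detector is just parking and unparking
via a cpumask update, which the smpboot core also keeps correct across
cpu hotplug.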
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b35518375fb7..762d3ed82a08 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -92,13 +92,6 @@ struct cpumask watchdog_cpumask __read_mostly;
 unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask);
 
 /*
- * The 'watchdog_running' variable is set to 1 when the watchdog threads
- * are registered/started and is set to 0 when the watchdog threads are
- * unregistered/stopped, so it is an indicator whether the threads exist.
- */
-static int __read_mostly watchdog_running;
-
-/*
  * These functions can be overridden if an architecture implements its
  * own hardlockup detector.
  *
@@ -130,10 +123,6 @@ void __weak watchdog_nmi_reconfigure(void) { }
 
 #ifdef CONFIG_SOFTLOCKUP_DETECTOR
 
-/* Helper for online, unparked cpus. */
-#define for_each_watchdog_cpu(cpu) \
-        for_each_cpu_and((cpu), cpu_online_mask, &watchdog_cpumask)
-
 /* Global variables, exported for sysctl */
 unsigned int __read_mostly softlockup_panic =
                         CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
@@ -259,11 +248,15 @@ void touch_all_softlockup_watchdogs(void)
         int cpu;
 
         /*
-         * this is done lockless
-         * do we care if a 0 races with a timestamp?
-         * all it means is the softlock check starts one cycle later
+         * watchdog_mutex cannot be taken here, as this might be called
+         * from (soft)interrupt context, so the access to
+         * watchdog_allowed_mask might race with a concurrent update.
+         *
+         * The watchdog time stamp can race against a concurrent real
+         * update as well, the only side effect might be a cycle delay for
+         * the softlockup check.
          */
-        for_each_watchdog_cpu(cpu)
+        for_each_cpu(cpu, &watchdog_allowed_mask)
                 per_cpu(watchdog_touch_ts, cpu) = 0;
         wq_watchdog_touch(-1);
 }
@@ -303,9 +296,6 @@ static void watchdog_interrupt_count(void)
         __this_cpu_inc(hrtimer_interrupts);
 }
 
-static int watchdog_enable_all_cpus(void);
-static void watchdog_disable_all_cpus(void);
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -498,95 +488,6 @@ static struct smp_hotplug_thread watchdog_threads = {
         .unpark                 = watchdog_enable,
 };
 
-/*
- * park all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function returns an error if kthread_park() of a watchdog thread
- * fails. In this situation, the watchdog threads of some CPUs can already
- * be parked and the watchdog threads of other CPUs can still be runnable.
- * Callers are expected to handle this special condition as appropriate in
- * their context.
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static int watchdog_park_threads(void)
-{
-        int cpu, ret = 0;
-
-        for_each_watchdog_cpu(cpu) {
-                ret = kthread_park(per_cpu(softlockup_watchdog, cpu));
-                if (ret)
-                        break;
-        }
-        return ret;
-}
-
-/*
- * unpark all watchdog threads that are specified in 'watchdog_cpumask'
- *
- * This function may only be called in a context that is protected against
- * races with CPU hotplug - for example, via get_online_cpus().
- */
-static void watchdog_unpark_threads(void)
-{
-        int cpu;
-
-        for_each_watchdog_cpu(cpu)
-                kthread_unpark(per_cpu(softlockup_watchdog, cpu));
-}
-
-static int update_watchdog_all_cpus(void)
-{
-        int ret;
-
-        ret = watchdog_park_threads();
-        if (ret)
-                return ret;
-
-        watchdog_unpark_threads();
-
-        return 0;
-}
-
-static int watchdog_enable_all_cpus(void)
-{
-        int err = 0;
-
-        if (!watchdog_running) {
-                err = smpboot_register_percpu_thread_cpumask(&watchdog_threads,
-                                                             &watchdog_cpumask);
-                if (err)
-                        pr_err("Failed to create watchdog threads, disabled\n");
-                else
-                        watchdog_running = 1;
-        } else {
-                /*
-                 * Enable/disable the lockup detectors or
-                 * change the sample period 'on the fly'.
-                 */
-                err = update_watchdog_all_cpus();
-
-                if (err) {
-                        watchdog_disable_all_cpus();
-                        pr_err("Failed to update lockup detectors, disabled\n");
-                }
-        }
-
-        if (err)
-                watchdog_enabled = 0;
-
-        return err;
-}
-
-static void watchdog_disable_all_cpus(void)
-{
-        if (watchdog_running) {
-                watchdog_running = 0;
-                smpboot_unregister_percpu_thread(&watchdog_threads);
-        }
-}
-
 static void softlockup_update_smpboot_threads(void)
 {
         lockdep_assert_held(&watchdog_mutex);
@@ -661,7 +562,6 @@ static inline int watchdog_park_threads(void) { return 0; }
 static inline void watchdog_unpark_threads(void) { }
 static inline int watchdog_enable_all_cpus(void) { return 0; }
 static inline void watchdog_disable_all_cpus(void) { }
-static inline void set_sample_period(void) { }
 static inline void softlockup_init_threads(void) { }
 static inline void softlockup_update_threads(void) { }
 static inline void softlockup_reconfigure_threads(bool enabled) { }
@@ -701,28 +601,10 @@ void lockup_detector_soft_poweroff(void)
 /*
  * Update the run state of the lockup detectors.
  */
-static int proc_watchdog_update(void)
+static void proc_watchdog_update(void)
 {
-        int err = 0;
-
-        /*
-         * Watchdog threads won't be started if they are already active.
-         * The 'watchdog_running' variable in watchdog_*_all_cpus() takes
-         * care of this. If those threads are already active, the sample
-         * period will be updated and the lockup detectors will be enabled
-         * or disabled 'on the fly'.
-         */
-        if (watchdog_enabled && watchdog_thresh)
-                err = watchdog_enable_all_cpus();
-        else
-                watchdog_disable_all_cpus();
-
+        softlockup_reconfigure_threads(watchdog_enabled && watchdog_thresh);
         watchdog_nmi_reconfigure();
-
-        __lockup_detector_cleanup();
-
-        return err;
-
 }
 
 /*
@@ -778,17 +660,8 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
                         new = old & ~which;
                 } while (cmpxchg(&watchdog_enabled, old, new) != old);
 
-                /*
-                 * Update the run state of the lockup detectors. There is _no_
-                 * need to check the value returned by proc_watchdog_update()
-                 * and to restore the previous value of 'watchdog_enabled' as
-                 * both lockup detectors are disabled if proc_watchdog_update()
-                 * returns an error.
-                 */
-                if (old == new)
-                        goto out;
-
-                err = proc_watchdog_update();
+                if (old != new)
+                        proc_watchdog_update();
         }
 out:
         mutex_unlock(&watchdog_mutex);
@@ -832,50 +705,28 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
 int proc_watchdog_thresh(struct ctl_table *table, int write,
                          void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-        int err, old, new;
+        int err, old;
 
         cpu_hotplug_disable();
         mutex_lock(&watchdog_mutex);
 
-        old = ACCESS_ONCE(watchdog_thresh);
+        old = READ_ONCE(watchdog_thresh);
         err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
-        if (err || !write)
-                goto out;
+        if (!err && write && old != READ_ONCE(watchdog_thresh))
+                proc_watchdog_update();
 
-        /*
-         * Update the sample period. Restore on failure.
-         */
-        new = ACCESS_ONCE(watchdog_thresh);
-        if (old == new)
-                goto out;
-
-        set_sample_period();
-        err = proc_watchdog_update();
-        if (err) {
-                watchdog_thresh = old;
-                set_sample_period();
-        }
-out:
         mutex_unlock(&watchdog_mutex);
         cpu_hotplug_enable();
         return err;
 }
 
-static void watchdog_update_cpus(void)
-{
-        if (IS_ENABLED(CONFIG_SOFTLOCKUP_DETECTOR) && watchdog_running) {
-                smpboot_update_cpumask_percpu_thread(&watchdog_threads,
-                                                     &watchdog_cpumask);
-                __lockup_detector_cleanup();
-        }
-}
-
 static void proc_watchdog_cpumask_update(void)
 {
         /* Remove impossible cpus to keep sysctl output clean. */
         cpumask_and(&watchdog_cpumask, &watchdog_cpumask, cpu_possible_mask);
-        watchdog_update_cpus();
+
+        softlockup_update_threads();
         watchdog_nmi_reconfigure();
 }
 
@@ -905,8 +756,6 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write,
 
 void __init lockup_detector_init(void)
 {
-        set_sample_period();
-
 #ifdef CONFIG_NO_HZ_FULL
         if (tick_nohz_full_enabled()) {
                 pr_info("Disabling watchdog on nohz_full cores by default\n");
@@ -917,6 +766,5 @@ void __init lockup_detector_init(void)
         cpumask_copy(&watchdog_cpumask, cpu_possible_mask);
 #endif
 
-        if (watchdog_enabled)
-                watchdog_enable_all_cpus();
+        softlockup_init_threads();
 }