From e12d0271774fea9fddf1e2a7952a0bffb2ee8e8b Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 10 May 2013 17:12:28 -0400 Subject: nohz: Warn if the machine can not perform nohz_full If the user configures NO_HZ_FULL and defines nohz_full=XXX on the kernel command line, or enables NO_HZ_FULL_ALL, but nohz fails due to the machine having a unstable clock, warn about it. We do not want users thinking that they are getting the benefit of nohz when their machine can not support it. Signed-off-by: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Signed-off-by: Frederic Weisbecker --- kernel/time/tick-sched.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f4208138fbf4..d87d22cb9bf2 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -178,6 +178,11 @@ static bool can_stop_full_tick(void) */ if (!sched_clock_stable) { trace_tick_stop(0, "unstable sched clock\n"); + /* + * Don't allow the user to think they can get + * full NO_HZ with this machine. + */ + WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock"); return false; } #endif -- cgit v1.2.2 From b8900bc0217fac8e68085997bee2f05e6db931a2 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 6 Jun 2013 15:42:53 +0200 Subject: watchdog: Register / unregister watchdog kthreads on sysctl control The user activation/deactivation of the watchdog through boot parameters or systcl is currently implemented with a dance involving kthreads parking and unparking methods: the threads are unconditionally registered on boot and they park as soon as the user want the watchdog to be disabled. This method involves a few noisy details to handle though: the watchdog kthreads may be unparked anytime due to hotplug operations, after which the watchdog internals have to decide to park again if it is user-disabled. As a result the setup() and unpark() methods need to be able to request a reparking. This is not currently supported in the kthread infrastructure so this piece of the watchdog code only works halfway. Besides, unparking/reparking the watchdog kthreads consume unnecessary cputime on hotplug operations when those could be simply ignored in the first place. As suggested by Srivatsa, let's instead only register the watchdog threads when they are needed. This way we don't need to think about hotplug operations and we don't burden the CPU onlining when the watchdog is simply disabled. Suggested-by: Srivatsa S. Bhat Signed-off-by: Frederic Weisbecker Cc: Srivatsa S. Bhat Cc: Anish Singh Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Don Zickus --- kernel/watchdog.c | 87 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 47 insertions(+), 40 deletions(-) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 05039e348f07..52c9a9b91bdd 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -31,7 +31,7 @@ int watchdog_enabled = 1; int __read_mostly watchdog_thresh = 10; -static int __read_mostly watchdog_disabled; +static int __read_mostly watchdog_disabled = 1; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -347,11 +347,6 @@ static void watchdog_enable(unsigned int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer->function = watchdog_timer_fn; - if (!watchdog_enabled) { - kthread_park(current); - return; - } - /* Enable the perf event */ watchdog_nmi_enable(cpu); @@ -374,6 +369,11 @@ static void watchdog_disable(unsigned int cpu) watchdog_nmi_disable(cpu); } +static void watchdog_cleanup(unsigned int cpu, bool online) +{ + watchdog_disable(cpu); +} + static int watchdog_should_run(unsigned int cpu) { return __this_cpu_read(hrtimer_interrupts) != @@ -475,28 +475,40 @@ static int watchdog_nmi_enable(unsigned int cpu) { return 0; } static void watchdog_nmi_disable(unsigned int cpu) { return; } #endif /* CONFIG_HARDLOCKUP_DETECTOR */ -/* prepare/enable/disable routines */ -/* sysctl functions */ -#ifdef CONFIG_SYSCTL -static void watchdog_enable_all_cpus(void) +static struct smp_hotplug_thread watchdog_threads = { + .store = &softlockup_watchdog, + .thread_should_run = watchdog_should_run, + .thread_fn = watchdog, + .thread_comm = "watchdog/%u", + .setup = watchdog_enable, + .cleanup = watchdog_cleanup, + .park = watchdog_disable, + .unpark = watchdog_enable, +}; + +static int watchdog_enable_all_cpus(void) { - unsigned int cpu; + int err = 0; if (watchdog_disabled) { - watchdog_disabled = 0; - for_each_online_cpu(cpu) - kthread_unpark(per_cpu(softlockup_watchdog, cpu)); + err = smpboot_register_percpu_thread(&watchdog_threads); + if (err) + pr_err("Failed to create watchdog threads, disabled\n"); + else + watchdog_disabled = 0; } + + return err; } +/* prepare/enable/disable routines */ +/* sysctl functions */ +#ifdef CONFIG_SYSCTL static void watchdog_disable_all_cpus(void) { - unsigned int cpu; - if (!watchdog_disabled) { watchdog_disabled = 1; - for_each_online_cpu(cpu) - kthread_park(per_cpu(softlockup_watchdog, cpu)); + smpboot_unregister_percpu_thread(&watchdog_threads); } } @@ -507,14 +519,14 @@ static void watchdog_disable_all_cpus(void) int proc_dowatchdog(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - int ret; + int err, old_thresh, old_enabled; - if (watchdog_disabled < 0) - return -ENODEV; + old_thresh = ACCESS_ONCE(watchdog_thresh); + old_enabled = ACCESS_ONCE(watchdog_enabled); - ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (ret || !write) - return ret; + err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (err || !write) + return err; set_sample_period(); /* @@ -523,29 +535,24 @@ int proc_dowatchdog(struct ctl_table *table, int write, * watchdog_*_all_cpus() function takes care of this. */ if (watchdog_enabled && watchdog_thresh) - watchdog_enable_all_cpus(); + err = watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); - return ret; + /* Restore old values on failure */ + if (err) { + watchdog_thresh = old_thresh; + watchdog_enabled = old_enabled; + } + + return err; } #endif /* CONFIG_SYSCTL */ -static struct smp_hotplug_thread watchdog_threads = { - .store = &softlockup_watchdog, - .thread_should_run = watchdog_should_run, - .thread_fn = watchdog, - .thread_comm = "watchdog/%u", - .setup = watchdog_enable, - .park = watchdog_disable, - .unpark = watchdog_enable, -}; - void __init lockup_detector_init(void) { set_sample_period(); - if (smpboot_register_percpu_thread(&watchdog_threads)) { - pr_err("Failed to create watchdog threads, disabled\n"); - watchdog_disabled = -ENODEV; - } + + if (watchdog_enabled) + watchdog_enable_all_cpus(); } -- cgit v1.2.2 From 3c00ea82c724fab0b98f15428a804cb45eb9ad38 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 19 May 2013 20:45:15 +0200 Subject: watchdog: Rename confusing state variable We have two very conflicting state variable names in the watchdog: * watchdog_enabled: This one reflects the user interface. It's set to 1 by default and can be overriden with boot options or sysctl/procfs interface. * watchdog_disabled: This is the internal toggle state that tells if watchdog threads, timers and NMI events are currently running or not. This state mostly depends on the user settings. It's a convenient state latch. Now we really need to find clearer names because those are just too confusing to encourage deep review. watchdog_enabled now becomes watchdog_user_enabled to reflect its purpose as an interface. watchdog_disabled becomes watchdog_running to suggest its role as a pure internal state. Signed-off-by: Frederic Weisbecker Cc: Srivatsa S. Bhat Cc: Anish Singh Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Li Zhong Cc: Don Zickus --- kernel/sysctl.c | 4 ++-- kernel/watchdog.c | 30 +++++++++++++++--------------- 2 files changed, 17 insertions(+), 17 deletions(-) (limited to 'kernel') diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9edcf456e0fc..b0805652c4ff 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -801,7 +801,7 @@ static struct ctl_table kern_table[] = { #if defined(CONFIG_LOCKUP_DETECTOR) { .procname = "watchdog", - .data = &watchdog_enabled, + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = proc_dowatchdog, @@ -828,7 +828,7 @@ static struct ctl_table kern_table[] = { }, { .procname = "nmi_watchdog", - .data = &watchdog_enabled, + .data = &watchdog_user_enabled, .maxlen = sizeof (int), .mode = 0644, .proc_handler = proc_dowatchdog, diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 52c9a9b91bdd..51c4f34d258e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -29,9 +29,9 @@ #include #include -int watchdog_enabled = 1; +int watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; -static int __read_mostly watchdog_disabled = 1; +static int __read_mostly watchdog_running; static u64 __read_mostly sample_period; static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); @@ -63,7 +63,7 @@ static int __init hardlockup_panic_setup(char *str) else if (!strncmp(str, "nopanic", 7)) hardlockup_panic = 0; else if (!strncmp(str, "0", 1)) - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nmi_watchdog=", hardlockup_panic_setup); @@ -82,7 +82,7 @@ __setup("softlockup_panic=", softlockup_panic_setup); static int __init nowatchdog_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nowatchdog", nowatchdog_setup); @@ -90,7 +90,7 @@ __setup("nowatchdog", nowatchdog_setup); /* deprecated */ static int __init nosoftlockup_setup(char *str) { - watchdog_enabled = 0; + watchdog_user_enabled = 0; return 1; } __setup("nosoftlockup", nosoftlockup_setup); @@ -158,7 +158,7 @@ void touch_all_softlockup_watchdogs(void) #ifdef CONFIG_HARDLOCKUP_DETECTOR void touch_nmi_watchdog(void) { - if (watchdog_enabled) { + if (watchdog_user_enabled) { unsigned cpu; for_each_present_cpu(cpu) { @@ -490,12 +490,12 @@ static int watchdog_enable_all_cpus(void) { int err = 0; - if (watchdog_disabled) { + if (!watchdog_running) { err = smpboot_register_percpu_thread(&watchdog_threads); if (err) pr_err("Failed to create watchdog threads, disabled\n"); else - watchdog_disabled = 0; + watchdog_running = 1; } return err; @@ -506,8 +506,8 @@ static int watchdog_enable_all_cpus(void) #ifdef CONFIG_SYSCTL static void watchdog_disable_all_cpus(void) { - if (!watchdog_disabled) { - watchdog_disabled = 1; + if (watchdog_running) { + watchdog_running = 0; smpboot_unregister_percpu_thread(&watchdog_threads); } } @@ -522,7 +522,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, int err, old_thresh, old_enabled; old_thresh = ACCESS_ONCE(watchdog_thresh); - old_enabled = ACCESS_ONCE(watchdog_enabled); + old_enabled = ACCESS_ONCE(watchdog_user_enabled); err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (err || !write) @@ -531,10 +531,10 @@ int proc_dowatchdog(struct ctl_table *table, int write, set_sample_period(); /* * Watchdog threads shouldn't be enabled if they are - * disabled. The 'watchdog_disabled' variable check in + * disabled. The 'watchdog_running' variable check in * watchdog_*_all_cpus() function takes care of this. */ - if (watchdog_enabled && watchdog_thresh) + if (watchdog_user_enabled && watchdog_thresh) err = watchdog_enable_all_cpus(); else watchdog_disable_all_cpus(); @@ -542,7 +542,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, /* Restore old values on failure */ if (err) { watchdog_thresh = old_thresh; - watchdog_enabled = old_enabled; + watchdog_user_enabled = old_enabled; } return err; @@ -553,6 +553,6 @@ void __init lockup_detector_init(void) { set_sample_period(); - if (watchdog_enabled) + if (watchdog_user_enabled) watchdog_enable_all_cpus(); } -- cgit v1.2.2 From 940be35ac0139530d7554aa2352a8388e3d4adca Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Jun 2013 13:35:42 +0200 Subject: watchdog: Boot-disable by default on full dynticks When the watchdog runs, it prevents the full dynticks CPUs from stopping their tick because the hard lockup detector uses perf events internally, which in turn rely on the periodic tick. Since this is a rather confusing behaviour that is not easy to track down and identify for those who want to test CONFIG_NO_HZ_FULL, let's default disable the watchdog on boot time when full dynticks is enabled. The user can still enable it later on runtime using proc or sysctl. Reported-by: Steven Rostedt Suggested-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Li Zhong Cc: Don Zickus Cc: Srivatsa S. Bhat Cc: Anish Singh --- kernel/watchdog.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51c4f34d258e..1241d8c91d5e 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -553,6 +553,14 @@ void __init lockup_detector_init(void) { set_sample_period(); +#ifdef CONFIG_NO_HZ_FULL + if (watchdog_user_enabled) { + watchdog_user_enabled = 0; + pr_warning("Disabled lockup detectors by default for full dynticks\n"); + pr_warning("You can reactivate it with 'sysctl -w kernel.watchdog=1'\n"); + } +#endif + if (watchdog_user_enabled) watchdog_enable_all_cpus(); } -- cgit v1.2.2 From 5b8621a68fdcd2baf1d3b413726f913a5254d46a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Jun 2013 13:47:31 +0200 Subject: nohz: Remove obsolete check for full dynticks CPUs to be RCU nocbs Building full dynticks now implies that all CPUs are forced into RCU nocb mode through CONFIG_RCU_NOCB_CPU_ALL. The dynamic check has become useless. Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Paul E. McKenney Cc: Ingo Molnar Cc: Andrew Morton Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Li Zhong Cc: Borislav Petkov --- kernel/time/tick-sched.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d87d22cb9bf2..b15750139260 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -351,16 +351,6 @@ void __init tick_nohz_init(void) } cpu_notifier(tick_nohz_cpu_down_callback, 0); - - /* Make sure full dynticks CPU are also RCU nocbs */ - for_each_cpu(cpu, nohz_full_mask) { - if (!rcu_is_nocb_cpu(cpu)) { - pr_warning("NO_HZ: CPU %d is not RCU nocb: " - "cleared from nohz_full range", cpu); - cpumask_clear_cpu(cpu, nohz_full_mask); - } - } - cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask); pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf); } -- cgit v1.2.2 From a272dcca1802a7e265a56e60b0d0a6715b0a8ac2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 11 Jul 2013 07:00:59 -0700 Subject: tick: broadcast: Check broadcast mode on CPU hotplug On ARM systems the dummy clockevent is registered with the cpu hotplug notifier chain before any other per-cpu clockevent. This has the side-effect of causing the dummy clockevent to be registered first in every hotplug sequence. Because the dummy is first, we'll try to turn the broadcast source on but the code in tick_device_uses_broadcast() assumes the broadcast source is in periodic mode and calls tick_broadcast_start_periodic() unconditionally. On boot this isn't a problem because we typically haven't switched into oneshot mode yet (if at all). During hotplug, if the broadcast source isn't in periodic mode we'll replace the broadcast oneshot handler with the broadcast periodic handler and start emulating oneshot mode when we shouldn't. Due to the way the broadcast oneshot handler programs the next_event it's possible for it to contain KTIME_MAX and cause us to hang the system when the periodic handler tries to program the next tick. Fix this by using the appropriate function to start the broadcast source. Reported-by: Stephen Warren Tested-by: Stephen Warren Signed-off-by: Stephen Boyd Cc: Mark Rutland Cc: Marc Zyngier Cc: ARM kernel mailing list Cc: John Stultz Cc: Joseph Lo Link: http://lkml.kernel.org/r/20130711140059.GA27430@codeaurora.org Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 6d3f91631de6..218bcb565fed 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -157,7 +157,10 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) dev->event_handler = tick_handle_periodic; tick_device_setup_broadcast_func(dev); cpumask_set_cpu(cpu, tick_broadcast_mask); - tick_broadcast_start_periodic(bc); + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); ret = 1; } else { /* -- cgit v1.2.2