summaryrefslogtreecommitdiffstats
path: root/kernel/watchdog_hld.c
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2017-09-12 15:37:04 -0400
committer: Ingo Molnar <mingo@kernel.org> 2017-09-14 05:41:05 -0400
commit: 941154bd6937a710ae9193a3c733c0029e5ae7b8 (patch)
tree: dede6edfb0306f0ae368d892edbf32f06f24e30b /kernel/watchdog_hld.c
parent: 20d853fd0703b1d73c35a22024c0d4fcbcc57c8c (diff)
watchdog/hardlockup/perf: Prevent CPU hotplug deadlock
The following deadlock is possible in the watchdog hotplug code:

  cpus_write_lock()
    ...
      takedown_cpu()
        smpboot_park_threads()
          smpboot_park_thread()
            kthread_park()
              ->park() := watchdog_disable()
                watchdog_nmi_disable()
                  perf_event_release_kernel();
                    put_event()
                      _free_event()
                        ->destroy() := hw_perf_event_destroy()
                          x86_release_hardware()
                            release_ds_buffers()
                              get_online_cpus()

when a per cpu watchdog perf event is destroyed which drops the last
reference to the PMU hardware. The cleanup code there invokes
get_online_cpus() which instantly deadlocks because the hotplug percpu
rwsem is write locked.

To solve this add a deferring mechanism:

  cpus_write_lock()
    kthread_park()
      watchdog_nmi_disable(deferred)
        perf_event_disable(event);
        move_event_to_deferred(event);
    ....
  cpus_write_unlock()
  cleanup_deferred_events()
    perf_event_release_kernel()

This is still properly serialized against concurrent hotplug via the
cpu_add_remove_lock, which is held by the task which initiated the hotplug
event.

This is also used to handle event destruction when the watchdog threads are
parked via other mechanisms than CPU hotplug.

Analyzed-by: Peter Zijlstra <peterz@infradead.org>
Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Don Zickus <dzickus@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Chris Metcalf <cmetcalf@mellanox.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Ulrich Obergfell <uobergfe@redhat.com>
Link: http://lkml.kernel.org/r/20170912194146.884469246@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/watchdog_hld.c')
-rw-r--r-- kernel/watchdog_hld.c 34
1 files changed, 28 insertions, 6 deletions
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 7b602714ea53..94111ccb09b5 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -21,6 +21,8 @@
21static DEFINE_PER_CPU(bool, hard_watchdog_warn); 21static DEFINE_PER_CPU(bool, hard_watchdog_warn);
22static DEFINE_PER_CPU(bool, watchdog_nmi_touch); 22static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 23static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
24static DEFINE_PER_CPU(struct perf_event *, dead_event);
25static struct cpumask dead_events_mask;
24 26
25static unsigned long hardlockup_allcpu_dumped; 27static unsigned long hardlockup_allcpu_dumped;
26static bool hardlockup_detector_disabled; 28static bool hardlockup_detector_disabled;
@@ -239,16 +241,18 @@ out:
239 return 0; 241 return 0;
240} 242}
241 243
242void watchdog_nmi_disable(unsigned int cpu) 244/**
245 * hardlockup_detector_perf_disable - Disable the local event
246 */
247void hardlockup_detector_perf_disable(void)
243{ 248{
244 struct perf_event *event = per_cpu(watchdog_ev, cpu); 249 struct perf_event *event = this_cpu_read(watchdog_ev);
245 250
246 if (event) { 251 if (event) {
247 perf_event_disable(event); 252 perf_event_disable(event);
248 per_cpu(watchdog_ev, cpu) = NULL; 253 this_cpu_write(watchdog_ev, NULL);
249 254 this_cpu_write(dead_event, event);
250 /* should be in cleanup, but blocks oprofile */ 255 cpumask_set_cpu(smp_processor_id(), &dead_events_mask);
251 perf_event_release_kernel(event);
252 256
253 /* watchdog_nmi_enable() expects this to be zero initially. */ 257 /* watchdog_nmi_enable() expects this to be zero initially. */
254 if (atomic_dec_and_test(&watchdog_cpus)) 258 if (atomic_dec_and_test(&watchdog_cpus))
@@ -257,6 +261,24 @@ void watchdog_nmi_disable(unsigned int cpu)
257} 261}
258 262
259/** 263/**
264 * hardlockup_detector_perf_cleanup - Cleanup disabled events and destroy them
265 *
266 * Called from lockup_detector_cleanup(). Serialized by the caller.
267 */
268void hardlockup_detector_perf_cleanup(void)
269{
270 int cpu;
271
272 for_each_cpu(cpu, &dead_events_mask) {
273 struct perf_event *event = per_cpu(dead_event, cpu);
274
275 per_cpu(dead_event, cpu) = NULL;
276 perf_event_release_kernel(event);
277 }
278 cpumask_clear(&dead_events_mask);
279}
280
281/**
260 * hardlockup_detector_perf_stop - Globally stop watchdog events 282 * hardlockup_detector_perf_stop - Globally stop watchdog events
261 * 283 *
262 * Special interface for x86 to handle the perf HT bug. 284 * Special interface for x86 to handle the perf HT bug.