path: root/kernel/watchdog.c
author	Don Zickus <dzickus@redhat.com>	2010-05-07 17:11:44 -0400
committer	Frederic Weisbecker <fweisbec@gmail.com>	2010-05-12 17:55:33 -0400
commit	58687acba59266735adb8ccd9b5b9aa2c7cd205b (patch)
tree	7236582375310b116eedec6facbee87d42e3dd6d /kernel/watchdog.c
parent	a9aa1d02de36b450990b0e25a88fc2ff1c3e6b94 (diff)
lockup_detector: Combine nmi_watchdog and softlockup detector
The new nmi_watchdog (which uses the perf event subsystem) is very
similar in structure to the softlockup detector.  Using Ingo's
suggestion, I combined the two functionalities into one file:
kernel/watchdog.c.

Now both the nmi_watchdog (or hardlockup detector) and softlockup
detector sit on top of the perf event subsystem, which is run every
60 seconds or so to see if there are any lockups.

To detect hardlockups, cpus not responding to interrupts, I implemented
an hrtimer that runs 5 times for every perf event overflow event.  If
that stops counting on a cpu, then the cpu is most likely in trouble.

To detect softlockups, tasks not yielding to the scheduler, I used the
previous kthread idea that now gets kicked every time the hrtimer fires.
If the kthread isn't being scheduled neither is anyone else and the
warning is printed to the console.

I tested this on x86_64 and both the softlockup and hardlockup paths
work.

V2:
- cleaned up the Kconfig and softlockup combination
- surrounded hardlockup cases with #ifdef CONFIG_PERF_EVENTS_NMI
- separated out the softlockup case from perf event subsystem
- re-arranged the enabling/disabling nmi watchdog from proc space
- added cpumasks for hardlockup failure cases
- removed fallback to soft events if no PMU exists for hard events

V3:
- comment cleanups
- drop support for older softlockup code
- per_cpu cleanups
- completely remove software clock base hardlockup detector
- use per_cpu masking on hard/soft lockup detection
- #ifdef cleanups
- rename config option NMI_WATCHDOG to LOCKUP_DETECTOR
- documentation additions

V4:
- documentation fixes
- convert per_cpu to __get_cpu_var
- powerpc compile fixes

V5:
- split apart warn flags for hard and soft lockups

TODO:
- figure out how to make an arch-agnostic clock2cycles call
  (if possible) to feed into perf events as a sample period

[fweisbec: merged conflict patch]

Signed-off-by: Don Zickus <dzickus@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Eric Paris <eparis@redhat.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
LKML-Reference: <1273266711-18706-2-git-send-email-dzickus@redhat.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
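[Editor's note: a minimal user-space sketch of the counter-compare idea described
above.  Everything in it (file name, thread, timings) is illustrative only and is
not the kernel API, but the compare-and-save step mirrors the is_hardlockup()
logic in the patch below.  In the real kernel the check runs from the perf NMI
callback, so it still executes even when a CPU has interrupts disabled.
Build with: cc -pthread hardlockup_sketch.c]

/* hardlockup_sketch.c - illustrative stand-in for the hrtimer/NMI interplay */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

/* Stand-ins for the per-CPU counters used by the patch (one "CPU" here). */
static atomic_ulong hrtimer_interrupts;
static unsigned long hrtimer_interrupts_saved;

/* Plays the role of the per-CPU hrtimer: tick five times per sample period. */
static void *timer_thread(void *unused)
{
	(void)unused;
	for (;;) {
		atomic_fetch_add(&hrtimer_interrupts, 1);
		usleep(200 * 1000);	/* 5 ticks per simulated sample period */
	}
	return NULL;
}

/* Same compare-and-save logic as is_hardlockup() in the patch. */
static int is_hardlockup(void)
{
	unsigned long hrint = atomic_load(&hrtimer_interrupts);

	if (hrtimer_interrupts_saved == hrint)
		return 1;	/* the "timer" has not ticked since the last check */

	hrtimer_interrupts_saved = hrint;
	return 0;
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, timer_thread, NULL);

	/* Plays the role of the perf overflow callback: check once per period. */
	for (;;) {
		sleep(1);
		if (is_hardlockup())
			printf("watchdog: simulated hard LOCKUP detected\n");
	}
}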
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--	kernel/watchdog.c	592
1 files changed, 592 insertions, 0 deletions
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
new file mode 100644
index 000000000000..6b7fad8497af
--- /dev/null
+++ b/kernel/watchdog.c
@@ -0,0 +1,592 @@
/*
 * Detect hard and soft lockups on a system
 *
 * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
 *
 * this code detects hard lockups: incidents where the kernel on a CPU
 * does not respond to anything except NMI.
 *
 * Note: Most of this code is borrowed heavily from softlockup.c,
 * so thanks to Ingo for the initial implementation.
 * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
 * to those contributors as well.
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/notifier.h>
#include <linux/module.h>
#include <linux/sysctl.h>

#include <asm/irq_regs.h>
#include <linux/perf_event.h>

int watchdog_enabled;
int __read_mostly softlockup_thresh = 60;

static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
static DEFINE_PER_CPU(struct hrtimer, watchdog_hrtimer);
static DEFINE_PER_CPU(bool, softlockup_touch_sync);
static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, soft_watchdog_warn);
#ifdef CONFIG_PERF_EVENTS_NMI
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
#endif

static int __read_mostly did_panic;
static int __initdata no_watchdog;


/* boot commands */
/*
 * Should we panic when a soft-lockup or hard-lockup occurs:
 */
#ifdef CONFIG_PERF_EVENTS_NMI
static int hardlockup_panic;

static int __init hardlockup_panic_setup(char *str)
{
	if (!strncmp(str, "panic", 5))
		hardlockup_panic = 1;
	return 1;
}
__setup("nmi_watchdog=", hardlockup_panic_setup);
#endif

unsigned int __read_mostly softlockup_panic =
		CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;

static int __init softlockup_panic_setup(char *str)
{
	softlockup_panic = simple_strtoul(str, NULL, 0);

	return 1;
}
__setup("softlockup_panic=", softlockup_panic_setup);

static int __init nowatchdog_setup(char *str)
{
	no_watchdog = 1;
	return 1;
}
__setup("nowatchdog", nowatchdog_setup);

/* deprecated */
static int __init nosoftlockup_setup(char *str)
{
	no_watchdog = 1;
	return 1;
}
__setup("nosoftlockup", nosoftlockup_setup);
/*  */


/*
 * Returns seconds, approximately.  We don't need nanosecond
 * resolution, and we don't need to waste time with a big divide when
 * 2^30ns == 1.074s.
 */
static unsigned long get_timestamp(int this_cpu)
{
	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
}

static unsigned long get_sample_period(void)
{
	/*
	 * convert softlockup_thresh from seconds to ns
	 * the divide by 5 is to give hrtimer 5 chances to
	 * increment before the hardlockup detector generates
	 * a warning
	 */
	return softlockup_thresh / 5 * NSEC_PER_SEC;
}

/* Commands for resetting the watchdog */
static void __touch_watchdog(void)
{
	int this_cpu = raw_smp_processor_id();

	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
}

void touch_watchdog(void)
{
	__get_cpu_var(watchdog_touch_ts) = 0;
}
EXPORT_SYMBOL(touch_watchdog);

void touch_all_watchdog(void)
{
	int cpu;

	/*
	 * this is done lockless
	 * do we care if a 0 races with a timestamp?
	 * all it means is the softlock check starts one cycle later
	 */
	for_each_online_cpu(cpu)
		per_cpu(watchdog_touch_ts, cpu) = 0;
}

void touch_nmi_watchdog(void)
{
	touch_watchdog();
}
EXPORT_SYMBOL(touch_nmi_watchdog);

void touch_all_nmi_watchdog(void)
{
	touch_all_watchdog();
}

void touch_softlockup_watchdog(void)
{
	touch_watchdog();
}

void touch_all_softlockup_watchdogs(void)
{
	touch_all_watchdog();
}

void touch_softlockup_watchdog_sync(void)
{
	__raw_get_cpu_var(softlockup_touch_sync) = true;
	__raw_get_cpu_var(watchdog_touch_ts) = 0;
}

void softlockup_tick(void)
{
}

#ifdef CONFIG_PERF_EVENTS_NMI
/* watchdog detector functions */
static int is_hardlockup(int cpu)
{
	unsigned long hrint = per_cpu(hrtimer_interrupts, cpu);

	if (per_cpu(hrtimer_interrupts_saved, cpu) == hrint)
		return 1;

	per_cpu(hrtimer_interrupts_saved, cpu) = hrint;
	return 0;
}
#endif

static int is_softlockup(unsigned long touch_ts, int cpu)
{
	unsigned long now = get_timestamp(cpu);

	/* Warn about unreasonable delays: */
	if (time_after(now, touch_ts + softlockup_thresh))
		return now - touch_ts;

	return 0;
}

static int
watchdog_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = watchdog_panic,
};

#ifdef CONFIG_PERF_EVENTS_NMI
static struct perf_event_attr wd_hw_attr = {
	.type		= PERF_TYPE_HARDWARE,
	.config		= PERF_COUNT_HW_CPU_CYCLES,
	.size		= sizeof(struct perf_event_attr),
	.pinned		= 1,
	.disabled	= 1,
};

/* Callback function for perf event subsystem */
void watchdog_overflow_callback(struct perf_event *event, int nmi,
		 struct perf_sample_data *data,
		 struct pt_regs *regs)
{
	int this_cpu = smp_processor_id();
	unsigned long touch_ts = per_cpu(watchdog_touch_ts, this_cpu);

	if (touch_ts == 0) {
		__touch_watchdog();
		return;
	}

	/* check for a hardlockup
	 * This is done by making sure our timer interrupt
	 * is incrementing.  The timer interrupt should have
	 * fired multiple times before we overflow'd.  If it hasn't
	 * then this is a good indication the cpu is stuck
	 */
	if (is_hardlockup(this_cpu)) {
		/* only print hardlockups once */
		if (__get_cpu_var(hard_watchdog_warn) == true)
			return;

		if (hardlockup_panic)
			panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
		else
			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);

		__get_cpu_var(hard_watchdog_warn) = true;
		return;
	}

	__get_cpu_var(hard_watchdog_warn) = false;
	return;
}
static void watchdog_interrupt_count(void)
{
	__get_cpu_var(hrtimer_interrupts)++;
}
#else
static inline void watchdog_interrupt_count(void) { return; }
#endif /* CONFIG_PERF_EVENTS_NMI */

/* watchdog kicker functions */
static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
	int this_cpu = smp_processor_id();
	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
	struct pt_regs *regs = get_irq_regs();
	int duration;

	/* kick the hardlockup detector */
	watchdog_interrupt_count();

	/* kick the softlockup detector */
	wake_up_process(__get_cpu_var(softlockup_watchdog));

	/* .. and repeat */
	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));

	if (touch_ts == 0) {
		if (unlikely(per_cpu(softlockup_touch_sync, this_cpu))) {
			/*
			 * If the time stamp was touched atomically
			 * make sure the scheduler tick is up to date.
			 */
			per_cpu(softlockup_touch_sync, this_cpu) = false;
			sched_clock_tick();
		}
		__touch_watchdog();
		return HRTIMER_RESTART;
	}

	/* check for a softlockup
	 * This is done by making sure a high priority task is
	 * being scheduled.  The task touches the watchdog to
	 * indicate it is getting cpu time.  If it hasn't then
	 * this is a good indication some task is hogging the cpu
	 */
	duration = is_softlockup(touch_ts, this_cpu);
	if (unlikely(duration)) {
		/* only warn once */
		if (__get_cpu_var(soft_watchdog_warn) == true)
			return HRTIMER_RESTART;

		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
			this_cpu, duration,
			current->comm, task_pid_nr(current));
		print_modules();
		print_irqtrace_events(current);
		if (regs)
			show_regs(regs);
		else
			dump_stack();

		if (softlockup_panic)
			panic("softlockup: hung tasks");
		__get_cpu_var(soft_watchdog_warn) = true;
	} else
		__get_cpu_var(soft_watchdog_warn) = false;

	return HRTIMER_RESTART;
}


/*
 * The watchdog thread - touches the timestamp.
 */
static int watchdog(void *__bind_cpu)
{
	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, (unsigned long)__bind_cpu);

	sched_setscheduler(current, SCHED_FIFO, &param);

	/* initialize timestamp */
	__touch_watchdog();

	/* kick off the timer for the hardlockup detector */
	/* done here because hrtimer_start can only pin to smp_processor_id() */
	hrtimer_start(hrtimer, ns_to_ktime(get_sample_period()),
		      HRTIMER_MODE_REL_PINNED);

	set_current_state(TASK_INTERRUPTIBLE);
	/*
	 * Run briefly once per second to reset the softlockup timestamp.
	 * If this gets delayed for more than 60 seconds then the
	 * debug-printout triggers in softlockup_tick().
	 */
	while (!kthread_should_stop()) {
		__touch_watchdog();
		schedule();

		if (kthread_should_stop())
			break;

		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}


#ifdef CONFIG_PERF_EVENTS_NMI
static int watchdog_nmi_enable(int cpu)
{
	struct perf_event_attr *wd_attr;
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	/* is it already setup and enabled? */
	if (event && event->state > PERF_EVENT_STATE_OFF)
		goto out;

	/* it is setup but not enabled */
	if (event != NULL)
		goto out_enable;

	/* Try to register using hardware perf events */
	wd_attr = &wd_hw_attr;
	wd_attr->sample_period = hw_nmi_get_sample_period();
	event = perf_event_create_kernel_counter(wd_attr, cpu, -1, watchdog_overflow_callback);
	if (!IS_ERR(event)) {
		printk(KERN_INFO "NMI watchdog enabled, takes one hw-pmu counter.\n");
		goto out_save;
	}

	printk(KERN_ERR "NMI watchdog failed to create perf event on cpu%i: %p\n", cpu, event);
	return -1;

	/* success path */
out_save:
	per_cpu(watchdog_ev, cpu) = event;
out_enable:
	perf_event_enable(per_cpu(watchdog_ev, cpu));
out:
	return 0;
}

static void watchdog_nmi_disable(int cpu)
{
	struct perf_event *event = per_cpu(watchdog_ev, cpu);

	if (event) {
		perf_event_disable(event);
		per_cpu(watchdog_ev, cpu) = NULL;

		/* should be in cleanup, but blocks oprofile */
		perf_event_release_kernel(event);
	}
	return;
}
#else
static int watchdog_nmi_enable(int cpu) { return 0; }
static void watchdog_nmi_disable(int cpu) { return; }
#endif /* CONFIG_PERF_EVENTS_NMI */

/* prepare/enable/disable routines */
static int watchdog_prepare_cpu(int cpu)
{
	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);

	WARN_ON(per_cpu(softlockup_watchdog, cpu));
	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = watchdog_timer_fn;

	return 0;
}

static int watchdog_enable(int cpu)
{
	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);

	/* enable the perf event */
	if (watchdog_nmi_enable(cpu) != 0)
		return -1;

	/* create the watchdog thread */
	if (!p) {
		p = kthread_create(watchdog, (void *)(unsigned long)cpu, "watchdog/%d", cpu);
		if (IS_ERR(p)) {
			printk(KERN_ERR "softlockup watchdog for %i failed\n", cpu);
			return -1;
		}
		kthread_bind(p, cpu);
		per_cpu(watchdog_touch_ts, cpu) = 0;
		per_cpu(softlockup_watchdog, cpu) = p;
		wake_up_process(p);
	}

	/* if any cpu succeeds, watchdog is considered enabled for the system */
	watchdog_enabled = 1;

	return 0;
}

static void watchdog_disable(int cpu)
{
	struct task_struct *p = per_cpu(softlockup_watchdog, cpu);
	struct hrtimer *hrtimer = &per_cpu(watchdog_hrtimer, cpu);

	/*
	 * cancel the timer first to stop incrementing the stats
	 * and waking up the kthread
	 */
	hrtimer_cancel(hrtimer);

	/* disable the perf event */
	watchdog_nmi_disable(cpu);

	/* stop the watchdog thread */
	if (p) {
		per_cpu(softlockup_watchdog, cpu) = NULL;
		kthread_stop(p);
	}
}

static void watchdog_enable_all_cpus(void)
{
	int cpu;
	int result = 0;

	for_each_online_cpu(cpu)
		result += watchdog_enable(cpu);

	if (result)
		printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
}

static void watchdog_disable_all_cpus(void)
{
	int cpu;

	for_each_online_cpu(cpu)
		watchdog_disable(cpu);

	/* if all watchdogs are disabled, then they are disabled for the system */
	watchdog_enabled = 0;
}


/* sysctl functions */
#ifdef CONFIG_SYSCTL
/*
 * proc handler for /proc/sys/kernel/nmi_watchdog
 */

int proc_dowatchdog_enabled(struct ctl_table *table, int write,
		     void __user *buffer, size_t *length, loff_t *ppos)
{
	proc_dointvec(table, write, buffer, length, ppos);

	if (watchdog_enabled)
		watchdog_enable_all_cpus();
	else
		watchdog_disable_all_cpus();
	return 0;
}

int proc_dowatchdog_thresh(struct ctl_table *table, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}

/* stub functions */
int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
			     void __user *buffer,
			     size_t *lenp, loff_t *ppos)
{
	return proc_dowatchdog_thresh(table, write, buffer, lenp, ppos);
}
/* end of stub functions */
#endif /* CONFIG_SYSCTL */


/*
 * Create/destroy watchdog threads as CPUs come and go:
 */
static int __cpuinit
cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		if (watchdog_prepare_cpu(hotcpu))
			return NOTIFY_BAD;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		if (watchdog_enable(hotcpu))
			return NOTIFY_BAD;
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		watchdog_disable(hotcpu);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		watchdog_disable(hotcpu);
		break;
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static int __init spawn_watchdog_task(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err;

	if (no_watchdog)
		return 0;

	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
	WARN_ON(err == NOTIFY_BAD);

	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);

	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	return 0;
}
early_initcall(spawn_watchdog_task);