about summary refs log tree commit diff stats
path: root/kernel/watchdog.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--  kernel/watchdog.c  |  96 +-
1 file changed, 49 insertions(+), 47 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5b082156cd21..18bb15776c57 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,7 +27,7 @@
27#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
28#include <linux/perf_event.h> 28#include <linux/perf_event.h>
29 29
30int watchdog_enabled; 30int watchdog_enabled = 1;
31int __read_mostly softlockup_thresh = 60; 31int __read_mostly softlockup_thresh = 60;
32 32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int no_watchdog;
47
48
49/* boot commands */ 46/* boot commands */
50/* 47/*
51 * Should we panic when a soft-lockup or hard-lockup occurs: 48 * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -57,6 +54,8 @@ static int __init hardlockup_panic_setup(char *str)
57{ 54{
58 if (!strncmp(str, "panic", 5)) 55 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 56 hardlockup_panic = 1;
57 else if (!strncmp(str, "0", 1))
58 watchdog_enabled = 0;
60 return 1; 59 return 1;
61} 60}
62__setup("nmi_watchdog=", hardlockup_panic_setup); 61__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -75,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
75 74
76static int __init nowatchdog_setup(char *str) 75static int __init nowatchdog_setup(char *str)
77{ 76{
78 no_watchdog = 1; 77 watchdog_enabled = 0;
79 return 1; 78 return 1;
80} 79}
81__setup("nowatchdog", nowatchdog_setup); 80__setup("nowatchdog", nowatchdog_setup);
@@ -83,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup);
83/* deprecated */ 82/* deprecated */
84static int __init nosoftlockup_setup(char *str) 83static int __init nosoftlockup_setup(char *str)
85{ 84{
86 no_watchdog = 1; 85 watchdog_enabled = 0;
87 return 1; 86 return 1;
88} 87}
89__setup("nosoftlockup", nosoftlockup_setup); 88__setup("nosoftlockup", nosoftlockup_setup);
@@ -116,12 +115,12 @@ static void __touch_watchdog(void)
116{ 115{
117 int this_cpu = smp_processor_id(); 116 int this_cpu = smp_processor_id();
118 117
119 __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); 118 __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
120} 119}
121 120
122void touch_softlockup_watchdog(void) 121void touch_softlockup_watchdog(void)
123{ 122{
124 __raw_get_cpu_var(watchdog_touch_ts) = 0; 123 __this_cpu_write(watchdog_touch_ts, 0);
125} 124}
126EXPORT_SYMBOL(touch_softlockup_watchdog); 125EXPORT_SYMBOL(touch_softlockup_watchdog);
127 126
@@ -165,12 +164,12 @@ void touch_softlockup_watchdog_sync(void)
165/* watchdog detector functions */ 164/* watchdog detector functions */
166static int is_hardlockup(void) 165static int is_hardlockup(void)
167{ 166{
168 unsigned long hrint = __get_cpu_var(hrtimer_interrupts); 167 unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
169 168
170 if (__get_cpu_var(hrtimer_interrupts_saved) == hrint) 169 if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
171 return 1; 170 return 1;
172 171
173 __get_cpu_var(hrtimer_interrupts_saved) = hrint; 172 __this_cpu_write(hrtimer_interrupts_saved, hrint);
174 return 0; 173 return 0;
175} 174}
176#endif 175#endif
@@ -203,8 +202,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
203 /* Ensure the watchdog never gets throttled */ 202 /* Ensure the watchdog never gets throttled */
204 event->hw.interrupts = 0; 203 event->hw.interrupts = 0;
205 204
206 if (__get_cpu_var(watchdog_nmi_touch) == true) { 205 if (__this_cpu_read(watchdog_nmi_touch) == true) {
207 __get_cpu_var(watchdog_nmi_touch) = false; 206 __this_cpu_write(watchdog_nmi_touch, false);
208 return; 207 return;
209 } 208 }
210 209
@@ -218,7 +217,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
218 int this_cpu = smp_processor_id(); 217 int this_cpu = smp_processor_id();
219 218
220 /* only print hardlockups once */ 219 /* only print hardlockups once */
221 if (__get_cpu_var(hard_watchdog_warn) == true) 220 if (__this_cpu_read(hard_watchdog_warn) == true)
222 return; 221 return;
223 222
224 if (hardlockup_panic) 223 if (hardlockup_panic)
@@ -226,16 +225,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
226 else 225 else
227 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); 226 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
228 227
229 __get_cpu_var(hard_watchdog_warn) = true; 228 __this_cpu_write(hard_watchdog_warn, true);
230 return; 229 return;
231 } 230 }
232 231
233 __get_cpu_var(hard_watchdog_warn) = false; 232 __this_cpu_write(hard_watchdog_warn, false);
234 return; 233 return;
235} 234}
236static void watchdog_interrupt_count(void) 235static void watchdog_interrupt_count(void)
237{ 236{
238 __get_cpu_var(hrtimer_interrupts)++; 237 __this_cpu_inc(hrtimer_interrupts);
239} 238}
240#else 239#else
241static inline void watchdog_interrupt_count(void) { return; } 240static inline void watchdog_interrupt_count(void) { return; }
@@ -244,7 +243,7 @@ static inline void watchdog_interrupt_count(void) { return; }
244/* watchdog kicker functions */ 243/* watchdog kicker functions */
245static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 244static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
246{ 245{
247 unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); 246 unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
248 struct pt_regs *regs = get_irq_regs(); 247 struct pt_regs *regs = get_irq_regs();
249 int duration; 248 int duration;
250 249
@@ -252,18 +251,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
252 watchdog_interrupt_count(); 251 watchdog_interrupt_count();
253 252
254 /* kick the softlockup detector */ 253 /* kick the softlockup detector */
255 wake_up_process(__get_cpu_var(softlockup_watchdog)); 254 wake_up_process(__this_cpu_read(softlockup_watchdog));
256 255
257 /* .. and repeat */ 256 /* .. and repeat */
258 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); 257 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
259 258
260 if (touch_ts == 0) { 259 if (touch_ts == 0) {
261 if (unlikely(__get_cpu_var(softlockup_touch_sync))) { 260 if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
262 /* 261 /*
263 * If the time stamp was touched atomically 262 * If the time stamp was touched atomically
264 * make sure the scheduler tick is up to date. 263 * make sure the scheduler tick is up to date.
265 */ 264 */
266 __get_cpu_var(softlockup_touch_sync) = false; 265 __this_cpu_write(softlockup_touch_sync, false);
267 sched_clock_tick(); 266 sched_clock_tick();
268 } 267 }
269 __touch_watchdog(); 268 __touch_watchdog();
@@ -279,7 +278,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
279 duration = is_softlockup(touch_ts); 278 duration = is_softlockup(touch_ts);
280 if (unlikely(duration)) { 279 if (unlikely(duration)) {
281 /* only warn once */ 280 /* only warn once */
282 if (__get_cpu_var(soft_watchdog_warn) == true) 281 if (__this_cpu_read(soft_watchdog_warn) == true)
283 return HRTIMER_RESTART; 282 return HRTIMER_RESTART;
284 283
285 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 284 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
@@ -294,9 +293,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
294 293
295 if (softlockup_panic) 294 if (softlockup_panic)
296 panic("softlockup: hung tasks"); 295 panic("softlockup: hung tasks");
297 __get_cpu_var(soft_watchdog_warn) = true; 296 __this_cpu_write(soft_watchdog_warn, true);
298 } else 297 } else
299 __get_cpu_var(soft_watchdog_warn) = false; 298 __this_cpu_write(soft_watchdog_warn, false);
300 299
301 return HRTIMER_RESTART; 300 return HRTIMER_RESTART;
302} 301}
@@ -307,7 +306,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
307 */ 306 */
308static int watchdog(void *unused) 307static int watchdog(void *unused)
309{ 308{
310 struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; 309 static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
311 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); 310 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
312 311
313 sched_setscheduler(current, SCHED_FIFO, &param); 312 sched_setscheduler(current, SCHED_FIFO, &param);
@@ -364,8 +363,14 @@ static int watchdog_nmi_enable(int cpu)
364 goto out_save; 363 goto out_save;
365 } 364 }
366 365
367 printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", 366
368 cpu, PTR_ERR(event)); 367 /* vary the KERN level based on the returned errno */
368 if (PTR_ERR(event) == -EOPNOTSUPP)
369 printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
370 else if (PTR_ERR(event) == -ENOENT)
371 printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu);
372 else
373 printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event));
369 return PTR_ERR(event); 374 return PTR_ERR(event);
370 375
371 /* success path */ 376 /* success path */
@@ -430,9 +435,6 @@ static int watchdog_enable(int cpu)
430 wake_up_process(p); 435 wake_up_process(p);
431 } 436 }
432 437
433 /* if any cpu succeeds, watchdog is considered enabled for the system */
434 watchdog_enabled = 1;
435
436 return 0; 438 return 0;
437} 439}
438 440
@@ -460,12 +462,16 @@ static void watchdog_disable(int cpu)
460static void watchdog_enable_all_cpus(void) 462static void watchdog_enable_all_cpus(void)
461{ 463{
462 int cpu; 464 int cpu;
463 int result = 0; 465
466 watchdog_enabled = 0;
464 467
465 for_each_online_cpu(cpu) 468 for_each_online_cpu(cpu)
466 result += watchdog_enable(cpu); 469 if (!watchdog_enable(cpu))
470 /* if any cpu succeeds, watchdog is considered
471 enabled for the system */
472 watchdog_enabled = 1;
467 473
468 if (result) 474 if (!watchdog_enabled)
469 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); 475 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
470 476
471} 477}
@@ -474,9 +480,6 @@ static void watchdog_disable_all_cpus(void)
474{ 480{
475 int cpu; 481 int cpu;
476 482
477 if (no_watchdog)
478 return;
479
480 for_each_online_cpu(cpu) 483 for_each_online_cpu(cpu)
481 watchdog_disable(cpu); 484 watchdog_disable(cpu);
482 485
@@ -496,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write,
496{ 499{
497 proc_dointvec(table, write, buffer, length, ppos); 500 proc_dointvec(table, write, buffer, length, ppos);
498 501
499 if (watchdog_enabled) 502 if (write) {
500 watchdog_enable_all_cpus(); 503 if (watchdog_enabled)
501 else 504 watchdog_enable_all_cpus();
502 watchdog_disable_all_cpus(); 505 else
506 watchdog_disable_all_cpus();
507 }
503 return 0; 508 return 0;
504} 509}
505 510
@@ -528,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
528 break; 533 break;
529 case CPU_ONLINE: 534 case CPU_ONLINE:
530 case CPU_ONLINE_FROZEN: 535 case CPU_ONLINE_FROZEN:
531 err = watchdog_enable(hotcpu); 536 if (watchdog_enabled)
537 err = watchdog_enable(hotcpu);
532 break; 538 break;
533#ifdef CONFIG_HOTPLUG_CPU 539#ifdef CONFIG_HOTPLUG_CPU
534 case CPU_UP_CANCELED: 540 case CPU_UP_CANCELED:
@@ -548,20 +554,16 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
548 .notifier_call = cpu_callback 554 .notifier_call = cpu_callback
549}; 555};
550 556
551static int __init spawn_watchdog_task(void) 557void __init lockup_detector_init(void)
552{ 558{
553 void *cpu = (void *)(long)smp_processor_id(); 559 void *cpu = (void *)(long)smp_processor_id();
554 int err; 560 int err;
555 561
556 if (no_watchdog)
557 return 0;
558
559 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 562 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
560 WARN_ON(notifier_to_errno(err)); 563 WARN_ON(notifier_to_errno(err));
561 564
562 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 565 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
563 register_cpu_notifier(&cpu_nfb); 566 register_cpu_notifier(&cpu_nfb);
564 567
565 return 0; 568 return;
566} 569}
567early_initcall(spawn_watchdog_task);