about summary refs log tree commit diff stats
path: root/kernel/watchdog.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--  kernel/watchdog.c  89
1 file changed, 45 insertions(+), 44 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 6e7b575ac33c..18bb15776c57 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,7 +27,7 @@
27#include <asm/irq_regs.h> 27#include <asm/irq_regs.h>
28#include <linux/perf_event.h> 28#include <linux/perf_event.h>
29 29
30int watchdog_enabled; 30int watchdog_enabled = 1;
31int __read_mostly softlockup_thresh = 60; 31int __read_mostly softlockup_thresh = 60;
32 32
33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts); 33static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); 43static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
44#endif 44#endif
45 45
46static int no_watchdog;
47
48
49/* boot commands */ 46/* boot commands */
50/* 47/*
51 * Should we panic when a soft-lockup or hard-lockup occurs: 48 * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -58,7 +55,7 @@ static int __init hardlockup_panic_setup(char *str)
58 if (!strncmp(str, "panic", 5)) 55 if (!strncmp(str, "panic", 5))
59 hardlockup_panic = 1; 56 hardlockup_panic = 1;
60 else if (!strncmp(str, "0", 1)) 57 else if (!strncmp(str, "0", 1))
61 no_watchdog = 1; 58 watchdog_enabled = 0;
62 return 1; 59 return 1;
63} 60}
64__setup("nmi_watchdog=", hardlockup_panic_setup); 61__setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -77,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
77 74
78static int __init nowatchdog_setup(char *str) 75static int __init nowatchdog_setup(char *str)
79{ 76{
80 no_watchdog = 1; 77 watchdog_enabled = 0;
81 return 1; 78 return 1;
82} 79}
83__setup("nowatchdog", nowatchdog_setup); 80__setup("nowatchdog", nowatchdog_setup);
@@ -85,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup);
85/* deprecated */ 82/* deprecated */
86static int __init nosoftlockup_setup(char *str) 83static int __init nosoftlockup_setup(char *str)
87{ 84{
88 no_watchdog = 1; 85 watchdog_enabled = 0;
89 return 1; 86 return 1;
90} 87}
91__setup("nosoftlockup", nosoftlockup_setup); 88__setup("nosoftlockup", nosoftlockup_setup);
@@ -118,12 +115,12 @@ static void __touch_watchdog(void)
118{ 115{
119 int this_cpu = smp_processor_id(); 116 int this_cpu = smp_processor_id();
120 117
121 __get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu); 118 __this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
122} 119}
123 120
124void touch_softlockup_watchdog(void) 121void touch_softlockup_watchdog(void)
125{ 122{
126 __raw_get_cpu_var(watchdog_touch_ts) = 0; 123 __this_cpu_write(watchdog_touch_ts, 0);
127} 124}
128EXPORT_SYMBOL(touch_softlockup_watchdog); 125EXPORT_SYMBOL(touch_softlockup_watchdog);
129 126
@@ -167,12 +164,12 @@ void touch_softlockup_watchdog_sync(void)
167/* watchdog detector functions */ 164/* watchdog detector functions */
168static int is_hardlockup(void) 165static int is_hardlockup(void)
169{ 166{
170 unsigned long hrint = __get_cpu_var(hrtimer_interrupts); 167 unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
171 168
172 if (__get_cpu_var(hrtimer_interrupts_saved) == hrint) 169 if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
173 return 1; 170 return 1;
174 171
175 __get_cpu_var(hrtimer_interrupts_saved) = hrint; 172 __this_cpu_write(hrtimer_interrupts_saved, hrint);
176 return 0; 173 return 0;
177} 174}
178#endif 175#endif
@@ -205,8 +202,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
205 /* Ensure the watchdog never gets throttled */ 202 /* Ensure the watchdog never gets throttled */
206 event->hw.interrupts = 0; 203 event->hw.interrupts = 0;
207 204
208 if (__get_cpu_var(watchdog_nmi_touch) == true) { 205 if (__this_cpu_read(watchdog_nmi_touch) == true) {
209 __get_cpu_var(watchdog_nmi_touch) = false; 206 __this_cpu_write(watchdog_nmi_touch, false);
210 return; 207 return;
211 } 208 }
212 209
@@ -220,7 +217,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
220 int this_cpu = smp_processor_id(); 217 int this_cpu = smp_processor_id();
221 218
222 /* only print hardlockups once */ 219 /* only print hardlockups once */
223 if (__get_cpu_var(hard_watchdog_warn) == true) 220 if (__this_cpu_read(hard_watchdog_warn) == true)
224 return; 221 return;
225 222
226 if (hardlockup_panic) 223 if (hardlockup_panic)
@@ -228,16 +225,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
228 else 225 else
229 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu); 226 WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
230 227
231 __get_cpu_var(hard_watchdog_warn) = true; 228 __this_cpu_write(hard_watchdog_warn, true);
232 return; 229 return;
233 } 230 }
234 231
235 __get_cpu_var(hard_watchdog_warn) = false; 232 __this_cpu_write(hard_watchdog_warn, false);
236 return; 233 return;
237} 234}
238static void watchdog_interrupt_count(void) 235static void watchdog_interrupt_count(void)
239{ 236{
240 __get_cpu_var(hrtimer_interrupts)++; 237 __this_cpu_inc(hrtimer_interrupts);
241} 238}
242#else 239#else
243static inline void watchdog_interrupt_count(void) { return; } 240static inline void watchdog_interrupt_count(void) { return; }
@@ -246,7 +243,7 @@ static inline void watchdog_interrupt_count(void) { return; }
246/* watchdog kicker functions */ 243/* watchdog kicker functions */
247static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) 244static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
248{ 245{
249 unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts); 246 unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
250 struct pt_regs *regs = get_irq_regs(); 247 struct pt_regs *regs = get_irq_regs();
251 int duration; 248 int duration;
252 249
@@ -254,18 +251,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
254 watchdog_interrupt_count(); 251 watchdog_interrupt_count();
255 252
256 /* kick the softlockup detector */ 253 /* kick the softlockup detector */
257 wake_up_process(__get_cpu_var(softlockup_watchdog)); 254 wake_up_process(__this_cpu_read(softlockup_watchdog));
258 255
259 /* .. and repeat */ 256 /* .. and repeat */
260 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period())); 257 hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
261 258
262 if (touch_ts == 0) { 259 if (touch_ts == 0) {
263 if (unlikely(__get_cpu_var(softlockup_touch_sync))) { 260 if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
264 /* 261 /*
265 * If the time stamp was touched atomically 262 * If the time stamp was touched atomically
266 * make sure the scheduler tick is up to date. 263 * make sure the scheduler tick is up to date.
267 */ 264 */
268 __get_cpu_var(softlockup_touch_sync) = false; 265 __this_cpu_write(softlockup_touch_sync, false);
269 sched_clock_tick(); 266 sched_clock_tick();
270 } 267 }
271 __touch_watchdog(); 268 __touch_watchdog();
@@ -281,7 +278,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
281 duration = is_softlockup(touch_ts); 278 duration = is_softlockup(touch_ts);
282 if (unlikely(duration)) { 279 if (unlikely(duration)) {
283 /* only warn once */ 280 /* only warn once */
284 if (__get_cpu_var(soft_watchdog_warn) == true) 281 if (__this_cpu_read(soft_watchdog_warn) == true)
285 return HRTIMER_RESTART; 282 return HRTIMER_RESTART;
286 283
287 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", 284 printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
@@ -296,9 +293,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
296 293
297 if (softlockup_panic) 294 if (softlockup_panic)
298 panic("softlockup: hung tasks"); 295 panic("softlockup: hung tasks");
299 __get_cpu_var(soft_watchdog_warn) = true; 296 __this_cpu_write(soft_watchdog_warn, true);
300 } else 297 } else
301 __get_cpu_var(soft_watchdog_warn) = false; 298 __this_cpu_write(soft_watchdog_warn, false);
302 299
303 return HRTIMER_RESTART; 300 return HRTIMER_RESTART;
304} 301}
@@ -366,8 +363,14 @@ static int watchdog_nmi_enable(int cpu)
366 goto out_save; 363 goto out_save;
367 } 364 }
368 365
369 printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n", 366
370 cpu, PTR_ERR(event)); 367 /* vary the KERN level based on the returned errno */
368 if (PTR_ERR(event) == -EOPNOTSUPP)
369 printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
370 else if (PTR_ERR(event) == -ENOENT)
371 printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu);
372 else
373 printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event));
371 return PTR_ERR(event); 374 return PTR_ERR(event);
372 375
373 /* success path */ 376 /* success path */
@@ -432,9 +435,6 @@ static int watchdog_enable(int cpu)
432 wake_up_process(p); 435 wake_up_process(p);
433 } 436 }
434 437
435 /* if any cpu succeeds, watchdog is considered enabled for the system */
436 watchdog_enabled = 1;
437
438 return 0; 438 return 0;
439} 439}
440 440
@@ -462,12 +462,16 @@ static void watchdog_disable(int cpu)
462static void watchdog_enable_all_cpus(void) 462static void watchdog_enable_all_cpus(void)
463{ 463{
464 int cpu; 464 int cpu;
465 int result = 0; 465
466 watchdog_enabled = 0;
466 467
467 for_each_online_cpu(cpu) 468 for_each_online_cpu(cpu)
468 result += watchdog_enable(cpu); 469 if (!watchdog_enable(cpu))
470 /* if any cpu succeeds, watchdog is considered
471 enabled for the system */
472 watchdog_enabled = 1;
469 473
470 if (result) 474 if (!watchdog_enabled)
471 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n"); 475 printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
472 476
473} 477}
@@ -476,9 +480,6 @@ static void watchdog_disable_all_cpus(void)
476{ 480{
477 int cpu; 481 int cpu;
478 482
479 if (no_watchdog)
480 return;
481
482 for_each_online_cpu(cpu) 483 for_each_online_cpu(cpu)
483 watchdog_disable(cpu); 484 watchdog_disable(cpu);
484 485
@@ -498,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write,
498{ 499{
499 proc_dointvec(table, write, buffer, length, ppos); 500 proc_dointvec(table, write, buffer, length, ppos);
500 501
501 if (watchdog_enabled) 502 if (write) {
502 watchdog_enable_all_cpus(); 503 if (watchdog_enabled)
503 else 504 watchdog_enable_all_cpus();
504 watchdog_disable_all_cpus(); 505 else
506 watchdog_disable_all_cpus();
507 }
505 return 0; 508 return 0;
506} 509}
507 510
@@ -530,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
530 break; 533 break;
531 case CPU_ONLINE: 534 case CPU_ONLINE:
532 case CPU_ONLINE_FROZEN: 535 case CPU_ONLINE_FROZEN:
533 err = watchdog_enable(hotcpu); 536 if (watchdog_enabled)
537 err = watchdog_enable(hotcpu);
534 break; 538 break;
535#ifdef CONFIG_HOTPLUG_CPU 539#ifdef CONFIG_HOTPLUG_CPU
536 case CPU_UP_CANCELED: 540 case CPU_UP_CANCELED:
@@ -555,9 +559,6 @@ void __init lockup_detector_init(void)
555 void *cpu = (void *)(long)smp_processor_id(); 559 void *cpu = (void *)(long)smp_processor_id();
556 int err; 560 int err;
557 561
558 if (no_watchdog)
559 return;
560
561 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); 562 err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
562 WARN_ON(notifier_to_errno(err)); 563 WARN_ON(notifier_to_errno(err));
563 564