Diffstat (limited to 'kernel/watchdog.c')
-rw-r--r--	kernel/watchdog.c	96
1 file changed, 49 insertions(+), 47 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 5b082156cd21..18bb15776c57 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,7 +27,7 @@
 #include <asm/irq_regs.h>
 #include <linux/perf_event.h>
 
-int watchdog_enabled;
+int watchdog_enabled = 1;
 int __read_mostly softlockup_thresh = 60;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
@@ -43,9 +43,6 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 #endif
 
-static int no_watchdog;
-
-
 /* boot commands */
 /*
  * Should we panic when a soft-lockup or hard-lockup occurs:
@@ -57,6 +54,8 @@ static int __init hardlockup_panic_setup(char *str)
 {
 	if (!strncmp(str, "panic", 5))
 		hardlockup_panic = 1;
+	else if (!strncmp(str, "0", 1))
+		watchdog_enabled = 0;
 	return 1;
 }
 __setup("nmi_watchdog=", hardlockup_panic_setup);
@@ -75,7 +74,7 @@ __setup("softlockup_panic=", softlockup_panic_setup);
 
 static int __init nowatchdog_setup(char *str)
 {
-	no_watchdog = 1;
+	watchdog_enabled = 0;
 	return 1;
 }
 __setup("nowatchdog", nowatchdog_setup);
@@ -83,7 +82,7 @@ __setup("nowatchdog", nowatchdog_setup);
 /* deprecated */
 static int __init nosoftlockup_setup(char *str)
 {
-	no_watchdog = 1;
+	watchdog_enabled = 0;
 	return 1;
 }
 __setup("nosoftlockup", nosoftlockup_setup);
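
With these three handlers rewired, the separate no_watchdog flag is gone: nmi_watchdog=0, nowatchdog, and the deprecated nosoftlockup all clear the single watchdog_enabled flag, which the first hunk above now defaults to 1. A minimal sketch of the __setup() pattern these handlers follow, using a hypothetical "foo" option (not part of this patch):

	#include <linux/init.h>
	#include <linux/string.h>

	static int foo_enabled = 1;		/* default on, like watchdog_enabled */

	/* receives the text after "foo=" on the kernel command line */
	static int __init foo_setup(char *str)
	{
		if (!strncmp(str, "0", 1))
			foo_enabled = 0;
		return 1;			/* 1 => option was consumed */
	}
	__setup("foo=", foo_setup);
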
@@ -116,12 +115,12 @@ static void __touch_watchdog(void)
 {
 	int this_cpu = smp_processor_id();
 
-	__get_cpu_var(watchdog_touch_ts) = get_timestamp(this_cpu);
+	__this_cpu_write(watchdog_touch_ts, get_timestamp(this_cpu));
 }
 
 void touch_softlockup_watchdog(void)
 {
-	__raw_get_cpu_var(watchdog_touch_ts) = 0;
+	__this_cpu_write(watchdog_touch_ts, 0);
 }
 EXPORT_SYMBOL(touch_softlockup_watchdog);
 
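
This hunk and most of those below are a mechanical conversion from the address-based __get_cpu_var() accessor to the __this_cpu_read()/__this_cpu_write()/__this_cpu_inc() operations, which let an architecture issue a single per-cpu instruction (e.g. a segment-prefixed move on x86) instead of first computing the per-cpu address. A sketch of the idiom, using a hypothetical per-cpu counter rather than anything from this file:

	#include <linux/percpu.h>

	static DEFINE_PER_CPU(unsigned long, my_count);	/* hypothetical */

	static void my_tick(void)
	{
		/* was: __get_cpu_var(my_count)++;  (take lvalue, then modify) */
		__this_cpu_inc(my_count);		/* one per-cpu operation */

		if (__this_cpu_read(my_count) > 100)
			__this_cpu_write(my_count, 0);
	}
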
@@ -165,12 +164,12 @@ void touch_softlockup_watchdog_sync(void)
 /* watchdog detector functions */
 static int is_hardlockup(void)
 {
-	unsigned long hrint = __get_cpu_var(hrtimer_interrupts);
+	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
 
-	if (__get_cpu_var(hrtimer_interrupts_saved) == hrint)
+	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
 		return 1;
 
-	__get_cpu_var(hrtimer_interrupts_saved) = hrint;
+	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 	return 0;
 }
 #endif
@@ -203,8 +202,8 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;
 
-	if (__get_cpu_var(watchdog_nmi_touch) == true) {
-		__get_cpu_var(watchdog_nmi_touch) = false;
+	if (__this_cpu_read(watchdog_nmi_touch) == true) {
+		__this_cpu_write(watchdog_nmi_touch, false);
 		return;
 	}
 
@@ -218,7 +217,7 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		int this_cpu = smp_processor_id();
 
 		/* only print hardlockups once */
-		if (__get_cpu_var(hard_watchdog_warn) == true)
+		if (__this_cpu_read(hard_watchdog_warn) == true)
 			return;
 
 		if (hardlockup_panic)
@@ -226,16 +225,16 @@ static void watchdog_overflow_callback(struct perf_event *event, int nmi,
 		else
 			WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
 
-		__get_cpu_var(hard_watchdog_warn) = true;
+		__this_cpu_write(hard_watchdog_warn, true);
 		return;
 	}
 
-	__get_cpu_var(hard_watchdog_warn) = false;
+	__this_cpu_write(hard_watchdog_warn, false);
 	return;
 }
 static void watchdog_interrupt_count(void)
 {
-	__get_cpu_var(hrtimer_interrupts)++;
+	__this_cpu_inc(hrtimer_interrupts);
 }
 #else
 static inline void watchdog_interrupt_count(void) { return; }
@@ -244,7 +243,7 @@ static inline void watchdog_interrupt_count(void) { return; }
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
-	unsigned long touch_ts = __get_cpu_var(watchdog_touch_ts);
+	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
 
@@ -252,18 +251,18 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	wake_up_process(__get_cpu_var(softlockup_watchdog));
+	wake_up_process(__this_cpu_read(softlockup_watchdog));
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(get_sample_period()));
 
 	if (touch_ts == 0) {
-		if (unlikely(__get_cpu_var(softlockup_touch_sync))) {
+		if (unlikely(__this_cpu_read(softlockup_touch_sync))) {
 			/*
 			 * If the time stamp was touched atomically
 			 * make sure the scheduler tick is up to date.
 			 */
-			__get_cpu_var(softlockup_touch_sync) = false;
+			__this_cpu_write(softlockup_touch_sync, false);
 			sched_clock_tick();
 		}
 		__touch_watchdog();
@@ -279,7 +278,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	duration = is_softlockup(touch_ts);
 	if (unlikely(duration)) {
 		/* only warn once */
-		if (__get_cpu_var(soft_watchdog_warn) == true)
+		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
@@ -294,9 +293,9 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 
 		if (softlockup_panic)
 			panic("softlockup: hung tasks");
-		__get_cpu_var(soft_watchdog_warn) = true;
+		__this_cpu_write(soft_watchdog_warn, true);
 	} else
-		__get_cpu_var(soft_watchdog_warn) = false;
+		__this_cpu_write(soft_watchdog_warn, false);
 
 	return HRTIMER_RESTART;
 }
@@ -307,7 +306,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
  */
 static int watchdog(void *unused)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
+	static struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
 	struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
@@ -364,8 +363,14 @@ static int watchdog_nmi_enable(int cpu)
 		goto out_save;
 	}
 
-	printk(KERN_ERR "NMI watchdog disabled for cpu%i: unable to create perf event: %ld\n",
-	       cpu, PTR_ERR(event));
+
+	/* vary the KERN level based on the returned errno */
+	if (PTR_ERR(event) == -EOPNOTSUPP)
+		printk(KERN_INFO "NMI watchdog disabled (cpu%i): not supported (no LAPIC?)\n", cpu);
+	else if (PTR_ERR(event) == -ENOENT)
+		printk(KERN_WARNING "NMI watchdog disabled (cpu%i): hardware events not enabled\n", cpu);
+	else
+		printk(KERN_ERR "NMI watchdog disabled (cpu%i): unable to create perf event: %ld\n", cpu, PTR_ERR(event));
 	return PTR_ERR(event);
 
 	/* success path */
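
The single KERN_ERR message is split so that severity tracks the errno encoded in the ERR_PTR returned by perf-event creation: -EOPNOTSUPP (no usable PMU, e.g. no LAPIC) is merely informational, -ENOENT (hardware events not available) is a warning, and anything else stays an error. A minimal sketch of the ERR_PTR/IS_ERR/PTR_ERR convention, with a hypothetical constructor standing in for the perf call:

	#include <linux/err.h>
	#include <linux/kernel.h>

	struct widget;				/* hypothetical type */
	struct widget *create_widget(void);	/* hypothetical; returns a pointer or ERR_PTR(-errno) */

	static int setup_widget(void)
	{
		struct widget *w = create_widget();

		if (IS_ERR(w)) {
			long err = PTR_ERR(w);	/* recover the negative errno */

			if (err == -EOPNOTSUPP)
				pr_info("widget: not supported here\n");
			else
				pr_err("widget: creation failed: %ld\n", err);
			return (int)err;
		}
		return 0;
	}
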
@@ -430,9 +435,6 @@ static int watchdog_enable(int cpu)
 		wake_up_process(p);
 	}
 
-	/* if any cpu succeeds, watchdog is considered enabled for the system */
-	watchdog_enabled = 1;
-
 	return 0;
 }
 
@@ -460,12 +462,16 @@ static void watchdog_disable(int cpu)
 static void watchdog_enable_all_cpus(void)
 {
 	int cpu;
-	int result = 0;
+
+	watchdog_enabled = 0;
 
 	for_each_online_cpu(cpu)
-		result += watchdog_enable(cpu);
+		if (!watchdog_enable(cpu))
+			/* if any cpu succeeds, watchdog is considered
+			   enabled for the system */
+			watchdog_enabled = 1;
 
-	if (result)
+	if (!watchdog_enabled)
 		printk(KERN_ERR "watchdog: failed to be enabled on some cpus\n");
 
 }
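
Note the polarity in the rewritten loop: kernel functions return 0 on success, so !watchdog_enable(cpu) latches watchdog_enabled on the first CPU that comes up, and the error message now fires only when every CPU failed (previously any non-zero sum triggered it). The same latch-on-any-success shape in miniature, with a hypothetical per-cpu init:

	#include <linux/cpu.h>
	#include <linux/kernel.h>

	static int my_enabled;			/* plays the role of watchdog_enabled */

	int init_on_cpu(int cpu);		/* hypothetical; returns 0 on success */

	static void my_enable_all_cpus(void)
	{
		int cpu;

		my_enabled = 0;
		for_each_online_cpu(cpu)
			if (!init_on_cpu(cpu))	/* 0 == success, so this latches */
				my_enabled = 1;	/* on the first cpu that comes up */

		if (!my_enabled)
			pr_err("failed on every online cpu\n");
	}
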
@@ -474,9 +480,6 @@ static void watchdog_disable_all_cpus(void)
 {
 	int cpu;
 
-	if (no_watchdog)
-		return;
-
 	for_each_online_cpu(cpu)
 		watchdog_disable(cpu);
 
@@ -496,10 +499,12 @@ int proc_dowatchdog_enabled(struct ctl_table *table, int write,
 {
 	proc_dointvec(table, write, buffer, length, ppos);
 
-	if (watchdog_enabled)
-		watchdog_enable_all_cpus();
-	else
-		watchdog_disable_all_cpus();
+	if (write) {
+		if (watchdog_enabled)
+			watchdog_enable_all_cpus();
+		else
+			watchdog_disable_all_cpus();
+	}
 	return 0;
 }
 
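
Wrapping the propagation in if (write) means merely reading /proc/sys/kernel/watchdog no longer re-runs the enable/disable machinery; only an actual write toggles the per-cpu state. The handler shape, sketched with a hypothetical knob and the era-appropriate signature:

	#include <linux/sysctl.h>

	static void apply_knob(void);		/* hypothetical: push the new value out */

	int proc_doknob(struct ctl_table *table, int write,
			void __user *buffer, size_t *length, loff_t *ppos)
	{
		proc_dointvec(table, write, buffer, length, ppos);

		if (write)			/* reads must not have side effects */
			apply_knob();
		return 0;
	}
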
@@ -528,7 +533,8 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		err = watchdog_enable(hotcpu);
+		if (watchdog_enabled)
+			err = watchdog_enable(hotcpu);
 		break;
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_UP_CANCELED:
@@ -548,20 +554,16 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
-static int __init spawn_watchdog_task(void)
+void __init lockup_detector_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int err;
 
-	if (no_watchdog)
-		return 0;
-
 	err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
 	WARN_ON(notifier_to_errno(err));
 
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
 
-	return 0;
+	return;
 }
-early_initcall(spawn_watchdog_task);
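
Replacing the early_initcall with a plainly named lockup_detector_init() hands startup ordering to the boot code: the detector comes up when its caller decides perf and the scheduler are ready, rather than at early-initcall time. A sketch of what the call site would look like, assuming (as in init/main.c of this era) it is invoked from the init thread, with the declaration exported via a header such as include/linux/nmi.h:

	/* hypothetical excerpt of the boot path, not part of this diff */
	#include <linux/nmi.h>			/* would declare lockup_detector_init() */

	static int kernel_init(void *unused)
	{
		/* ... earlier boot-up: smp init, driver init, etc. ... */

		lockup_detector_init();		/* detector starts when boot code says so */

		/* ... hand off to userspace ... */
		return 0;
	}
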