aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGraf Yang <graf.yang@analog.com>2010-01-20 05:56:24 -0500
committerMike Frysinger <vapier@gentoo.org>2010-03-09 00:30:49 -0500
commit60ffdb36547da2397d6cfefe9c752ebad16524f6 (patch)
treece188bc0ccd1d8d0e6ff0f49937dca95e1f571c0
parent726e96561e4704278bc5197238f6459e1a63aa77 (diff)
Blackfin: implement nmi_watchdog for SMP on BF561
Signed-off-by: Graf Yang <graf.yang@analog.com> Signed-off-by: Mike Frysinger <vapier@gentoo.org>
-rw-r--r--arch/blackfin/Kconfig.debug9
-rw-r--r--arch/blackfin/include/asm/irq.h4
-rw-r--r--arch/blackfin/include/asm/nmi.h12
-rw-r--r--arch/blackfin/include/asm/smp.h1
-rw-r--r--arch/blackfin/kernel/Makefile1
-rw-r--r--arch/blackfin/kernel/nmi.c313
-rw-r--r--arch/blackfin/kernel/time-ts.c4
-rw-r--r--arch/blackfin/mach-common/interrupt.S18
8 files changed, 361 insertions, 1 deletions
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index 87f195ee2e06..1460d7b5edc1 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -238,6 +238,15 @@ config EARLY_PRINTK
238 all of this lives in the init section and is thrown away after the 238 all of this lives in the init section and is thrown away after the
239 kernel boots completely. 239 kernel boots completely.
240 240
241config NMI_WATCHDOG
242 bool "Enable NMI watchdog to help debugging lockup on SMP"
243 default n
244 depends on (SMP && !BFIN_SCRATCH_REG_RETN)
245 help
246 If any CPU in the system does not execute the periodic local timer
247 interrupt for more than 5 seconds, then the NMI handler dumps debug
248 information. This information can be used to debug the lockup.
249
241config CPLB_INFO 250config CPLB_INFO
242 bool "Display the CPLB information" 251 bool "Display the CPLB information"
243 help 252 help
diff --git a/arch/blackfin/include/asm/irq.h b/arch/blackfin/include/asm/irq.h
index 89de539ed010..12f4060a31b0 100644
--- a/arch/blackfin/include/asm/irq.h
+++ b/arch/blackfin/include/asm/irq.h
@@ -38,4 +38,8 @@
38 38
39#include <asm-generic/irq.h> 39#include <asm-generic/irq.h>
40 40
41#ifdef CONFIG_NMI_WATCHDOG
42# define ARCH_HAS_NMI_WATCHDOG
43#endif
44
41#endif /* _BFIN_IRQ_H_ */ 45#endif /* _BFIN_IRQ_H_ */
diff --git a/arch/blackfin/include/asm/nmi.h b/arch/blackfin/include/asm/nmi.h
new file mode 100644
index 000000000000..b9caac4fcfd8
--- /dev/null
+++ b/arch/blackfin/include/asm/nmi.h
@@ -0,0 +1,12 @@
1/*
2 * Copyright 2010 Analog Devices Inc.
3 *
4 * Licensed under the GPL-2
5 */
6
7#ifndef _BFIN_NMI_H_
8#define _BFIN_NMI_H_
9
10#include <linux/nmi.h>
11
12#endif
diff --git a/arch/blackfin/include/asm/smp.h b/arch/blackfin/include/asm/smp.h
index 29fb88219470..7f26de09ca9c 100644
--- a/arch/blackfin/include/asm/smp.h
+++ b/arch/blackfin/include/asm/smp.h
@@ -22,6 +22,7 @@ extern char coreb_trampoline_start, coreb_trampoline_end;
22struct corelock_slot { 22struct corelock_slot {
23 int lock; 23 int lock;
24}; 24};
25extern struct corelock_slot corelock;
25 26
26void smp_icache_flush_range_others(unsigned long start, 27void smp_icache_flush_range_others(unsigned long start,
27 unsigned long end); 28 unsigned long end);
diff --git a/arch/blackfin/kernel/Makefile b/arch/blackfin/kernel/Makefile
index a8ddbc8ed5af..346a421f1562 100644
--- a/arch/blackfin/kernel/Makefile
+++ b/arch/blackfin/kernel/Makefile
@@ -25,6 +25,7 @@ obj-$(CONFIG_CPLB_INFO) += cplbinfo.o
25obj-$(CONFIG_MODULES) += module.o 25obj-$(CONFIG_MODULES) += module.o
26obj-$(CONFIG_KGDB) += kgdb.o 26obj-$(CONFIG_KGDB) += kgdb.o
27obj-$(CONFIG_KGDB_TESTS) += kgdb_test.o 27obj-$(CONFIG_KGDB_TESTS) += kgdb_test.o
28obj-$(CONFIG_NMI_WATCHDOG) += nmi.o
28obj-$(CONFIG_EARLY_PRINTK) += early_printk.o 29obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
29obj-$(CONFIG_EARLY_PRINTK) += shadow_console.o 30obj-$(CONFIG_EARLY_PRINTK) += shadow_console.o
30obj-$(CONFIG_STACKTRACE) += stacktrace.o 31obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c
new file mode 100644
index 000000000000..19093c17632b
--- /dev/null
+++ b/arch/blackfin/kernel/nmi.c
@@ -0,0 +1,313 @@
1/*
2 * Blackfin nmi_watchdog Driver
3 *
4 * Originally based on bfin_wdt.c
5 * Copyright 2010-2010 Analog Devices Inc.
6 * Graf Yang <graf.yang@analog.com>
7 *
8 * Enter bugs at http://blackfin.uclinux.org/
9 *
10 * Licensed under the GPL-2 or later.
11 */
12
13#include <linux/bitops.h>
14#include <linux/hardirq.h>
15#include <linux/sysdev.h>
16#include <linux/pm.h>
17#include <linux/nmi.h>
18#include <linux/smp.h>
19#include <linux/timer.h>
20#include <asm/blackfin.h>
21#include <asm/atomic.h>
22#include <asm/cacheflush.h>
23
24/* Bit in WDOG_CTL that indicates watchdog has expired (WDR0) */
25#define WDOG_EXPIRED 0x8000
26
27/* Masks for WDEV field in WDOG_CTL register */
28#define ICTL_RESET 0x0
29#define ICTL_NMI 0x2
30#define ICTL_GPI 0x4
31#define ICTL_NONE 0x6
32#define ICTL_MASK 0x6
33
34/* Masks for WDEN field in WDOG_CTL register */
35#define WDEN_MASK 0x0FF0
36#define WDEN_ENABLE 0x0000
37#define WDEN_DISABLE 0x0AD0
38
39#define DRV_NAME "nmi-wdt"
40
41#define NMI_WDT_TIMEOUT 5 /* 5 seconds */
42#define NMI_CHECK_TIMEOUT (4 * HZ) /* 4 seconds in jiffies */
43static int nmi_wdt_cpu = 1;
44
45static unsigned int timeout = NMI_WDT_TIMEOUT;
46static int nmi_active;
47
48static unsigned short wdoga_ctl;
49static unsigned int wdoga_cnt;
50static struct corelock_slot saved_corelock;
51static atomic_t nmi_touched[NR_CPUS];
52static struct timer_list ntimer;
53
/*
 * Handshake flags exchanged between the two cores while both are inside
 * do_nmi().  Each enumerator is a bit index into nmi_event.
 */
enum {
	COREA_ENTER_NMI,	/* set by CoreA once it has entered do_nmi() */
	COREA_EXIT_NMI,		/* set by CoreA after it has printed its dump */
	COREB_EXIT_NMI,		/* set by CoreB after it has printed its dump */

	NMI_EVENT_NR,		/* number of event bits defined above */
};

/* Event bitmap shared by both cores; placed in the .l2.bss section. */
static unsigned long nmi_event __attribute__ ((__section__(".l2.bss")));
62
63/* we are in nmi, non-atomic bit ops is safe */
64static inline void set_nmi_event(int event)
65{
66 __set_bit(event, &nmi_event);
67}
68
69static inline void wait_nmi_event(int event)
70{
71 while (!test_bit(event, &nmi_event))
72 barrier();
73 __clear_bit(event, &nmi_event);
74}
75
76static inline void send_corea_nmi(void)
77{
78 wdoga_ctl = bfin_read_WDOGA_CTL();
79 wdoga_cnt = bfin_read_WDOGA_CNT();
80
81 bfin_write_WDOGA_CTL(WDEN_DISABLE);
82 bfin_write_WDOGA_CNT(0);
83 bfin_write_WDOGA_CTL(WDEN_ENABLE | ICTL_NMI);
84}
85
86static inline void restore_corea_nmi(void)
87{
88 bfin_write_WDOGA_CTL(WDEN_DISABLE);
89 bfin_write_WDOGA_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
90
91 bfin_write_WDOGA_CNT(wdoga_cnt);
92 bfin_write_WDOGA_CTL(wdoga_ctl);
93}
94
95static inline void save_corelock(void)
96{
97 saved_corelock = corelock;
98 corelock.lock = 0;
99}
100
101static inline void restore_corelock(void)
102{
103 corelock = saved_corelock;
104}
105
106
/*
 * Kick watchdog B by writing its WDOGB_STAT register (do_nmi() describes
 * this as reloading WDOG_STAT).
 */
static inline void nmi_wdt_keepalive(void)
{
	bfin_write_WDOGB_STAT(0);
}
111
112static inline void nmi_wdt_stop(void)
113{
114 bfin_write_WDOGB_CTL(WDEN_DISABLE);
115}
116
117/* before calling this function, you must stop the WDT */
118static inline void nmi_wdt_clear(void)
119{
120 /* clear TRO bit, disable event generation */
121 bfin_write_WDOGB_CTL(WDOG_EXPIRED | WDEN_DISABLE | ICTL_NONE);
122}
123
124static inline void nmi_wdt_start(void)
125{
126 bfin_write_WDOGB_CTL(WDEN_ENABLE | ICTL_NMI);
127}
128
129static inline int nmi_wdt_running(void)
130{
131 return ((bfin_read_WDOGB_CTL() & WDEN_MASK) != WDEN_DISABLE);
132}
133
134static inline int nmi_wdt_set_timeout(unsigned long t)
135{
136 u32 cnt, max_t, sclk;
137 int run;
138
139 sclk = get_sclk();
140 max_t = -1 / sclk;
141 cnt = t * sclk;
142 if (t > max_t) {
143 pr_warning("NMI: timeout value is too large\n");
144 return -EINVAL;
145 }
146
147 run = nmi_wdt_running();
148 nmi_wdt_stop();
149 bfin_write_WDOGB_CNT(cnt);
150 if (run)
151 nmi_wdt_start();
152
153 timeout = t;
154
155 return 0;
156}
157
158int check_nmi_wdt_touched(void)
159{
160 unsigned int this_cpu = smp_processor_id();
161 unsigned int cpu;
162
163 cpumask_t mask = cpu_online_map;
164
165 if (!atomic_read(&nmi_touched[this_cpu]))
166 return 0;
167
168 atomic_set(&nmi_touched[this_cpu], 0);
169
170 cpu_clear(this_cpu, mask);
171 for_each_cpu_mask(cpu, mask) {
172 invalidate_dcache_range((unsigned long)(&nmi_touched[cpu]),
173 (unsigned long)(&nmi_touched[cpu]));
174 if (!atomic_read(&nmi_touched[cpu]))
175 return 0;
176 atomic_set(&nmi_touched[cpu], 0);
177 }
178
179 return 1;
180}
181
182static void nmi_wdt_timer(unsigned long data)
183{
184 if (check_nmi_wdt_touched())
185 nmi_wdt_keepalive();
186
187 mod_timer(&ntimer, jiffies + NMI_CHECK_TIMEOUT);
188}
189
190static int __init init_nmi_wdt(void)
191{
192 nmi_wdt_set_timeout(timeout);
193 nmi_wdt_start();
194 nmi_active = true;
195
196 init_timer(&ntimer);
197 ntimer.function = nmi_wdt_timer;
198 ntimer.expires = jiffies + NMI_CHECK_TIMEOUT;
199 add_timer(&ntimer);
200
201 pr_info("nmi_wdt: initialized: timeout=%d sec\n", timeout);
202 return 0;
203}
204device_initcall(init_nmi_wdt);
205
206void touch_nmi_watchdog(void)
207{
208 atomic_set(&nmi_touched[smp_processor_id()], 1);
209}
210
211/* Suspend/resume support */
212#ifdef CONFIG_PM
213static int nmi_wdt_suspend(struct sys_device *dev, pm_message_t state)
214{
215 nmi_wdt_stop();
216 return 0;
217}
218
219static int nmi_wdt_resume(struct sys_device *dev)
220{
221 if (nmi_active)
222 nmi_wdt_start();
223 return 0;
224}
225
226static struct sysdev_class nmi_sysclass = {
227 .name = DRV_NAME,
228 .resume = nmi_wdt_resume,
229 .suspend = nmi_wdt_suspend,
230};
231
232static struct sys_device device_nmi_wdt = {
233 .id = 0,
234 .cls = &nmi_sysclass,
235};
236
237static int __init init_nmi_wdt_sysfs(void)
238{
239 int error;
240
241 if (!nmi_active)
242 return 0;
243
244 error = sysdev_class_register(&nmi_sysclass);
245 if (!error)
246 error = sysdev_register(&device_nmi_wdt);
247 return error;
248}
249late_initcall(init_nmi_wdt_sysfs);
250
251#endif /* CONFIG_PM */
252
253
254asmlinkage notrace void do_nmi(struct pt_regs *fp)
255{
256 unsigned int cpu = smp_processor_id();
257 nmi_enter();
258
259 cpu_pda[cpu].__nmi_count += 1;
260
261 if (cpu == nmi_wdt_cpu) {
262 /* CoreB goes here first */
263
264 /* reload the WDOG_STAT */
265 nmi_wdt_keepalive();
266
267 /* clear nmi interrupt for CoreB */
268 nmi_wdt_stop();
269 nmi_wdt_clear();
270
271 /* trigger NMI interrupt of CoreA */
272 send_corea_nmi();
273
274 /* waiting CoreB to enter NMI */
275 wait_nmi_event(COREA_ENTER_NMI);
276
277 /* recover WDOGA's settings */
278 restore_corea_nmi();
279
280 save_corelock();
281
282 /* corelock is save/cleared, CoreA is dummping messages */
283
284 wait_nmi_event(COREA_EXIT_NMI);
285 } else {
286 /* OK, CoreA entered NMI */
287 set_nmi_event(COREA_ENTER_NMI);
288 }
289
290 pr_emerg("\nNMI Watchdog detected LOCKUP, dump for CPU %d\n", cpu);
291 dump_bfin_process(fp);
292 dump_bfin_mem(fp);
293 show_regs(fp);
294 dump_bfin_trace_buffer();
295 show_stack(current, (unsigned long *)fp);
296
297 if (cpu == nmi_wdt_cpu) {
298 pr_emerg("This fault is not recoverable, sorry!\n");
299
300 /* CoreA dump finished, restore the corelock */
301 restore_corelock();
302
303 set_nmi_event(COREB_EXIT_NMI);
304 } else {
305 /* CoreB dump finished, notice the CoreA we are done */
306 set_nmi_event(COREA_EXIT_NMI);
307
308 /* synchronize with CoreA */
309 wait_nmi_event(COREB_EXIT_NMI);
310 }
311
312 nmi_exit();
313}
diff --git a/arch/blackfin/kernel/time-ts.c b/arch/blackfin/kernel/time-ts.c
index a351f97c87a3..41a907596c70 100644
--- a/arch/blackfin/kernel/time-ts.c
+++ b/arch/blackfin/kernel/time-ts.c
@@ -21,6 +21,7 @@
21#include <asm/blackfin.h> 21#include <asm/blackfin.h>
22#include <asm/time.h> 22#include <asm/time.h>
23#include <asm/gptimers.h> 23#include <asm/gptimers.h>
24#include <asm/nmi.h>
24 25
25/* Accelerators for sched_clock() 26/* Accelerators for sched_clock()
26 * convert from cycles(64bits) => nanoseconds (64bits) 27 * convert from cycles(64bits) => nanoseconds (64bits)
@@ -309,6 +310,9 @@ irqreturn_t bfin_coretmr_interrupt(int irq, void *dev_id)
309 310
310 smp_mb(); 311 smp_mb();
311 evt->event_handler(evt); 312 evt->event_handler(evt);
313
314 touch_nmi_watchdog();
315
312 return IRQ_HANDLED; 316 return IRQ_HANDLED;
313} 317}
314 318
diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S
index 0a0c088ead8c..cee62cf4acd4 100644
--- a/arch/blackfin/mach-common/interrupt.S
+++ b/arch/blackfin/mach-common/interrupt.S
@@ -194,12 +194,28 @@ ENTRY(_evt_ivhw)
194ENDPROC(_evt_ivhw) 194ENDPROC(_evt_ivhw)
195 195
196/* Interrupt routine for evt2 (NMI). 196/* Interrupt routine for evt2 (NMI).
197 * We don't actually use this, so just return.
198 * For inner circle type details, please see: 197 * For inner circle type details, please see:
199 * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi 198 * http://docs.blackfin.uclinux.org/doku.php?id=linux-kernel:nmi
200 */ 199 */
201ENTRY(_evt_nmi) 200ENTRY(_evt_nmi)
201#ifndef CONFIG_NMI_WATCHDOG
202.weak _evt_nmi 202.weak _evt_nmi
203#else
204 /* Not take account of CPLBs, this handler will not return */
205 SAVE_ALL_SYS
206 r0 = sp;
207 r1 = retn;
208 [sp + PT_PC] = r1;
209 trace_buffer_save(p4,r5);
210
211 ANOMALY_283_315_WORKAROUND(p4, r5)
212
213 SP += -12;
214 call _do_nmi;
215 SP += 12;
2161:
217 jump 1b;
218#endif
203 rtn; 219 rtn;
204ENDPROC(_evt_nmi) 220ENDPROC(_evt_nmi)
205 221