aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorRobin Getz <robin.getz@analog.com>2009-05-18 14:33:26 -0400
committerMike Frysinger <vapier@gentoo.org>2009-06-12 06:11:44 -0400
commitb9a3899d59c3f0fc074573f0eba2419b1e4c0bca (patch)
tree09d20c3b062604d615c9d224c06c5df5a5acbcec /arch
parent97b070c8e7e82be30c8a3bf19e69b8c0c71f1fac (diff)
Blackfin: make deferred hardware errors more exact
Hardware errors on the Blackfin architecture are queued by nature of the hardware design. Things that could generate a hardware error queue up at the system interface and might not process until much later, at which point the system would send a notification back to the core. As such, it is possible for user space code to do something that would trigger a hardware error, but have it delay long enough for the process context to switch. So when the hardware error does signal, we mistakenly evaluate it as a different process or as kernel context and panic (erp!). This makes it pretty difficult to find the offending context. But wait, there is good news somewhere. By forcing a SSYNC in the interrupt entry, we force all pending queues at the system level to be processed and all hardware errors to be signaled. Then we check the current interrupt state to see if the hardware error is now signaled. If so, we re-queue the current interrupt and return thus allowing the higher priority hardware error interrupt to process properly. Since we haven't done any other context processing yet, the right context will be selected and killed. There is still the possibility that the exact offending instruction will be unknown, but at least we'll have a much better idea of where to look. The downside of course is that this causes system-wide syncs at every interrupt point which results in significant performance degradation. Since this situation should not occur in any properly configured system (as hardware errors are triggered by things like bad pointers), make it a debug configuration option and disable it by default. Signed-off-by: Robin Getz <robin.getz@analog.com> Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/blackfin/Kconfig.debug13
-rw-r--r--arch/blackfin/include/asm/entry.h92
-rw-r--r--arch/blackfin/mach-common/entry.S59
-rw-r--r--arch/blackfin/mach-common/interrupt.S27
4 files changed, 170 insertions, 21 deletions
diff --git a/arch/blackfin/Kconfig.debug b/arch/blackfin/Kconfig.debug
index 79e7e63ab709..1fc4981d486f 100644
--- a/arch/blackfin/Kconfig.debug
+++ b/arch/blackfin/Kconfig.debug
@@ -54,6 +54,19 @@ config DEBUG_HWERR
54 hardware error interrupts and need to know where they are coming 54 hardware error interrupts and need to know where they are coming
55 from. 55 from.
56 56
57config EXACT_HWERR
58 bool "Try to make Hardware errors exact"
59 depends on DEBUG_HWERR
60 help
61 By default, the Blackfin hardware errors are not exact - the error
62 may be reported multiple cycles after the error happens. This delay
63 can cause the wrong application, or even the kernel, to receive a
64 signal to be killed. If you are getting HW errors in your system,
65 try turning this on to ensure they are at least coming from the
66 proper thread.
67
68 On production systems, it is safe (and a small optimization) to say N.
69
57config DEBUG_DOUBLEFAULT 70config DEBUG_DOUBLEFAULT
58 bool "Debug Double Faults" 71 bool "Debug Double Faults"
59 default n 72 default n
diff --git a/arch/blackfin/include/asm/entry.h b/arch/blackfin/include/asm/entry.h
index b30a2968e274..ec58efc130e6 100644
--- a/arch/blackfin/include/asm/entry.h
+++ b/arch/blackfin/include/asm/entry.h
@@ -35,21 +35,39 @@
35#else 35#else
36# define LOAD_IPIPE_IPEND 36# define LOAD_IPIPE_IPEND
37#endif 37#endif
38
39#ifndef CONFIG_EXACT_HWERR
40/* As a debugging aid - we save IPEND when DEBUG_KERNEL is on,
41 * otherwise it is a waste of cycles.
42 */
43# ifndef CONFIG_DEBUG_KERNEL
44#define INTERRUPT_ENTRY(N) /* variant used when CONFIG_DEBUG_KERNEL is off: IPEND is not read */ \
45 [--sp] = SYSCFG; \
46 [--sp] = P0; /*orig_p0*/ \
47 [--sp] = R0; /*orig_r0*/ \
48 [--sp] = (R7:0,P5:0); \
49 R0 = (N); /* R0 holds N when __common_int_entry is reached */ \
50 LOAD_IPIPE_IPEND \
51 jump __common_int_entry;
52# else /* CONFIG_DEBUG_KERNEL */
38#define INTERRUPT_ENTRY(N) \ 53#define INTERRUPT_ENTRY(N) \
39 [--sp] = SYSCFG; \ 54 [--sp] = SYSCFG; \
40 \
41 [--sp] = P0; /*orig_p0*/ \ 55 [--sp] = P0; /*orig_p0*/ \
42 [--sp] = R0; /*orig_r0*/ \ 56 [--sp] = R0; /*orig_r0*/ \
43 [--sp] = (R7:0,P5:0); \ 57 [--sp] = (R7:0,P5:0); \
58 p0.l = lo(IPEND); \
59 p0.h = hi(IPEND); \
60 r1 = [p0]; \
44 R0 = (N); \ 61 R0 = (N); \
45 LOAD_IPIPE_IPEND \ 62 LOAD_IPIPE_IPEND \
46 jump __common_int_entry; 63 jump __common_int_entry;
64# endif /* CONFIG_DEBUG_KERNEL */
47 65
48/* For timer interrupts, we need to save IPEND, since the user_mode 66/* For timer interrupts, we need to save IPEND, since the user_mode
49 macro accesses it to determine where to account time. */ 67 *macro accesses it to determine where to account time.
68 */
50#define TIMER_INTERRUPT_ENTRY(N) \ 69#define TIMER_INTERRUPT_ENTRY(N) \
51 [--sp] = SYSCFG; \ 70 [--sp] = SYSCFG; \
52 \
53 [--sp] = P0; /*orig_p0*/ \ 71 [--sp] = P0; /*orig_p0*/ \
54 [--sp] = R0; /*orig_r0*/ \ 72 [--sp] = R0; /*orig_r0*/ \
55 [--sp] = (R7:0,P5:0); \ 73 [--sp] = (R7:0,P5:0); \
@@ -58,6 +76,74 @@
58 r1 = [p0]; \ 76 r1 = [p0]; \
59 R0 = (N); \ 77 R0 = (N); \
60 jump __common_int_entry; 78 jump __common_int_entry;
79#else /* CONFIG_EXACT_HWERR is defined */
80
81/* If we want hardware errors to be exact, we need to do a SSYNC (which forces
82 * reads/writes to complete to the memory controllers), and check whether that
83 * caused a pending HW error condition. If so, we assume it was caused by user
84 * space, and handle it by setting the same interrupt that we are in (so it
85 * goes off again), restoring context, and doing a RTI (without servicing
86 * anything). This should cause the pending HWERR to fire, and when that is
87 * done, this interrupt will be re-serviced properly.
88 * As you can see by the code - we actually need to do two SSYNCS - one to
89 * make sure the read/writes complete, and another to make sure the hardware
90 * error is recognized by the core.
91 */
92#define INTERRUPT_ENTRY(N) /* CONFIG_EXACT_HWERR variant: back out and re-raise N if ILAT shows a HW error */ \
93 SSYNC; /* first sync: let outstanding reads/writes complete */ \
94 SSYNC; /* second sync: let the core recognize a resulting HW error */ \
95 [--sp] = SYSCFG; \
96 [--sp] = P0; /*orig_p0*/ \
97 [--sp] = R0; /*orig_r0*/ \
98 [--sp] = (R7:0,P5:0); \
99 R1 = ASTAT; /* saved because the ILAT test below assigns CC */ \
100 P0.L = LO(ILAT); \
101 P0.H = HI(ILAT); \
102 R0 = [P0]; /* R0 = ILAT */ \
103 CC = BITTST(R0, EVT_IVHW_P); /* is the hardware error bit set? */ \
104 IF CC JUMP 1f; /* yes: take the back-out path at 1: */ \
105 ASTAT = R1; /* no: put ASTAT back and enter normally */ \
106 p0.l = lo(IPEND); \
107 p0.h = hi(IPEND); \
108 r1 = [p0]; /* r1 = IPEND */ \
109 R0 = (N); /* R0 holds N when __common_int_entry is reached */ \
110 LOAD_IPIPE_IPEND \
111 jump __common_int_entry; \
1121: ASTAT = R1; /* back-out path: put ASTAT back */ \
113 RAISE N; /* set this same interrupt again so it goes off after the HW error */ \
114 (R7:0, P5:0) = [SP++]; \
115 SP += 0x8; /* skip the orig_r0/orig_p0 slots pushed above */ \
116 SYSCFG = [SP++]; \
117 CSYNC; \
118 RTI;
119
120#define TIMER_INTERRUPT_ENTRY(N) /* CONFIG_EXACT_HWERR variant: back out and re-raise N if ILAT shows a HW error */ \
121 SSYNC; /* first sync: let outstanding reads/writes complete */ \
122 SSYNC; /* second sync: let the core recognize a resulting HW error */ \
123 [--sp] = SYSCFG; \
124 [--sp] = P0; /*orig_p0*/ \
125 [--sp] = R0; /*orig_r0*/ \
126 [--sp] = (R7:0,P5:0); \
127 R1 = ASTAT; /* saved because the ILAT test below assigns CC */ \
128 P0.L = LO(ILAT); \
129 P0.H = HI(ILAT); \
130 R0 = [P0]; /* R0 = ILAT */ \
131 CC = BITTST(R0, EVT_IVHW_P); /* is the hardware error bit set? */ \
132 IF CC JUMP 1f; /* yes: take the back-out path at 1: */ \
133 ASTAT = R1; /* no: put ASTAT back and enter normally */ \
134 p0.l = lo(IPEND); \
135 p0.h = hi(IPEND); \
136 r1 = [p0]; /* r1 = IPEND */ \
137 R0 = (N); /* R0 holds N when __common_int_entry is reached */ \
138 jump __common_int_entry; \
1391: ASTAT = R1; /* back-out path: put ASTAT back */ \
140 RAISE N; /* set this same interrupt again so it goes off after the HW error */ \
141 (R7:0, P5:0) = [SP++]; \
142 SP += 0x8; /* skip the orig_r0/orig_p0 slots pushed above */ \
143 SYSCFG = [SP++]; \
144 CSYNC; \
145 RTI;
146#endif /* CONFIG_EXACT_HWERR */
61 147
62/* This one pushes RETI without using CLI. Interrupts are enabled. */ 148/* This one pushes RETI without using CLI. Interrupts are enabled. */
63#define SAVE_CONTEXT_SYSCALL save_context_syscall 149#define SAVE_CONTEXT_SYSCALL save_context_syscall
diff --git a/arch/blackfin/mach-common/entry.S b/arch/blackfin/mach-common/entry.S
index f0636fdcb353..da0558ad1b1a 100644
--- a/arch/blackfin/mach-common/entry.S
+++ b/arch/blackfin/mach-common/entry.S
@@ -200,7 +200,18 @@ ENTRY(_ex_single_step)
200 cc = r7 == 0; 200 cc = r7 == 0;
201 if !cc jump 1f; 201 if !cc jump 1f;
202#endif 202#endif
203 203#ifdef CONFIG_EXACT_HWERR
204 /* Read ILAT to check whether the process we are single stepping
205 * caused a previous hardware error.
206 * If so, do not single step (which lowers to IRQ5, and makes
207 * us miss the error).
208 */
209 p5.l = lo(ILAT);
210 p5.h = hi(ILAT);
211 r7 = [p5];
212 cc = bittst(r7, EVT_IVHW_P);
213 if cc jump 1f;
214#endif
204 /* Single stepping only a single instruction, so clear the trace 215 /* Single stepping only a single instruction, so clear the trace
205 * bit here. */ 216 * bit here. */
206 r7 = syscfg; 217 r7 = syscfg;
@@ -262,15 +273,6 @@ ENTRY(_bfin_return_from_exception)
262 r6 = 0x25; 273 r6 = 0x25;
263 CC = R7 == R6; 274 CC = R7 == R6;
264 if CC JUMP _double_fault; 275 if CC JUMP _double_fault;
265
266 /* Did we cause a HW error? */
267 p5.l = lo(ILAT);
268 p5.h = hi(ILAT);
269 r6 = [p5];
270 r7 = 0x20; /* Did I just cause anther HW error? */
271 r6 = r7 & r6;
272 CC = R7 == R6;
273 if CC JUMP _double_fault;
274#endif 276#endif
275 277
276 (R7:6,P5:4) = [sp++]; 278 (R7:6,P5:4) = [sp++];
@@ -472,6 +474,16 @@ ENTRY(_trap) /* Exception: 4th entry into system event table(supervisor mode)*/
472 [--sp] = ASTAT; 474 [--sp] = ASTAT;
473 [--sp] = (R7:6,P5:4); 475 [--sp] = (R7:6,P5:4);
474 476
477#ifdef CONFIG_EXACT_HWERR
478 /* Make sure all pending read/writes complete. This will ensure any
479 * accesses which could cause hardware errors complete, and that the
480 * errors are signalled before we do something silly, like crash the
481 * kernel. We don't need to work around anomaly 05000312, since
482 * we are already atomic
483 */
484 ssync;
485#endif
486
475#if ANOMALY_05000283 || ANOMALY_05000315 487#if ANOMALY_05000283 || ANOMALY_05000315
476 cc = r7 == r7; 488 cc = r7 == r7;
477 p5.h = HI(CHIPID); 489 p5.h = HI(CHIPID);
@@ -854,7 +866,7 @@ ENTRY(_ret_from_exception)
854 p1.h = _schedule_and_signal; 866 p1.h = _schedule_and_signal;
855 [p0] = p1; 867 [p0] = p1;
856 csync; 868 csync;
857 raise 15; /* raise evt14 to do signal or reschedule */ 869 raise 15; /* raise evt15 to do signal or reschedule */
8584: 8704:
859 r0 = syscfg; 871 r0 = syscfg;
860 bitclr(r0, 0); 872 bitclr(r0, 0);
@@ -915,7 +927,7 @@ ENTRY(_return_from_int)
915 p1.h = _schedule_and_signal_from_int; 927 p1.h = _schedule_and_signal_from_int;
916 [p0] = p1; 928 [p0] = p1;
917 csync; 929 csync;
918#if ANOMALY_05000281 930#if ANOMALY_05000281 || ANOMALY_05000461
919 r0.l = lo(SAFE_USER_INSTRUCTION); 931 r0.l = lo(SAFE_USER_INSTRUCTION);
920 r0.h = hi(SAFE_USER_INSTRUCTION); 932 r0.h = hi(SAFE_USER_INSTRUCTION);
921 reti = r0; 933 reti = r0;
@@ -929,18 +941,27 @@ ENTRY(_return_from_int)
929ENDPROC(_return_from_int) 941ENDPROC(_return_from_int)
930 942
931ENTRY(_lower_to_irq14) 943ENTRY(_lower_to_irq14)
932#if ANOMALY_05000281 944#if ANOMALY_05000281 || ANOMALY_05000461
933 r0.l = lo(SAFE_USER_INSTRUCTION); 945 r0.l = lo(SAFE_USER_INSTRUCTION);
934 r0.h = hi(SAFE_USER_INSTRUCTION); 946 r0.h = hi(SAFE_USER_INSTRUCTION);
935 reti = r0; 947 reti = r0;
936#endif 948#endif
937 r0 = 0x401f; 949
950#ifdef CONFIG_DEBUG_HWERR
951 /* enable irq14 & hwerr interrupt, until we transition to _evt14_softirq */
952 r0 = (EVT_IVG14 | EVT_IVHW | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
953#else
954 /* Only enable irq14 interrupt, until we transition to _evt14_softirq */
955 r0 = (EVT_IVG14 | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
956#endif
938 sti r0; 957 sti r0;
939 raise 14; 958 raise 14;
940 rti; 959 rti;
960ENDPROC(_lower_to_irq14)
961
941ENTRY(_evt14_softirq) 962ENTRY(_evt14_softirq)
942#ifdef CONFIG_DEBUG_HWERR 963#ifdef CONFIG_DEBUG_HWERR
943 r0 = 0x3f; 964 r0 = (EVT_IVHW | EVT_IRPTEN | EVT_EVX | EVT_NMI | EVT_RST | EVT_EMU);
944 sti r0; 965 sti r0;
945#else 966#else
946 cli r0; 967 cli r0;
@@ -948,8 +969,9 @@ ENTRY(_evt14_softirq)
948 [--sp] = RETI; 969 [--sp] = RETI;
949 SP += 4; 970 SP += 4;
950 rts; 971 rts;
972ENDPROC(_evt14_softirq)
951 973
952_schedule_and_signal_from_int: 974ENTRY(_schedule_and_signal_from_int)
953 /* To end up here, vector 15 was changed - so we have to change it 975 /* To end up here, vector 15 was changed - so we have to change it
954 * back. 976 * back.
955 */ 977 */
@@ -982,8 +1004,9 @@ _schedule_and_signal_from_int:
982 call _finish_atomic_sections; 1004 call _finish_atomic_sections;
983 sp += 12; 1005 sp += 12;
984 jump.s .Lresume_userspace; 1006 jump.s .Lresume_userspace;
1007ENDPROC(_schedule_and_signal_from_int)
985 1008
986_schedule_and_signal: 1009ENTRY(_schedule_and_signal)
987 SAVE_CONTEXT_SYSCALL 1010 SAVE_CONTEXT_SYSCALL
988 /* To end up here, vector 15 was changed - so we have to change it 1011 /* To end up here, vector 15 was changed - so we have to change it
989 * back. 1012 * back.
@@ -1001,7 +1024,7 @@ _schedule_and_signal:
10011: 10241:
1002 RESTORE_CONTEXT 1025 RESTORE_CONTEXT
1003 rti; 1026 rti;
1004ENDPROC(_lower_to_irq14) 1027ENDPROC(_schedule_and_signal)
1005 1028
1006/* We handle this 100% in exception space - to reduce overhead 1029/* We handle this 100% in exception space - to reduce overhead
1007 * Only potiential problem is if the software buffer gets swapped out of the 1030 * Only potiential problem is if the software buffer gets swapped out of the
diff --git a/arch/blackfin/mach-common/interrupt.S b/arch/blackfin/mach-common/interrupt.S
index 0069c2dd4625..9c46680186e4 100644
--- a/arch/blackfin/mach-common/interrupt.S
+++ b/arch/blackfin/mach-common/interrupt.S
@@ -145,6 +145,14 @@ __common_int_entry:
145 145
146/* interrupt routine for ivhw - 5 */ 146/* interrupt routine for ivhw - 5 */
147ENTRY(_evt_ivhw) 147ENTRY(_evt_ivhw)
148 /* In case a single action kicks off multiple memory transactions (like
149 * a cache line fetch), this can cause multiple hardware errors - let's
150 * catch them all. First - make sure all the actions are complete, and
151 * the core sees the hardware errors.
152 */
153 SSYNC;
154 SSYNC;
155
148 SAVE_ALL_SYS 156 SAVE_ALL_SYS
149#ifdef CONFIG_FRAME_POINTER 157#ifdef CONFIG_FRAME_POINTER
150 fp = 0; 158 fp = 0;
@@ -159,6 +167,25 @@ ENTRY(_evt_ivhw)
1591: 1671:
160#endif 168#endif
161 169
170 /* Handle all stacked hardware errors: R0 counts the errors cleared and
171 * R2 holds the limit, so we give up after 10 passes instead of hanging
172 */
173 R0 = 0;
174 R2 = 10;
1751:
176 P0.L = LO(ILAT);
177 P0.H = HI(ILAT);
178 R1 = [P0];
179 CC = BITTST(R1, EVT_IVHW_P);
180 IF ! CC JUMP 2f;
181 /* OK a hardware error is pending - clear it */
182 R1 = EVT_IVHW_P; /* NOTE(review): this is the bit position, not the EVT_IVHW mask - confirm */
183 [P0] = R1;
184 R0 += 1;
185 CC = R0 == R2; /* compare the pass counter (not R1) against the limit */
186 if CC JUMP 2f;
187 JUMP 1b;
1882:
162 # We are going to dump something out, so make sure we print IPEND properly 189 # We are going to dump something out, so make sure we print IPEND properly
163 p2.l = lo(IPEND); 190 p2.l = lo(IPEND);
164 p2.h = hi(IPEND); 191 p2.h = hi(IPEND);