aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrew Isaacson <adi@broadcom.com>2005-10-20 02:57:40 -0400
committerRalf Baechle <ralf@linux-mips.org>2005-10-29 14:32:48 -0400
commita4b5bd9abcf5b0586de68722ff8e9b91020279bf (patch)
treefe0f4014bf5e92d72d4e15d90cf3eab3713ceee5
parent9a6dcea10308df50ed54d6d5a43c9f6c3e927118 (diff)
SB1 cache exception handling.
Expand SB1 cache error handling by adding SB1_CEX_ALWAYS_FATAL and SB1_CEX_STALL, allowing configurable behavior on cache errors. Signed-Off-By: Andy Isaacson <adi@broadcom.com> Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r--arch/mips/mm/cerr-sb1.c54
-rw-r--r--arch/mips/mm/cex-sb1.S5
-rw-r--r--arch/mips/sibyte/Kconfig8
3 files changed, 59 insertions, 8 deletions
diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c
index 7166ffe63502..1cf3c6006ccd 100644
--- a/arch/mips/mm/cerr-sb1.c
+++ b/arch/mips/mm/cerr-sb1.c
@@ -19,13 +19,19 @@
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <asm/mipsregs.h> 20#include <asm/mipsregs.h>
21#include <asm/sibyte/sb1250.h> 21#include <asm/sibyte/sb1250.h>
22#include <asm/sibyte/sb1250_regs.h>
22 23
23#ifndef CONFIG_SIBYTE_BUS_WATCHER 24#if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE)
24#include <asm/io.h> 25#include <asm/io.h>
25#include <asm/sibyte/sb1250_regs.h>
26#include <asm/sibyte/sb1250_scd.h> 26#include <asm/sibyte/sb1250_scd.h>
27#endif 27#endif
28 28
29/*
30 * We'd like to dump the L2_ECC_TAG register on errors, but errata make
31 * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.)
32 */
33#undef DUMP_L2_ECC_TAG_ON_ERROR
34
29/* SB1 definitions */ 35/* SB1 definitions */
30 36
31/* XXX should come from config1 XXX */ 37/* XXX should come from config1 XXX */
@@ -139,12 +145,18 @@ static inline void breakout_cerrd(unsigned int val)
139static void check_bus_watcher(void) 145static void check_bus_watcher(void)
140{ 146{
141 uint32_t status, l2_err, memio_err; 147 uint32_t status, l2_err, memio_err;
148#ifdef DUMP_L2_ECC_TAG_ON_ERROR
149 uint64_t l2_tag;
150#endif
142 151
143 /* Destructive read, clears register and interrupt */ 152 /* Destructive read, clears register and interrupt */
144 status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); 153 status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS));
145 /* Bit 31 is always on, but there's no #define for that */ 154 /* Bit 31 is always on, but there's no #define for that */
146 if (status & ~(1UL << 31)) { 155 if (status & ~(1UL << 31)) {
147 l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); 156 l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS));
157#ifdef DUMP_L2_ECC_TAG_ON_ERROR
158 l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG);
159#endif
148 memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); 160 memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS));
149 prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); 161 prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err);
150 prom_printf("\nLast recorded signature:\n"); 162 prom_printf("\nLast recorded signature:\n");
@@ -153,6 +165,9 @@ static void check_bus_watcher(void)
153 (int)(G_SCD_BERR_TID(status) >> 6), 165 (int)(G_SCD_BERR_TID(status) >> 6),
154 (int)G_SCD_BERR_RID(status), 166 (int)G_SCD_BERR_RID(status),
155 (int)G_SCD_BERR_DCODE(status)); 167 (int)G_SCD_BERR_DCODE(status));
168#ifdef DUMP_L2_ECC_TAG_ON_ERROR
169 prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag);
170#endif
156 } else { 171 } else {
157 prom_printf("Bus watcher indicates no error\n"); 172 prom_printf("Bus watcher indicates no error\n");
158 } 173 }
@@ -166,6 +181,16 @@ asmlinkage void sb1_cache_error(void)
166 uint64_t cerr_dpa; 181 uint64_t cerr_dpa;
167 uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; 182 uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res;
168 183
184#ifdef CONFIG_SIBYTE_BW_TRACE
185 /* Freeze the trace buffer now */
186#if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80)
187 csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
188#else
189 csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG);
190#endif
191 prom_printf("Trace buffer frozen\n");
192#endif
193
169 prom_printf("Cache error exception on CPU %x:\n", 194 prom_printf("Cache error exception on CPU %x:\n",
170 (read_c0_prid() >> 25) & 0x7); 195 (read_c0_prid() >> 25) & 0x7);
171 196
@@ -229,11 +254,19 @@ asmlinkage void sb1_cache_error(void)
229 254
230 check_bus_watcher(); 255 check_bus_watcher();
231 256
232 while (1);
233 /* 257 /*
234 * This tends to make things get really ugly; let's just stall instead. 258 * Calling panic() when a fatal cache error occurs scrambles the
235 * panic("Can't handle the cache error!"); 259 * state of the system (and the cache), making it difficult to
260 * investigate after the fact. However, if you just stall the CPU,
261 * the other CPU may keep on running, which is typically very
262 * undesirable.
236 */ 263 */
264#ifdef CONFIG_SB1_CERR_STALL
265 while (1)
266 ;
267#else
268 panic("unhandled cache error");
269#endif
237} 270}
238 271
239 272
@@ -434,7 +467,8 @@ static struct dc_state dc_states[] = {
434}; 467};
435 468
436#define DC_TAG_VALID(state) \ 469#define DC_TAG_VALID(state) \
437 (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c)) 470 (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \
471 ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c))
438 472
439static char *dc_state_str(unsigned char state) 473static char *dc_state_str(unsigned char state)
440{ 474{
@@ -505,6 +539,7 @@ static uint32_t extract_dc(unsigned short addr, int data)
505 uint64_t datalo; 539 uint64_t datalo;
506 uint32_t datalohi, datalolo, datahi; 540 uint32_t datalohi, datalolo, datahi;
507 int offset; 541 int offset;
542 char bad_ecc = 0;
508 543
509 for (offset = 0; offset < 4; offset++) { 544 for (offset = 0; offset < 4; offset++) {
510 /* Index-load-data-D */ 545 /* Index-load-data-D */
@@ -525,8 +560,7 @@ static uint32_t extract_dc(unsigned short addr, int data)
525 ecc = dc_ecc(datalo); 560 ecc = dc_ecc(datalo);
526 if (ecc != datahi) { 561 if (ecc != datahi) {
527 int bits = 0; 562 int bits = 0;
528 prom_printf(" ** bad ECC (%02x %02x) ->", 563 bad_ecc |= 1 << (3-offset);
529 datahi, ecc);
530 ecc ^= datahi; 564 ecc ^= datahi;
531 while (ecc) { 565 while (ecc) {
532 if (ecc & 1) bits++; 566 if (ecc & 1) bits++;
@@ -537,6 +571,10 @@ static uint32_t extract_dc(unsigned short addr, int data)
537 prom_printf(" %02X-%016llX", datahi, datalo); 571 prom_printf(" %02X-%016llX", datahi, datalo);
538 } 572 }
539 prom_printf("\n"); 573 prom_printf("\n");
574 if (bad_ecc)
575 prom_printf(" dwords w/ bad ECC: %d %d %d %d\n",
576 !!(bad_ecc & 8), !!(bad_ecc & 4),
577 !!(bad_ecc & 2), !!(bad_ecc & 1));
540 } 578 }
541 } 579 }
542 return res; 580 return res;
diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S
index 2c3a23aa88c3..0e71580774ff 100644
--- a/arch/mips/mm/cex-sb1.S
+++ b/arch/mips/mm/cex-sb1.S
@@ -64,6 +64,10 @@ LEAF(except_vec2_sb1)
64 sd k0,0x170($0) 64 sd k0,0x170($0)
65 sd k1,0x178($0) 65 sd k1,0x178($0)
66 66
67#if CONFIG_SB1_CEX_ALWAYS_FATAL
68 j handle_vec2_sb1
69 nop
70#else
67 /* 71 /*
68 * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell 72 * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell
69 * if we can fast-path out of here for a h/w-recovered error. 73 * if we can fast-path out of here for a h/w-recovered error.
@@ -134,6 +138,7 @@ unrecoverable:
134 /* Unrecoverable Icache or Dcache error; log it and/or fail */ 138 /* Unrecoverable Icache or Dcache error; log it and/or fail */
135 j handle_vec2_sb1 139 j handle_vec2_sb1
136 nop 140 nop
141#endif
137 142
138END(except_vec2_sb1) 143END(except_vec2_sb1)
139 144
diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig
index 6a5a08f5e212..de46f62ac462 100644
--- a/arch/mips/sibyte/Kconfig
+++ b/arch/mips/sibyte/Kconfig
@@ -102,6 +102,14 @@ config SIMULATION
102 Build a kernel suitable for running under the GDB simulator. 102 Build a kernel suitable for running under the GDB simulator.
103 Primarily adjusts the kernel's notion of time. 103 Primarily adjusts the kernel's notion of time.
104 104
105config CONFIG_SB1_CEX_ALWAYS_FATAL
106 bool "All cache exceptions considered fatal (no recovery attempted)"
107 depends on SIBYTE_SB1xxx_SOC
108
109config CONFIG_SB1_CERR_STALL
110 bool "Stall (rather than panic) on fatal cache error"
111 depends on SIBYTE_SB1xxx_SOC
112
105config SIBYTE_CFE 113config SIBYTE_CFE
106 bool "Booting from CFE" 114 bool "Booting from CFE"
107 depends on SIBYTE_SB1xxx_SOC 115 depends on SIBYTE_SB1xxx_SOC