diff options
-rw-r--r-- | arch/mips/mm/cerr-sb1.c | 54 | ||||
-rw-r--r-- | arch/mips/mm/cex-sb1.S | 5 | ||||
-rw-r--r-- | arch/mips/sibyte/Kconfig | 8 |
3 files changed, 59 insertions, 8 deletions
diff --git a/arch/mips/mm/cerr-sb1.c b/arch/mips/mm/cerr-sb1.c index 7166ffe63502..1cf3c6006ccd 100644 --- a/arch/mips/mm/cerr-sb1.c +++ b/arch/mips/mm/cerr-sb1.c | |||
@@ -19,13 +19,19 @@ | |||
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <asm/mipsregs.h> | 20 | #include <asm/mipsregs.h> |
21 | #include <asm/sibyte/sb1250.h> | 21 | #include <asm/sibyte/sb1250.h> |
22 | #include <asm/sibyte/sb1250_regs.h> | ||
22 | 23 | ||
23 | #ifndef CONFIG_SIBYTE_BUS_WATCHER | 24 | #if !defined(CONFIG_SIBYTE_BUS_WATCHER) || defined(CONFIG_SIBYTE_BW_TRACE) |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include <asm/sibyte/sb1250_regs.h> | ||
26 | #include <asm/sibyte/sb1250_scd.h> | 26 | #include <asm/sibyte/sb1250_scd.h> |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | /* | ||
30 | * We'd like to dump the L2_ECC_TAG register on errors, but errata make | ||
31 | * that unsafe... So for now we don't. (BCM1250/BCM112x erratum SOC-48.) | ||
32 | */ | ||
33 | #undef DUMP_L2_ECC_TAG_ON_ERROR | ||
34 | |||
29 | /* SB1 definitions */ | 35 | /* SB1 definitions */ |
30 | 36 | ||
31 | /* XXX should come from config1 XXX */ | 37 | /* XXX should come from config1 XXX */ |
@@ -139,12 +145,18 @@ static inline void breakout_cerrd(unsigned int val) | |||
139 | static void check_bus_watcher(void) | 145 | static void check_bus_watcher(void) |
140 | { | 146 | { |
141 | uint32_t status, l2_err, memio_err; | 147 | uint32_t status, l2_err, memio_err; |
148 | #ifdef DUMP_L2_ECC_TAG_ON_ERROR | ||
149 | uint64_t l2_tag; | ||
150 | #endif | ||
142 | 151 | ||
143 | /* Destructive read, clears register and interrupt */ | 152 | /* Destructive read, clears register and interrupt */ |
144 | status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); | 153 | status = csr_in32(IOADDR(A_SCD_BUS_ERR_STATUS)); |
145 | /* Bit 31 is always on, but there's no #define for that */ | 154 | /* Bit 31 is always on, but there's no #define for that */ |
146 | if (status & ~(1UL << 31)) { | 155 | if (status & ~(1UL << 31)) { |
147 | l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); | 156 | l2_err = csr_in32(IOADDR(A_BUS_L2_ERRORS)); |
157 | #ifdef DUMP_L2_ECC_TAG_ON_ERROR | ||
158 | l2_tag = in64(IO_SPACE_BASE | A_L2_ECC_TAG); | ||
159 | #endif | ||
148 | memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); | 160 | memio_err = csr_in32(IOADDR(A_BUS_MEM_IO_ERRORS)); |
149 | prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); | 161 | prom_printf("Bus watcher error counters: %08x %08x\n", l2_err, memio_err); |
150 | prom_printf("\nLast recorded signature:\n"); | 162 | prom_printf("\nLast recorded signature:\n"); |
@@ -153,6 +165,9 @@ static void check_bus_watcher(void) | |||
153 | (int)(G_SCD_BERR_TID(status) >> 6), | 165 | (int)(G_SCD_BERR_TID(status) >> 6), |
154 | (int)G_SCD_BERR_RID(status), | 166 | (int)G_SCD_BERR_RID(status), |
155 | (int)G_SCD_BERR_DCODE(status)); | 167 | (int)G_SCD_BERR_DCODE(status)); |
168 | #ifdef DUMP_L2_ECC_TAG_ON_ERROR | ||
169 | prom_printf("Last L2 tag w/ bad ECC: %016llx\n", l2_tag); | ||
170 | #endif | ||
156 | } else { | 171 | } else { |
157 | prom_printf("Bus watcher indicates no error\n"); | 172 | prom_printf("Bus watcher indicates no error\n"); |
158 | } | 173 | } |
@@ -166,6 +181,16 @@ asmlinkage void sb1_cache_error(void) | |||
166 | uint64_t cerr_dpa; | 181 | uint64_t cerr_dpa; |
167 | uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; | 182 | uint32_t errctl, cerr_i, cerr_d, dpalo, dpahi, eepc, res; |
168 | 183 | ||
184 | #ifdef CONFIG_SIBYTE_BW_TRACE | ||
185 | /* Freeze the trace buffer now */ | ||
186 | #if defined(CONFIG_SIBYTE_BCM1x55) || defined(CONFIG_SIBYTE_BCM1x80) | ||
187 | csr_out32(M_BCM1480_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); | ||
188 | #else | ||
189 | csr_out32(M_SCD_TRACE_CFG_FREEZE, IO_SPACE_BASE | A_SCD_TRACE_CFG); | ||
190 | #endif | ||
191 | prom_printf("Trace buffer frozen\n"); | ||
192 | #endif | ||
193 | |||
169 | prom_printf("Cache error exception on CPU %x:\n", | 194 | prom_printf("Cache error exception on CPU %x:\n", |
170 | (read_c0_prid() >> 25) & 0x7); | 195 | (read_c0_prid() >> 25) & 0x7); |
171 | 196 | ||
@@ -229,11 +254,19 @@ asmlinkage void sb1_cache_error(void) | |||
229 | 254 | ||
230 | check_bus_watcher(); | 255 | check_bus_watcher(); |
231 | 256 | ||
232 | while (1); | ||
233 | /* | 257 | /* |
234 | * This tends to make things get really ugly; let's just stall instead. | 258 | * Calling panic() when a fatal cache error occurs scrambles the |
235 | * panic("Can't handle the cache error!"); | 259 | * state of the system (and the cache), making it difficult to |
260 | * investigate after the fact. However, if you just stall the CPU, | ||
261 | * the other CPU may keep on running, which is typically very | ||
262 | * undesirable. | ||
236 | */ | 263 | */ |
264 | #ifdef CONFIG_SB1_CERR_STALL | ||
265 | while (1) | ||
266 | ; | ||
267 | #else | ||
268 | panic("unhandled cache error"); | ||
269 | #endif | ||
237 | } | 270 | } |
238 | 271 | ||
239 | 272 | ||
@@ -434,7 +467,8 @@ static struct dc_state dc_states[] = { | |||
434 | }; | 467 | }; |
435 | 468 | ||
436 | #define DC_TAG_VALID(state) \ | 469 | #define DC_TAG_VALID(state) \ |
437 | (((state) == 0xf) || ((state) == 0x13) || ((state) == 0x19) || ((state == 0x16)) || ((state) == 0x1c)) | 470 | (((state) == 0x0) || ((state) == 0xf) || ((state) == 0x13) || \ |
471 | ((state) == 0x19) || ((state) == 0x16) || ((state) == 0x1c)) | ||
438 | 472 | ||
439 | static char *dc_state_str(unsigned char state) | 473 | static char *dc_state_str(unsigned char state) |
440 | { | 474 | { |
@@ -505,6 +539,7 @@ static uint32_t extract_dc(unsigned short addr, int data) | |||
505 | uint64_t datalo; | 539 | uint64_t datalo; |
506 | uint32_t datalohi, datalolo, datahi; | 540 | uint32_t datalohi, datalolo, datahi; |
507 | int offset; | 541 | int offset; |
542 | char bad_ecc = 0; | ||
508 | 543 | ||
509 | for (offset = 0; offset < 4; offset++) { | 544 | for (offset = 0; offset < 4; offset++) { |
510 | /* Index-load-data-D */ | 545 | /* Index-load-data-D */ |
@@ -525,8 +560,7 @@ static uint32_t extract_dc(unsigned short addr, int data) | |||
525 | ecc = dc_ecc(datalo); | 560 | ecc = dc_ecc(datalo); |
526 | if (ecc != datahi) { | 561 | if (ecc != datahi) { |
527 | int bits = 0; | 562 | int bits = 0; |
528 | prom_printf(" ** bad ECC (%02x %02x) ->", | 563 | bad_ecc |= 1 << (3-offset); |
529 | datahi, ecc); | ||
530 | ecc ^= datahi; | 564 | ecc ^= datahi; |
531 | while (ecc) { | 565 | while (ecc) { |
532 | if (ecc & 1) bits++; | 566 | if (ecc & 1) bits++; |
@@ -537,6 +571,10 @@ static uint32_t extract_dc(unsigned short addr, int data) | |||
537 | prom_printf(" %02X-%016llX", datahi, datalo); | 571 | prom_printf(" %02X-%016llX", datahi, datalo); |
538 | } | 572 | } |
539 | prom_printf("\n"); | 573 | prom_printf("\n"); |
574 | if (bad_ecc) | ||
575 | prom_printf(" dwords w/ bad ECC: %d %d %d %d\n", | ||
576 | !!(bad_ecc & 8), !!(bad_ecc & 4), | ||
577 | !!(bad_ecc & 2), !!(bad_ecc & 1)); | ||
540 | } | 578 | } |
541 | } | 579 | } |
542 | return res; | 580 | return res; |
diff --git a/arch/mips/mm/cex-sb1.S b/arch/mips/mm/cex-sb1.S index 2c3a23aa88c3..0e71580774ff 100644 --- a/arch/mips/mm/cex-sb1.S +++ b/arch/mips/mm/cex-sb1.S | |||
@@ -64,6 +64,10 @@ LEAF(except_vec2_sb1) | |||
64 | sd k0,0x170($0) | 64 | sd k0,0x170($0) |
65 | sd k1,0x178($0) | 65 | sd k1,0x178($0) |
66 | 66 | ||
67 | #if CONFIG_SB1_CEX_ALWAYS_FATAL | ||
68 | j handle_vec2_sb1 | ||
69 | nop | ||
70 | #else | ||
67 | /* | 71 | /* |
68 | * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell | 72 | * M_ERRCTL_RECOVERABLE is bit 31, which makes it easy to tell |
69 | * if we can fast-path out of here for a h/w-recovered error. | 73 | * if we can fast-path out of here for a h/w-recovered error. |
@@ -134,6 +138,7 @@ unrecoverable: | |||
134 | /* Unrecoverable Icache or Dcache error; log it and/or fail */ | 138 | /* Unrecoverable Icache or Dcache error; log it and/or fail */ |
135 | j handle_vec2_sb1 | 139 | j handle_vec2_sb1 |
136 | nop | 140 | nop |
141 | #endif | ||
137 | 142 | ||
138 | END(except_vec2_sb1) | 143 | END(except_vec2_sb1) |
139 | 144 | ||
diff --git a/arch/mips/sibyte/Kconfig b/arch/mips/sibyte/Kconfig index 6a5a08f5e212..de46f62ac462 100644 --- a/arch/mips/sibyte/Kconfig +++ b/arch/mips/sibyte/Kconfig | |||
@@ -102,6 +102,14 @@ config SIMULATION | |||
102 | Build a kernel suitable for running under the GDB simulator. | 102 | Build a kernel suitable for running under the GDB simulator. |
103 | Primarily adjusts the kernel's notion of time. | 103 | Primarily adjusts the kernel's notion of time. |
104 | 104 | ||
105 | config SB1_CEX_ALWAYS_FATAL | ||
106 | bool "All cache exceptions considered fatal (no recovery attempted)" | ||
107 | depends on SIBYTE_SB1xxx_SOC | ||
108 | |||
109 | config SB1_CERR_STALL | ||
110 | bool "Stall (rather than panic) on fatal cache error" | ||
111 | depends on SIBYTE_SB1xxx_SOC | ||
112 | |||
105 | config SIBYTE_CFE | 113 | config SIBYTE_CFE |
106 | bool "Booting from CFE" | 114 | bool "Booting from CFE" |
107 | depends on SIBYTE_SB1xxx_SOC | 115 | depends on SIBYTE_SB1xxx_SOC |