diff options
author | bob picco <bpicco@meloft.net> | 2014-09-16 09:26:47 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-09-16 20:46:44 -0400 |
commit | 4ccb9272892c33ef1c19a783cfa87103b30c2784 (patch) | |
tree | fe904676d83557eff6d1bb04127ba23541736140 | |
parent | d1105287aabe88dbb3af825140badaa05cf0442c (diff) |
sparc64: sun4v TLB error power off events
We've witnessed a few TLB events causing the machine to power off because
of prom_halt(). In one case it was some NFS-related area during rmmod. Another
was an mmapper of /dev/mem. A more recent one is an ITLB issue with
a bad pagesize, which could be a hardware bug. Bugs happen, but we should
attempt not to power off the machine and/or hang it when possible.
This is a DTLB error from an mmapper of /dev/mem:
[root@sparcie ~]# SUN4V-DTLB: Error at TPC[fffff80100903e6c], tl 1
SUN4V-DTLB: TPC<0xfffff80100903e6c>
SUN4V-DTLB: O7[fffff801081979d0]
SUN4V-DTLB: O7<0xfffff801081979d0>
SUN4V-DTLB: vaddr[fffff80100000000] ctx[1250] pte[98000000000f0610] error[2]
.
This is an ITLB error from recent mainline:
[ 3708.179864] SUN4V-ITLB: TPC<0xfffffc010071cefc>
[ 3708.188866] SUN4V-ITLB: O7[fffffc010071cee8]
[ 3708.197377] SUN4V-ITLB: O7<0xfffffc010071cee8>
[ 3708.206539] SUN4V-ITLB: vaddr[e0003] ctx[1a3c] pte[2900000dcc800eeb] error[4]
.
Normally sun4v_itlb_error_report() and sun4v_dtlb_error_report() would call
prom_halt() and drop us to the OF command prompt "ok". This isn't the case for
LDOMs, where the machine powers off instead.
For the HV-reported error HV_ENORADDR for the hypervisor's HV_MMU_MAP_ADDR_TRAP
we cause a SIGBUS error by checking the fault code mask for FAULT_CODE_BAD_RA
within do_sparc64_fault(). This is done when the trap level (%tl) is less than
or equal to one ("1"). Otherwise, for %tl > 1, we eventually proceed to
die_if_kernel().
The logic of this patch was partially inspired by David Miller's feedback.
Powering off large sparc64 machines is painful. Plus, die_if_kernel() provides
more context. A reset sequence isn't brief on large sparc64 machines, but it is
better than a power-off/power-on sequence.
Cc: sparclinux@vger.kernel.org
Signed-off-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | arch/sparc/include/asm/thread_info_64.h | 1 | ||||
-rw-r--r-- | arch/sparc/kernel/sun4v_tlb_miss.S | 35 | ||||
-rw-r--r-- | arch/sparc/kernel/traps_64.c | 15 | ||||
-rw-r--r-- | arch/sparc/mm/fault_64.c | 3 |
4 files changed, 34 insertions, 20 deletions
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h index a5f01ac6d0f1..f85dc8512ab3 100644 --- a/arch/sparc/include/asm/thread_info_64.h +++ b/arch/sparc/include/asm/thread_info_64.h | |||
@@ -102,6 +102,7 @@ struct thread_info { | |||
102 | #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ | 102 | #define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ |
103 | #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ | 103 | #define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ |
104 | #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ | 104 | #define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ |
105 | #define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */ | ||
105 | 106 | ||
106 | #if PAGE_SHIFT == 13 | 107 | #if PAGE_SHIFT == 13 |
107 | #define THREAD_SIZE (2*PAGE_SIZE) | 108 | #define THREAD_SIZE (2*PAGE_SIZE) |
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S index e0c09bf85610..6179e19bc9b9 100644 --- a/arch/sparc/kernel/sun4v_tlb_miss.S +++ b/arch/sparc/kernel/sun4v_tlb_miss.S | |||
@@ -195,6 +195,11 @@ sun4v_tsb_miss_common: | |||
195 | ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 | 195 | ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 |
196 | 196 | ||
197 | sun4v_itlb_error: | 197 | sun4v_itlb_error: |
198 | rdpr %tl, %g1 | ||
199 | cmp %g1, 1 | ||
200 | ble,pt %icc, sun4v_bad_ra | ||
201 | or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1 | ||
202 | |||
198 | sethi %hi(sun4v_err_itlb_vaddr), %g1 | 203 | sethi %hi(sun4v_err_itlb_vaddr), %g1 |
199 | stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] | 204 | stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] |
200 | sethi %hi(sun4v_err_itlb_ctx), %g1 | 205 | sethi %hi(sun4v_err_itlb_ctx), %g1 |
@@ -206,15 +211,10 @@ sun4v_itlb_error: | |||
206 | sethi %hi(sun4v_err_itlb_error), %g1 | 211 | sethi %hi(sun4v_err_itlb_error), %g1 |
207 | stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] | 212 | stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] |
208 | 213 | ||
214 | sethi %hi(1f), %g7 | ||
209 | rdpr %tl, %g4 | 215 | rdpr %tl, %g4 |
210 | cmp %g4, 1 | ||
211 | ble,pt %icc, 1f | ||
212 | sethi %hi(2f), %g7 | ||
213 | ba,pt %xcc, etraptl1 | 216 | ba,pt %xcc, etraptl1 |
214 | or %g7, %lo(2f), %g7 | 217 | 1: or %g7, %lo(1f), %g7 |
215 | |||
216 | 1: ba,pt %xcc, etrap | ||
217 | 2: or %g7, %lo(2b), %g7 | ||
218 | mov %l4, %o1 | 218 | mov %l4, %o1 |
219 | call sun4v_itlb_error_report | 219 | call sun4v_itlb_error_report |
220 | add %sp, PTREGS_OFF, %o0 | 220 | add %sp, PTREGS_OFF, %o0 |
@@ -222,6 +222,11 @@ sun4v_itlb_error: | |||
222 | /* NOTREACHED */ | 222 | /* NOTREACHED */ |
223 | 223 | ||
224 | sun4v_dtlb_error: | 224 | sun4v_dtlb_error: |
225 | rdpr %tl, %g1 | ||
226 | cmp %g1, 1 | ||
227 | ble,pt %icc, sun4v_bad_ra | ||
228 | or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1 | ||
229 | |||
225 | sethi %hi(sun4v_err_dtlb_vaddr), %g1 | 230 | sethi %hi(sun4v_err_dtlb_vaddr), %g1 |
226 | stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] | 231 | stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] |
227 | sethi %hi(sun4v_err_dtlb_ctx), %g1 | 232 | sethi %hi(sun4v_err_dtlb_ctx), %g1 |
@@ -233,21 +238,23 @@ sun4v_dtlb_error: | |||
233 | sethi %hi(sun4v_err_dtlb_error), %g1 | 238 | sethi %hi(sun4v_err_dtlb_error), %g1 |
234 | stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] | 239 | stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] |
235 | 240 | ||
241 | sethi %hi(1f), %g7 | ||
236 | rdpr %tl, %g4 | 242 | rdpr %tl, %g4 |
237 | cmp %g4, 1 | ||
238 | ble,pt %icc, 1f | ||
239 | sethi %hi(2f), %g7 | ||
240 | ba,pt %xcc, etraptl1 | 243 | ba,pt %xcc, etraptl1 |
241 | or %g7, %lo(2f), %g7 | 244 | 1: or %g7, %lo(1f), %g7 |
242 | |||
243 | 1: ba,pt %xcc, etrap | ||
244 | 2: or %g7, %lo(2b), %g7 | ||
245 | mov %l4, %o1 | 245 | mov %l4, %o1 |
246 | call sun4v_dtlb_error_report | 246 | call sun4v_dtlb_error_report |
247 | add %sp, PTREGS_OFF, %o0 | 247 | add %sp, PTREGS_OFF, %o0 |
248 | 248 | ||
249 | /* NOTREACHED */ | 249 | /* NOTREACHED */ |
250 | 250 | ||
251 | sun4v_bad_ra: | ||
252 | or %g0, %g4, %g5 | ||
253 | ba,pt %xcc, sparc64_realfault_common | ||
254 | or %g1, %g0, %g4 | ||
255 | |||
256 | /* NOTREACHED */ | ||
257 | |||
251 | /* Instruction Access Exception, tl0. */ | 258 | /* Instruction Access Exception, tl0. */ |
252 | sun4v_iacc: | 259 | sun4v_iacc: |
253 | ldxa [%g0] ASI_SCRATCHPAD, %g2 | 260 | ldxa [%g0] ASI_SCRATCHPAD, %g2 |
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index fb6640ec8557..981a769b9558 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c | |||
@@ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs) | |||
2104 | atomic_inc(&sun4v_nonresum_oflow_cnt); | 2104 | atomic_inc(&sun4v_nonresum_oflow_cnt); |
2105 | } | 2105 | } |
2106 | 2106 | ||
2107 | static void sun4v_tlb_error(struct pt_regs *regs) | ||
2108 | { | ||
2109 | die_if_kernel("TLB/TSB error", regs); | ||
2110 | } | ||
2111 | |||
2107 | unsigned long sun4v_err_itlb_vaddr; | 2112 | unsigned long sun4v_err_itlb_vaddr; |
2108 | unsigned long sun4v_err_itlb_ctx; | 2113 | unsigned long sun4v_err_itlb_ctx; |
2109 | unsigned long sun4v_err_itlb_pte; | 2114 | unsigned long sun4v_err_itlb_pte; |
@@ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error; | |||
2111 | 2116 | ||
2112 | void sun4v_itlb_error_report(struct pt_regs *regs, int tl) | 2117 | void sun4v_itlb_error_report(struct pt_regs *regs, int tl) |
2113 | { | 2118 | { |
2114 | if (tl > 1) | 2119 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); |
2115 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||
2116 | 2120 | ||
2117 | printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", | 2121 | printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", |
2118 | regs->tpc, tl); | 2122 | regs->tpc, tl); |
@@ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl) | |||
2125 | sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, | 2129 | sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, |
2126 | sun4v_err_itlb_pte, sun4v_err_itlb_error); | 2130 | sun4v_err_itlb_pte, sun4v_err_itlb_error); |
2127 | 2131 | ||
2128 | prom_halt(); | 2132 | sun4v_tlb_error(regs); |
2129 | } | 2133 | } |
2130 | 2134 | ||
2131 | unsigned long sun4v_err_dtlb_vaddr; | 2135 | unsigned long sun4v_err_dtlb_vaddr; |
@@ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error; | |||
2135 | 2139 | ||
2136 | void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) | 2140 | void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) |
2137 | { | 2141 | { |
2138 | if (tl > 1) | 2142 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); |
2139 | dump_tl1_traplog((struct tl1_traplog *)(regs + 1)); | ||
2140 | 2143 | ||
2141 | printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", | 2144 | printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", |
2142 | regs->tpc, tl); | 2145 | regs->tpc, tl); |
@@ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) | |||
2149 | sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, | 2152 | sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, |
2150 | sun4v_err_dtlb_pte, sun4v_err_dtlb_error); | 2153 | sun4v_err_dtlb_pte, sun4v_err_dtlb_error); |
2151 | 2154 | ||
2152 | prom_halt(); | 2155 | sun4v_tlb_error(regs); |
2153 | } | 2156 | } |
2154 | 2157 | ||
2155 | void hypervisor_tlbop_error(unsigned long err, unsigned long op) | 2158 | void hypervisor_tlbop_error(unsigned long err, unsigned long op) |
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 587cd0565128..18fcd7167095 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c | |||
@@ -346,6 +346,9 @@ retry: | |||
346 | down_read(&mm->mmap_sem); | 346 | down_read(&mm->mmap_sem); |
347 | } | 347 | } |
348 | 348 | ||
349 | if (fault_code & FAULT_CODE_BAD_RA) | ||
350 | goto do_sigbus; | ||
351 | |||
349 | vma = find_vma(mm, address); | 352 | vma = find_vma(mm, address); |
350 | if (!vma) | 353 | if (!vma) |
351 | goto bad_area; | 354 | goto bad_area; |