aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: bob picco <bpicco@meloft.net> 2014-09-16 09:26:47 -0400
committer: David S. Miller <davem@davemloft.net> 2014-09-16 20:46:44 -0400
commit: 4ccb9272892c33ef1c19a783cfa87103b30c2784 (patch)
tree: fe904676d83557eff6d1bb04127ba23541736140
parent: d1105287aabe88dbb3af825140badaa05cf0442c (diff)
sparc64: sun4v TLB error power off events
We've witnessed a few TLB events causing the machine to power off because of prom_halt. In one case it was some NFS-related area during rmmod. Another was an mmapper of /dev/mem. A more recent one is an ITLB issue with a bad pagesize, which could be a hardware bug. Bugs happen, but we should attempt to not power off the machine and/or hang it when possible.

This is a DTLB error from an mmapper of /dev/mem:
[root@sparcie ~]# SUN4V-DTLB: Error at TPC[fffff80100903e6c], tl 1
SUN4V-DTLB: TPC<0xfffff80100903e6c>
SUN4V-DTLB: O7[fffff801081979d0]
SUN4V-DTLB: O7<0xfffff801081979d0>
SUN4V-DTLB: vaddr[fffff80100000000] ctx[1250] pte[98000000000f0610] error[2]
.

This is recent mainline for ITLB:
[ 3708.179864] SUN4V-ITLB: TPC<0xfffffc010071cefc>
[ 3708.188866] SUN4V-ITLB: O7[fffffc010071cee8]
[ 3708.197377] SUN4V-ITLB: O7<0xfffffc010071cee8>
[ 3708.206539] SUN4V-ITLB: vaddr[e0003] ctx[1a3c] pte[2900000dcc800eeb] error[4]
.

Normally sun4v_itlb_error_report() and sun4v_dtlb_error_report() would call prom_halt() and drop us to the OF command prompt "ok". This isn't the case for LDOMs, and the machine powers off.

For the HV-reported error HV_ENORADDR from the HV trap HV_MMU_MAP_ADDR_TRAP, we cause a SIGBUS error by qualifying it within do_sparc64_fault() for a fault code mask of FAULT_CODE_BAD_RA. This is done when the trap level (%tl) is less than or equal to one (1). Otherwise, for %tl > 1, we eventually proceed to die_if_kernel().

The logic of this patch was partially inspired by David Miller's feedback.

Power off of large sparc64 machines is painful. Plus, die_if_kernel provides more context. A reset sequence isn't brief on large sparc64 machines, but it is better than a power-off/power-on sequence.

Cc: sparclinux@vger.kernel.org
Signed-off-by: Bob Picco <bob.picco@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- arch/sparc/include/asm/thread_info_64.h 1
-rw-r--r-- arch/sparc/kernel/sun4v_tlb_miss.S 35
-rw-r--r-- arch/sparc/kernel/traps_64.c 15
-rw-r--r-- arch/sparc/mm/fault_64.c 3
4 files changed, 34 insertions, 20 deletions
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index a5f01ac6d0f1..f85dc8512ab3 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -102,6 +102,7 @@ struct thread_info {
102#define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */ 102#define FAULT_CODE_ITLB 0x04 /* Miss happened in I-TLB */
103#define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */ 103#define FAULT_CODE_WINFIXUP 0x08 /* Miss happened during spill/fill */
104#define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */ 104#define FAULT_CODE_BLKCOMMIT 0x10 /* Use blk-commit ASI in copy_page */
105#define FAULT_CODE_BAD_RA 0x20 /* Bad RA for sun4v */
105 106
106#if PAGE_SHIFT == 13 107#if PAGE_SHIFT == 13
107#define THREAD_SIZE (2*PAGE_SIZE) 108#define THREAD_SIZE (2*PAGE_SIZE)
diff --git a/arch/sparc/kernel/sun4v_tlb_miss.S b/arch/sparc/kernel/sun4v_tlb_miss.S
index e0c09bf85610..6179e19bc9b9 100644
--- a/arch/sparc/kernel/sun4v_tlb_miss.S
+++ b/arch/sparc/kernel/sun4v_tlb_miss.S
@@ -195,6 +195,11 @@ sun4v_tsb_miss_common:
195 ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7 195 ldx [%g2 + TRAP_PER_CPU_PGD_PADDR], %g7
196 196
197sun4v_itlb_error: 197sun4v_itlb_error:
198 rdpr %tl, %g1
199 cmp %g1, 1
200 ble,pt %icc, sun4v_bad_ra
201 or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_ITLB, %g1
202
198 sethi %hi(sun4v_err_itlb_vaddr), %g1 203 sethi %hi(sun4v_err_itlb_vaddr), %g1
199 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)] 204 stx %g4, [%g1 + %lo(sun4v_err_itlb_vaddr)]
200 sethi %hi(sun4v_err_itlb_ctx), %g1 205 sethi %hi(sun4v_err_itlb_ctx), %g1
@@ -206,15 +211,10 @@ sun4v_itlb_error:
206 sethi %hi(sun4v_err_itlb_error), %g1 211 sethi %hi(sun4v_err_itlb_error), %g1
207 stx %o0, [%g1 + %lo(sun4v_err_itlb_error)] 212 stx %o0, [%g1 + %lo(sun4v_err_itlb_error)]
208 213
214 sethi %hi(1f), %g7
209 rdpr %tl, %g4 215 rdpr %tl, %g4
210 cmp %g4, 1
211 ble,pt %icc, 1f
212 sethi %hi(2f), %g7
213 ba,pt %xcc, etraptl1 216 ba,pt %xcc, etraptl1
214 or %g7, %lo(2f), %g7 2171: or %g7, %lo(1f), %g7
215
2161: ba,pt %xcc, etrap
2172: or %g7, %lo(2b), %g7
218 mov %l4, %o1 218 mov %l4, %o1
219 call sun4v_itlb_error_report 219 call sun4v_itlb_error_report
220 add %sp, PTREGS_OFF, %o0 220 add %sp, PTREGS_OFF, %o0
@@ -222,6 +222,11 @@ sun4v_itlb_error:
222 /* NOTREACHED */ 222 /* NOTREACHED */
223 223
224sun4v_dtlb_error: 224sun4v_dtlb_error:
225 rdpr %tl, %g1
226 cmp %g1, 1
227 ble,pt %icc, sun4v_bad_ra
228 or %g0, FAULT_CODE_BAD_RA | FAULT_CODE_DTLB, %g1
229
225 sethi %hi(sun4v_err_dtlb_vaddr), %g1 230 sethi %hi(sun4v_err_dtlb_vaddr), %g1
226 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)] 231 stx %g4, [%g1 + %lo(sun4v_err_dtlb_vaddr)]
227 sethi %hi(sun4v_err_dtlb_ctx), %g1 232 sethi %hi(sun4v_err_dtlb_ctx), %g1
@@ -233,21 +238,23 @@ sun4v_dtlb_error:
233 sethi %hi(sun4v_err_dtlb_error), %g1 238 sethi %hi(sun4v_err_dtlb_error), %g1
234 stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)] 239 stx %o0, [%g1 + %lo(sun4v_err_dtlb_error)]
235 240
241 sethi %hi(1f), %g7
236 rdpr %tl, %g4 242 rdpr %tl, %g4
237 cmp %g4, 1
238 ble,pt %icc, 1f
239 sethi %hi(2f), %g7
240 ba,pt %xcc, etraptl1 243 ba,pt %xcc, etraptl1
241 or %g7, %lo(2f), %g7 2441: or %g7, %lo(1f), %g7
242
2431: ba,pt %xcc, etrap
2442: or %g7, %lo(2b), %g7
245 mov %l4, %o1 245 mov %l4, %o1
246 call sun4v_dtlb_error_report 246 call sun4v_dtlb_error_report
247 add %sp, PTREGS_OFF, %o0 247 add %sp, PTREGS_OFF, %o0
248 248
249 /* NOTREACHED */ 249 /* NOTREACHED */
250 250
251sun4v_bad_ra:
252 or %g0, %g4, %g5
253 ba,pt %xcc, sparc64_realfault_common
254 or %g1, %g0, %g4
255
256 /* NOTREACHED */
257
251 /* Instruction Access Exception, tl0. */ 258 /* Instruction Access Exception, tl0. */
252sun4v_iacc: 259sun4v_iacc:
253 ldxa [%g0] ASI_SCRATCHPAD, %g2 260 ldxa [%g0] ASI_SCRATCHPAD, %g2
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index fb6640ec8557..981a769b9558 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2104,6 +2104,11 @@ void sun4v_nonresum_overflow(struct pt_regs *regs)
2104 atomic_inc(&sun4v_nonresum_oflow_cnt); 2104 atomic_inc(&sun4v_nonresum_oflow_cnt);
2105} 2105}
2106 2106
2107static void sun4v_tlb_error(struct pt_regs *regs)
2108{
2109 die_if_kernel("TLB/TSB error", regs);
2110}
2111
2107unsigned long sun4v_err_itlb_vaddr; 2112unsigned long sun4v_err_itlb_vaddr;
2108unsigned long sun4v_err_itlb_ctx; 2113unsigned long sun4v_err_itlb_ctx;
2109unsigned long sun4v_err_itlb_pte; 2114unsigned long sun4v_err_itlb_pte;
@@ -2111,8 +2116,7 @@ unsigned long sun4v_err_itlb_error;
2111 2116
2112void sun4v_itlb_error_report(struct pt_regs *regs, int tl) 2117void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
2113{ 2118{
2114 if (tl > 1) 2119 dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
2115 dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
2116 2120
2117 printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n", 2121 printk(KERN_EMERG "SUN4V-ITLB: Error at TPC[%lx], tl %d\n",
2118 regs->tpc, tl); 2122 regs->tpc, tl);
@@ -2125,7 +2129,7 @@ void sun4v_itlb_error_report(struct pt_regs *regs, int tl)
2125 sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx, 2129 sun4v_err_itlb_vaddr, sun4v_err_itlb_ctx,
2126 sun4v_err_itlb_pte, sun4v_err_itlb_error); 2130 sun4v_err_itlb_pte, sun4v_err_itlb_error);
2127 2131
2128 prom_halt(); 2132 sun4v_tlb_error(regs);
2129} 2133}
2130 2134
2131unsigned long sun4v_err_dtlb_vaddr; 2135unsigned long sun4v_err_dtlb_vaddr;
@@ -2135,8 +2139,7 @@ unsigned long sun4v_err_dtlb_error;
2135 2139
2136void sun4v_dtlb_error_report(struct pt_regs *regs, int tl) 2140void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
2137{ 2141{
2138 if (tl > 1) 2142 dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
2139 dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
2140 2143
2141 printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n", 2144 printk(KERN_EMERG "SUN4V-DTLB: Error at TPC[%lx], tl %d\n",
2142 regs->tpc, tl); 2145 regs->tpc, tl);
@@ -2149,7 +2152,7 @@ void sun4v_dtlb_error_report(struct pt_regs *regs, int tl)
2149 sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx, 2152 sun4v_err_dtlb_vaddr, sun4v_err_dtlb_ctx,
2150 sun4v_err_dtlb_pte, sun4v_err_dtlb_error); 2153 sun4v_err_dtlb_pte, sun4v_err_dtlb_error);
2151 2154
2152 prom_halt(); 2155 sun4v_tlb_error(regs);
2153} 2156}
2154 2157
2155void hypervisor_tlbop_error(unsigned long err, unsigned long op) 2158void hypervisor_tlbop_error(unsigned long err, unsigned long op)
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 587cd0565128..18fcd7167095 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -346,6 +346,9 @@ retry:
346 down_read(&mm->mmap_sem); 346 down_read(&mm->mmap_sem);
347 } 347 }
348 348
349 if (fault_code & FAULT_CODE_BAD_RA)
350 goto do_sigbus;
351
349 vma = find_vma(mm, address); 352 vma = find_vma(mm, address);
350 if (!vma) 353 if (!vma)
351 goto bad_area; 354 goto bad_area;