aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> 2013-12-16 00:16:24 -0500
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-03-06 23:52:10 -0500
commit: 55672ecfa21f23616541c50e0e687f14f9ecf165 (patch)
tree: 1584ec952c56dcc7b2894085ddea9de3825c8d08 /arch/powerpc/kernel
parent: d2a36071ef8dd24dceb95c3d9b05aaeac987b447 (diff)
powerpc/book3s: Recover from MC in sapphire on SCOM read via MMIO.
Detect and recover from machine check when inside OPAL on special SCOM load instructions. On specific SCOM read via MMIO we may get a machine check exception with SRR0 pointing inside OPAL. To recover from MC in this scenario, get a recovery instruction address and return to it from MC.

OPAL will export the machine check recoverable ranges through device tree node mcheck-recoverable-ranges under ibm,opal:

# hexdump /proc/device-tree/ibm,opal/mcheck-recoverable-ranges
0000000 0000 0000 3000 2804 0000 000c 0000 0000
0000010 3000 2814 0000 0000 3000 27f0 0000 000c
0000020 0000 0000 3000 2814 xxxx xxxx xxxx xxxx
0000030 llll llll yyyy yyyy yyyy yyyy
...
...
#

where:
    xxxx xxxx xxxx xxxx = Starting instruction address
    llll llll = Length of the address range.
    yyyy yyyy yyyy yyyy = recovery address

Each recoverable address range entry is (start address, len, recovery address), 2 cells each for start and recovery address, 1 cell for len, totalling 5 cells per entry. During kernel boot time, build up the recovery table with the list of recovery ranges from device-tree node which will be used during machine check exception to recover from MMIO SCOM UE.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- arch/powerpc/kernel/mce.c 4
-rw-r--r-- arch/powerpc/kernel/mce_power.c 37
-rw-r--r-- arch/powerpc/kernel/prom.c 5
3 files changed, 40 insertions, 6 deletions
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index cadef7e64e42..a7fd4cb78b78 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -70,7 +70,7 @@ static void mce_set_error_info(struct machine_check_event *mce,
70 */ 70 */
71void save_mce_event(struct pt_regs *regs, long handled, 71void save_mce_event(struct pt_regs *regs, long handled,
72 struct mce_error_info *mce_err, 72 struct mce_error_info *mce_err,
73 uint64_t addr) 73 uint64_t nip, uint64_t addr)
74{ 74{
75 uint64_t srr1; 75 uint64_t srr1;
76 int index = __get_cpu_var(mce_nest_count)++; 76 int index = __get_cpu_var(mce_nest_count)++;
@@ -86,7 +86,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
86 86
87 /* Populate generic machine check info */ 87 /* Populate generic machine check info */
88 mce->version = MCE_V1; 88 mce->version = MCE_V1;
89 mce->srr0 = regs->nip; 89 mce->srr0 = nip;
90 mce->srr1 = regs->msr; 90 mce->srr1 = regs->msr;
91 mce->gpr3 = regs->gpr[3]; 91 mce->gpr3 = regs->gpr[3];
92 mce->in_use = 1; 92 mce->in_use = 1;
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 27c93f41166f..aa9aff3d6ad3 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -26,6 +26,7 @@
26#include <linux/ptrace.h> 26#include <linux/ptrace.h>
27#include <asm/mmu.h> 27#include <asm/mmu.h>
28#include <asm/mce.h> 28#include <asm/mce.h>
29#include <asm/machdep.h>
29 30
30/* flush SLBs and reload */ 31/* flush SLBs and reload */
31static void flush_and_reload_slb(void) 32static void flush_and_reload_slb(void)
@@ -197,13 +198,32 @@ static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
197 } 198 }
198} 199}
199 200
201static long mce_handle_ue_error(struct pt_regs *regs)
202{
203 long handled = 0;
204
205 /*
206 * On specific SCOM read via MMIO we may get a machine check
207 * exception with SRR0 pointing inside opal. If that is the
208 * case OPAL may have recovery address to re-read SCOM data in
209 * different way and hence we can recover from this MC.
210 */
211
212 if (ppc_md.mce_check_early_recovery) {
213 if (ppc_md.mce_check_early_recovery(regs))
214 handled = 1;
215 }
216 return handled;
217}
218
200long __machine_check_early_realmode_p7(struct pt_regs *regs) 219long __machine_check_early_realmode_p7(struct pt_regs *regs)
201{ 220{
202 uint64_t srr1, addr; 221 uint64_t srr1, nip, addr;
203 long handled = 1; 222 long handled = 1;
204 struct mce_error_info mce_error_info = { 0 }; 223 struct mce_error_info mce_error_info = { 0 };
205 224
206 srr1 = regs->msr; 225 srr1 = regs->msr;
226 nip = regs->nip;
207 227
208 /* 228 /*
209 * Handle memory errors depending whether this was a load/store or 229 * Handle memory errors depending whether this was a load/store or
@@ -221,7 +241,11 @@ long __machine_check_early_realmode_p7(struct pt_regs *regs)
221 addr = regs->nip; 241 addr = regs->nip;
222 } 242 }
223 243
224 save_mce_event(regs, handled, &mce_error_info, addr); 244 /* Handle UE error. */
245 if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
246 handled = mce_handle_ue_error(regs);
247
248 save_mce_event(regs, handled, &mce_error_info, nip, addr);
225 return handled; 249 return handled;
226} 250}
227 251
@@ -263,11 +287,12 @@ static long mce_handle_derror_p8(uint64_t dsisr)
263 287
264long __machine_check_early_realmode_p8(struct pt_regs *regs) 288long __machine_check_early_realmode_p8(struct pt_regs *regs)
265{ 289{
266 uint64_t srr1, addr; 290 uint64_t srr1, nip, addr;
267 long handled = 1; 291 long handled = 1;
268 struct mce_error_info mce_error_info = { 0 }; 292 struct mce_error_info mce_error_info = { 0 };
269 293
270 srr1 = regs->msr; 294 srr1 = regs->msr;
295 nip = regs->nip;
271 296
272 if (P7_SRR1_MC_LOADSTORE(srr1)) { 297 if (P7_SRR1_MC_LOADSTORE(srr1)) {
273 handled = mce_handle_derror_p8(regs->dsisr); 298 handled = mce_handle_derror_p8(regs->dsisr);
@@ -279,6 +304,10 @@ long __machine_check_early_realmode_p8(struct pt_regs *regs)
279 addr = regs->nip; 304 addr = regs->nip;
280 } 305 }
281 306
282 save_mce_event(regs, handled, &mce_error_info, addr); 307 /* Handle UE error. */
308 if (mce_error_info.error_type == MCE_ERROR_TYPE_UE)
309 handled = mce_handle_ue_error(regs);
310
311 save_mce_event(regs, handled, &mce_error_info, nip, addr);
283 return handled; 312 return handled;
284} 313}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index f58c0d3aaeb4..d711b7eb05aa 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -752,6 +752,11 @@ void __init early_init_devtree(void *params)
752 spinning_secondaries = boot_cpu_count - 1; 752 spinning_secondaries = boot_cpu_count - 1;
753#endif 753#endif
754 754
755#ifdef CONFIG_PPC_POWERNV
756 /* Scan and build the list of machine check recoverable ranges */
757 of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL);
758#endif
759
755 DBG(" <- early_init_devtree()\n"); 760 DBG(" <- early_init_devtree()\n");
756} 761}
757 762