aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2013-10-30 10:35:40 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-12-05 00:05:20 -0500
commit36df96f8acaf51992177645eb2d781f766ce97dc (patch)
tree0d7563425e6a848379e60b7a4b65d9239cc7e51e
parentae744f3432d3872c51298d922728e13c24ccc068 (diff)
powerpc/book3s: Decode and save machine check event.
Now that we handle machine check in linux, the MCE decoding should also take place in linux host. This info is crucial to log before we go down in case we can not handle the machine check errors. This patch decodes and populates a machine check event which contain high level meaning full MCE information. We do this in real mode C code with ME bit on. The MCE information is still available on emergency stack (in pt_regs structure format). Even if we take another exception at this point the MCE early handler will allocate a new stack frame on top of current one. So when we return back here we still have our MCE information safe on current stack. We use per cpu buffer to save high level MCE information. Each per cpu buffer is an array of machine check event structure indexed by per cpu counter mce_nest_count. The mce_nest_count is incremented every time we enter machine check early handler in real mode to get the current free slot (index = mce_nest_count - 1). The mce_nest_count is decremented once the MCE info is consumed by virtual mode machine exception handler. This patch provides save_mce_event(), get_mce_event() and release_mce_event() generic routines that can be used by machine check handlers to populate and retrieve the event. The routine release_mce_event() will free the event slot so that it can be reused. Caller can invoke get_mce_event() with a release flag either to release the event slot immediately OR keep it so that it can be fetched again. The event slot can be also released anytime by invoking release_mce_event(). This patch also updates kvm code to invoke get_mce_event to retrieve generic mce event rather than paca->opal_mce_evt. The KVM code always calls get_mce_event() with release flags set to false so that event is available for linus host machine If machine check occurs while we are in guest, KVM tries to handle the error. If KVM is able to handle MC error successfully, it enters the guest and delivers the machine check to guest. 
If KVM is not able to handle MC error, it exits the guest and passes the control to linux host machine check handler which then logs MC event and decides how to handle it in linux host. In failure case, KVM needs to make sure that the MC event is available for linux host to consume. Hence KVM always calls get_mce_event() with release flags set to false and later it invokes release_mce_event() only if it succeeds to handle error. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/include/asm/mce.h124
-rw-r--r--arch/powerpc/kernel/Makefile2
-rw-r--r--arch/powerpc/kernel/mce.c164
-rw-r--r--arch/powerpc/kernel/mce_power.c116
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c32
-rw-r--r--arch/powerpc/platforms/powernv/opal.c35
6 files changed, 434 insertions, 39 deletions
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index e3ffa825b970..87cad2a808c2 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -66,5 +66,129 @@
66 66
67#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \ 67#define P8_DSISR_MC_SLB_ERRORS (P7_DSISR_MC_SLB_ERRORS | \
68 P8_DSISR_MC_ERAT_MULTIHIT_SEC) 68 P8_DSISR_MC_ERAT_MULTIHIT_SEC)
69enum MCE_Version {
70 MCE_V1 = 1,
71};
72
73enum MCE_Severity {
74 MCE_SEV_NO_ERROR = 0,
75 MCE_SEV_WARNING = 1,
76 MCE_SEV_ERROR_SYNC = 2,
77 MCE_SEV_FATAL = 3,
78};
79
80enum MCE_Disposition {
81 MCE_DISPOSITION_RECOVERED = 0,
82 MCE_DISPOSITION_NOT_RECOVERED = 1,
83};
84
85enum MCE_Initiator {
86 MCE_INITIATOR_UNKNOWN = 0,
87 MCE_INITIATOR_CPU = 1,
88};
89
90enum MCE_ErrorType {
91 MCE_ERROR_TYPE_UNKNOWN = 0,
92 MCE_ERROR_TYPE_UE = 1,
93 MCE_ERROR_TYPE_SLB = 2,
94 MCE_ERROR_TYPE_ERAT = 3,
95 MCE_ERROR_TYPE_TLB = 4,
96};
97
98enum MCE_UeErrorType {
99 MCE_UE_ERROR_INDETERMINATE = 0,
100 MCE_UE_ERROR_IFETCH = 1,
101 MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2,
102 MCE_UE_ERROR_LOAD_STORE = 3,
103 MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4,
104};
105
106enum MCE_SlbErrorType {
107 MCE_SLB_ERROR_INDETERMINATE = 0,
108 MCE_SLB_ERROR_PARITY = 1,
109 MCE_SLB_ERROR_MULTIHIT = 2,
110};
111
112enum MCE_EratErrorType {
113 MCE_ERAT_ERROR_INDETERMINATE = 0,
114 MCE_ERAT_ERROR_PARITY = 1,
115 MCE_ERAT_ERROR_MULTIHIT = 2,
116};
117
118enum MCE_TlbErrorType {
119 MCE_TLB_ERROR_INDETERMINATE = 0,
120 MCE_TLB_ERROR_PARITY = 1,
121 MCE_TLB_ERROR_MULTIHIT = 2,
122};
123
124struct machine_check_event {
125 enum MCE_Version version:8; /* 0x00 */
126 uint8_t in_use; /* 0x01 */
127 enum MCE_Severity severity:8; /* 0x02 */
128 enum MCE_Initiator initiator:8; /* 0x03 */
129 enum MCE_ErrorType error_type:8; /* 0x04 */
130 enum MCE_Disposition disposition:8; /* 0x05 */
131 uint8_t reserved_1[2]; /* 0x06 */
132 uint64_t gpr3; /* 0x08 */
133 uint64_t srr0; /* 0x10 */
134 uint64_t srr1; /* 0x18 */
135 union { /* 0x20 */
136 struct {
137 enum MCE_UeErrorType ue_error_type:8;
138 uint8_t effective_address_provided;
139 uint8_t physical_address_provided;
140 uint8_t reserved_1[5];
141 uint64_t effective_address;
142 uint64_t physical_address;
143 uint8_t reserved_2[8];
144 } ue_error;
145
146 struct {
147 enum MCE_SlbErrorType slb_error_type:8;
148 uint8_t effective_address_provided;
149 uint8_t reserved_1[6];
150 uint64_t effective_address;
151 uint8_t reserved_2[16];
152 } slb_error;
153
154 struct {
155 enum MCE_EratErrorType erat_error_type:8;
156 uint8_t effective_address_provided;
157 uint8_t reserved_1[6];
158 uint64_t effective_address;
159 uint8_t reserved_2[16];
160 } erat_error;
161
162 struct {
163 enum MCE_TlbErrorType tlb_error_type:8;
164 uint8_t effective_address_provided;
165 uint8_t reserved_1[6];
166 uint64_t effective_address;
167 uint8_t reserved_2[16];
168 } tlb_error;
169 } u;
170};
171
172struct mce_error_info {
173 enum MCE_ErrorType error_type:8;
174 union {
175 enum MCE_UeErrorType ue_error_type:8;
176 enum MCE_SlbErrorType slb_error_type:8;
177 enum MCE_EratErrorType erat_error_type:8;
178 enum MCE_TlbErrorType tlb_error_type:8;
179 } u;
180 uint8_t reserved[2];
181};
182
183#define MAX_MC_EVT 100
184
185/* Release flags for get_mce_event() */
186#define MCE_EVENT_RELEASE true
187#define MCE_EVENT_DONTRELEASE false
188
189extern void save_mce_event(struct pt_regs *regs, long handled,
190 struct mce_error_info *mce_err, uint64_t addr);
191extern int get_mce_event(struct machine_check_event *mce, bool release);
192extern void release_mce_event(void);
69 193
70#endif /* __ASM_PPC64_MCE_H__ */ 194#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 07c63d0aa759..904d713366ff 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
39obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o 39obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
40obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o 40obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o
41obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o 41obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o
42obj-$(CONFIG_PPC_BOOK3S_64) += mce_power.o 42obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o
43obj64-$(CONFIG_RELOCATABLE) += reloc_64.o 43obj64-$(CONFIG_RELOCATABLE) += reloc_64.o
44obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o 44obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o
45obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o 45obj-$(CONFIG_PPC_A2) += cpu_setup_a2.o
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
new file mode 100644
index 000000000000..aeecdf1ba897
--- /dev/null
+++ b/arch/powerpc/kernel/mce.c
@@ -0,0 +1,164 @@
1/*
2 * Machine check exception handling.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright 2013 IBM Corporation
19 * Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
20 */
21
22#undef DEBUG
23#define pr_fmt(fmt) "mce: " fmt
24
25#include <linux/types.h>
26#include <linux/ptrace.h>
27#include <linux/percpu.h>
28#include <linux/export.h>
29#include <asm/mce.h>
30
31static DEFINE_PER_CPU(int, mce_nest_count);
32static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
33
34static void mce_set_error_info(struct machine_check_event *mce,
35 struct mce_error_info *mce_err)
36{
37 mce->error_type = mce_err->error_type;
38 switch (mce_err->error_type) {
39 case MCE_ERROR_TYPE_UE:
40 mce->u.ue_error.ue_error_type = mce_err->u.ue_error_type;
41 break;
42 case MCE_ERROR_TYPE_SLB:
43 mce->u.slb_error.slb_error_type = mce_err->u.slb_error_type;
44 break;
45 case MCE_ERROR_TYPE_ERAT:
46 mce->u.erat_error.erat_error_type = mce_err->u.erat_error_type;
47 break;
48 case MCE_ERROR_TYPE_TLB:
49 mce->u.tlb_error.tlb_error_type = mce_err->u.tlb_error_type;
50 break;
51 case MCE_ERROR_TYPE_UNKNOWN:
52 default:
53 break;
54 }
55}
56
57/*
58 * Decode and save high level MCE information into per cpu buffer which
59 * is an array of machine_check_event structure.
60 */
61void save_mce_event(struct pt_regs *regs, long handled,
62 struct mce_error_info *mce_err,
63 uint64_t addr)
64{
65 uint64_t srr1;
66 int index = __get_cpu_var(mce_nest_count)++;
67 struct machine_check_event *mce = &__get_cpu_var(mce_event[index]);
68
69 /*
70 * Return if we don't have enough space to log mce event.
71 * mce_nest_count may go beyond MAX_MC_EVT but that's ok,
72 * the check below will stop buffer overrun.
73 */
74 if (index >= MAX_MC_EVT)
75 return;
76
77 /* Populate generic machine check info */
78 mce->version = MCE_V1;
79 mce->srr0 = regs->nip;
80 mce->srr1 = regs->msr;
81 mce->gpr3 = regs->gpr[3];
82 mce->in_use = 1;
83
84 mce->initiator = MCE_INITIATOR_CPU;
85 if (handled)
86 mce->disposition = MCE_DISPOSITION_RECOVERED;
87 else
88 mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
89 mce->severity = MCE_SEV_ERROR_SYNC;
90
91 srr1 = regs->msr;
92
93 /*
94 * Populate the mce error_type and type-specific error_type.
95 */
96 mce_set_error_info(mce, mce_err);
97
98 if (!addr)
99 return;
100
101 if (mce->error_type == MCE_ERROR_TYPE_TLB) {
102 mce->u.tlb_error.effective_address_provided = true;
103 mce->u.tlb_error.effective_address = addr;
104 } else if (mce->error_type == MCE_ERROR_TYPE_SLB) {
105 mce->u.slb_error.effective_address_provided = true;
106 mce->u.slb_error.effective_address = addr;
107 } else if (mce->error_type == MCE_ERROR_TYPE_ERAT) {
108 mce->u.erat_error.effective_address_provided = true;
109 mce->u.erat_error.effective_address = addr;
110 } else if (mce->error_type == MCE_ERROR_TYPE_UE) {
111 mce->u.ue_error.effective_address_provided = true;
112 mce->u.ue_error.effective_address = addr;
113 }
114 return;
115}
116
117/*
118 * get_mce_event:
119 * mce Pointer to machine_check_event structure to be filled.
120 * release Flag to indicate whether to free the event slot or not.
121 * 0 <= do not release the mce event. Caller will invoke
122 * release_mce_event() once event has been consumed.
123 * 1 <= release the slot.
124 *
125 * return 1 = success
126 * 0 = failure
127 *
128 * get_mce_event() will be called by platform specific machine check
129 * handle routine and in KVM.
130 * When we call get_mce_event(), we are still in interrupt context and
131 * preemption will not be scheduled until ret_from_expect() routine
132 * is called.
133 */
134int get_mce_event(struct machine_check_event *mce, bool release)
135{
136 int index = __get_cpu_var(mce_nest_count) - 1;
137 struct machine_check_event *mc_evt;
138 int ret = 0;
139
140 /* Sanity check */
141 if (index < 0)
142 return ret;
143
144 /* Check if we have MCE info to process. */
145 if (index < MAX_MC_EVT) {
146 mc_evt = &__get_cpu_var(mce_event[index]);
147 /* Copy the event structure and release the original */
148 if (mce)
149 *mce = *mc_evt;
150 if (release)
151 mc_evt->in_use = 0;
152 ret = 1;
153 }
154 /* Decrement the count to free the slot. */
155 if (release)
156 __get_cpu_var(mce_nest_count)--;
157
158 return ret;
159}
160
161void release_mce_event(void)
162{
163 get_mce_event(NULL, true);
164}
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 60a217f11275..b36e777a734f 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -133,22 +133,116 @@ static long mce_handle_ierror_p7(uint64_t srr1)
133 return handled; 133 return handled;
134} 134}
135 135
136static void mce_get_common_ierror(struct mce_error_info *mce_err, uint64_t srr1)
137{
138 switch (P7_SRR1_MC_IFETCH(srr1)) {
139 case P7_SRR1_MC_IFETCH_SLB_PARITY:
140 mce_err->error_type = MCE_ERROR_TYPE_SLB;
141 mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
142 break;
143 case P7_SRR1_MC_IFETCH_SLB_MULTIHIT:
144 mce_err->error_type = MCE_ERROR_TYPE_SLB;
145 mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
146 break;
147 case P7_SRR1_MC_IFETCH_TLB_MULTIHIT:
148 mce_err->error_type = MCE_ERROR_TYPE_TLB;
149 mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
150 break;
151 case P7_SRR1_MC_IFETCH_UE:
152 case P7_SRR1_MC_IFETCH_UE_IFU_INTERNAL:
153 mce_err->error_type = MCE_ERROR_TYPE_UE;
154 mce_err->u.ue_error_type = MCE_UE_ERROR_IFETCH;
155 break;
156 case P7_SRR1_MC_IFETCH_UE_TLB_RELOAD:
157 mce_err->error_type = MCE_ERROR_TYPE_UE;
158 mce_err->u.ue_error_type =
159 MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
160 break;
161 }
162}
163
164static void mce_get_ierror_p7(struct mce_error_info *mce_err, uint64_t srr1)
165{
166 mce_get_common_ierror(mce_err, srr1);
167 if (P7_SRR1_MC_IFETCH(srr1) == P7_SRR1_MC_IFETCH_SLB_BOTH) {
168 mce_err->error_type = MCE_ERROR_TYPE_SLB;
169 mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
170 }
171}
172
173static void mce_get_derror_p7(struct mce_error_info *mce_err, uint64_t dsisr)
174{
175 if (dsisr & P7_DSISR_MC_UE) {
176 mce_err->error_type = MCE_ERROR_TYPE_UE;
177 mce_err->u.ue_error_type = MCE_UE_ERROR_LOAD_STORE;
178 } else if (dsisr & P7_DSISR_MC_UE_TABLEWALK) {
179 mce_err->error_type = MCE_ERROR_TYPE_UE;
180 mce_err->u.ue_error_type =
181 MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
182 } else if (dsisr & P7_DSISR_MC_ERAT_MULTIHIT) {
183 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
184 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
185 } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT) {
186 mce_err->error_type = MCE_ERROR_TYPE_SLB;
187 mce_err->u.slb_error_type = MCE_SLB_ERROR_MULTIHIT;
188 } else if (dsisr & P7_DSISR_MC_SLB_PARITY_MFSLB) {
189 mce_err->error_type = MCE_ERROR_TYPE_SLB;
190 mce_err->u.slb_error_type = MCE_SLB_ERROR_PARITY;
191 } else if (dsisr & P7_DSISR_MC_TLB_MULTIHIT_MFTLB) {
192 mce_err->error_type = MCE_ERROR_TYPE_TLB;
193 mce_err->u.tlb_error_type = MCE_TLB_ERROR_MULTIHIT;
194 } else if (dsisr & P7_DSISR_MC_SLB_MULTIHIT_PARITY) {
195 mce_err->error_type = MCE_ERROR_TYPE_SLB;
196 mce_err->u.slb_error_type = MCE_SLB_ERROR_INDETERMINATE;
197 }
198}
199
136long __machine_check_early_realmode_p7(struct pt_regs *regs) 200long __machine_check_early_realmode_p7(struct pt_regs *regs)
137{ 201{
138 uint64_t srr1; 202 uint64_t srr1, addr;
139 long handled = 1; 203 long handled = 1;
204 struct mce_error_info mce_error_info = { 0 };
140 205
141 srr1 = regs->msr; 206 srr1 = regs->msr;
142 207
143 if (P7_SRR1_MC_LOADSTORE(srr1)) 208 /*
209 * Handle memory errors depending whether this was a load/store or
210 * ifetch exception. Also, populate the mce error_type and
211 * type-specific error_type from either SRR1 or DSISR, depending
212 * whether this was a load/store or ifetch exception
213 */
214 if (P7_SRR1_MC_LOADSTORE(srr1)) {
144 handled = mce_handle_derror_p7(regs->dsisr); 215 handled = mce_handle_derror_p7(regs->dsisr);
145 else 216 mce_get_derror_p7(&mce_error_info, regs->dsisr);
217 addr = regs->dar;
218 } else {
146 handled = mce_handle_ierror_p7(srr1); 219 handled = mce_handle_ierror_p7(srr1);
220 mce_get_ierror_p7(&mce_error_info, srr1);
221 addr = regs->nip;
222 }
147 223
148 /* TODO: Decode machine check reason. */ 224 save_mce_event(regs, handled, &mce_error_info, addr);
149 return handled; 225 return handled;
150} 226}
151 227
228static void mce_get_ierror_p8(struct mce_error_info *mce_err, uint64_t srr1)
229{
230 mce_get_common_ierror(mce_err, srr1);
231 if (P7_SRR1_MC_IFETCH(srr1) == P8_SRR1_MC_IFETCH_ERAT_MULTIHIT) {
232 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
233 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
234 }
235}
236
237static void mce_get_derror_p8(struct mce_error_info *mce_err, uint64_t dsisr)
238{
239 mce_get_derror_p7(mce_err, dsisr);
240 if (dsisr & P8_DSISR_MC_ERAT_MULTIHIT_SEC) {
241 mce_err->error_type = MCE_ERROR_TYPE_ERAT;
242 mce_err->u.erat_error_type = MCE_ERAT_ERROR_MULTIHIT;
243 }
244}
245
152static long mce_handle_ierror_p8(uint64_t srr1) 246static long mce_handle_ierror_p8(uint64_t srr1)
153{ 247{
154 long handled = 0; 248 long handled = 0;
@@ -169,16 +263,22 @@ static long mce_handle_derror_p8(uint64_t dsisr)
169 263
170long __machine_check_early_realmode_p8(struct pt_regs *regs) 264long __machine_check_early_realmode_p8(struct pt_regs *regs)
171{ 265{
172 uint64_t srr1; 266 uint64_t srr1, addr;
173 long handled = 1; 267 long handled = 1;
268 struct mce_error_info mce_error_info = { 0 };
174 269
175 srr1 = regs->msr; 270 srr1 = regs->msr;
176 271
177 if (P7_SRR1_MC_LOADSTORE(srr1)) 272 if (P7_SRR1_MC_LOADSTORE(srr1)) {
178 handled = mce_handle_derror_p8(regs->dsisr); 273 handled = mce_handle_derror_p8(regs->dsisr);
179 else 274 mce_get_derror_p8(&mce_error_info, regs->dsisr);
275 addr = regs->dar;
276 } else {
180 handled = mce_handle_ierror_p8(srr1); 277 handled = mce_handle_ierror_p8(srr1);
278 mce_get_ierror_p8(&mce_error_info, srr1);
279 addr = regs->nip;
280 }
181 281
182 /* TODO: Decode machine check reason. */ 282 save_mce_event(regs, handled, &mce_error_info, addr);
183 return handled; 283 return handled;
184} 284}
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 5c427b41a2f5..768a9f977c00 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -12,6 +12,7 @@
12#include <linux/kvm_host.h> 12#include <linux/kvm_host.h>
13#include <linux/kernel.h> 13#include <linux/kernel.h>
14#include <asm/opal.h> 14#include <asm/opal.h>
15#include <asm/mce.h>
15 16
16/* SRR1 bits for machine check on POWER7 */ 17/* SRR1 bits for machine check on POWER7 */
17#define SRR1_MC_LDSTERR (1ul << (63-42)) 18#define SRR1_MC_LDSTERR (1ul << (63-42))
@@ -67,9 +68,7 @@ static void reload_slb(struct kvm_vcpu *vcpu)
67static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu) 68static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
68{ 69{
69 unsigned long srr1 = vcpu->arch.shregs.msr; 70 unsigned long srr1 = vcpu->arch.shregs.msr;
70#ifdef CONFIG_PPC_POWERNV 71 struct machine_check_event mce_evt;
71 struct opal_machine_check_event *opal_evt;
72#endif
73 long handled = 1; 72 long handled = 1;
74 73
75 if (srr1 & SRR1_MC_LDSTERR) { 74 if (srr1 & SRR1_MC_LDSTERR) {
@@ -109,22 +108,31 @@ static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
109 handled = 0; 108 handled = 0;
110 } 109 }
111 110
112#ifdef CONFIG_PPC_POWERNV
113 /* 111 /*
114 * See if OPAL has already handled the condition. 112 * See if we have already handled the condition in the linux host.
115 * We assume that if the condition is recovered then OPAL 113 * We assume that if the condition is recovered then linux host
116 * will have generated an error log event that we will pick 114 * will have generated an error log event that we will pick
117 * up and log later. 115 * up and log later.
116 * Don't release mce event now. In case if condition is not
117 * recovered we do guest exit and go back to linux host machine
118 * check handler. Hence we need make sure that current mce event
119 * is available for linux host to consume.
118 */ 120 */
119 opal_evt = local_paca->opal_mc_evt; 121 if (!get_mce_event(&mce_evt, MCE_EVENT_DONTRELEASE))
120 if (opal_evt->version == OpalMCE_V1 && 122 goto out;
121 (opal_evt->severity == OpalMCE_SEV_NO_ERROR || 123
122 opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED)) 124 if (mce_evt.version == MCE_V1 &&
125 (mce_evt.severity == MCE_SEV_NO_ERROR ||
126 mce_evt.disposition == MCE_DISPOSITION_RECOVERED))
123 handled = 1; 127 handled = 1;
124 128
129out:
130 /*
131 * If we have handled the error, then release the mce event because
132 * we will be delivering machine check to guest.
133 */
125 if (handled) 134 if (handled)
126 opal_evt->in_use = 0; 135 release_mce_event();
127#endif
128 136
129 return handled; 137 return handled;
130} 138}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 1c798cd55372..c5e71d773f47 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -21,6 +21,7 @@
21#include <linux/kobject.h> 21#include <linux/kobject.h>
22#include <asm/opal.h> 22#include <asm/opal.h>
23#include <asm/firmware.h> 23#include <asm/firmware.h>
24#include <asm/mce.h>
24 25
25#include "powernv.h" 26#include "powernv.h"
26 27
@@ -256,8 +257,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
256 257
257int opal_machine_check(struct pt_regs *regs) 258int opal_machine_check(struct pt_regs *regs)
258{ 259{
259 struct opal_machine_check_event *opal_evt = get_paca()->opal_mc_evt; 260 struct machine_check_event evt;
260 struct opal_machine_check_event evt;
261 const char *level, *sevstr, *subtype; 261 const char *level, *sevstr, *subtype;
262 static const char *opal_mc_ue_types[] = { 262 static const char *opal_mc_ue_types[] = {
263 "Indeterminate", 263 "Indeterminate",
@@ -282,30 +282,29 @@ int opal_machine_check(struct pt_regs *regs)
282 "Multihit", 282 "Multihit",
283 }; 283 };
284 284
285 /* Copy the event structure and release the original */ 285 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
286 evt = *opal_evt; 286 return 0;
287 opal_evt->in_use = 0;
288 287
289 /* Print things out */ 288 /* Print things out */
290 if (evt.version != OpalMCE_V1) { 289 if (evt.version != MCE_V1) {
291 pr_err("Machine Check Exception, Unknown event version %d !\n", 290 pr_err("Machine Check Exception, Unknown event version %d !\n",
292 evt.version); 291 evt.version);
293 return 0; 292 return 0;
294 } 293 }
295 switch(evt.severity) { 294 switch(evt.severity) {
296 case OpalMCE_SEV_NO_ERROR: 295 case MCE_SEV_NO_ERROR:
297 level = KERN_INFO; 296 level = KERN_INFO;
298 sevstr = "Harmless"; 297 sevstr = "Harmless";
299 break; 298 break;
300 case OpalMCE_SEV_WARNING: 299 case MCE_SEV_WARNING:
301 level = KERN_WARNING; 300 level = KERN_WARNING;
302 sevstr = ""; 301 sevstr = "";
303 break; 302 break;
304 case OpalMCE_SEV_ERROR_SYNC: 303 case MCE_SEV_ERROR_SYNC:
305 level = KERN_ERR; 304 level = KERN_ERR;
306 sevstr = "Severe"; 305 sevstr = "Severe";
307 break; 306 break;
308 case OpalMCE_SEV_FATAL: 307 case MCE_SEV_FATAL:
309 default: 308 default:
310 level = KERN_ERR; 309 level = KERN_ERR;
311 sevstr = "Fatal"; 310 sevstr = "Fatal";
@@ -313,12 +312,12 @@ int opal_machine_check(struct pt_regs *regs)
313 } 312 }
314 313
315 printk("%s%s Machine check interrupt [%s]\n", level, sevstr, 314 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
316 evt.disposition == OpalMCE_DISPOSITION_RECOVERED ? 315 evt.disposition == MCE_DISPOSITION_RECOVERED ?
317 "Recovered" : "[Not recovered"); 316 "Recovered" : "[Not recovered");
318 printk("%s Initiator: %s\n", level, 317 printk("%s Initiator: %s\n", level,
319 evt.initiator == OpalMCE_INITIATOR_CPU ? "CPU" : "Unknown"); 318 evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
320 switch(evt.error_type) { 319 switch(evt.error_type) {
321 case OpalMCE_ERROR_TYPE_UE: 320 case MCE_ERROR_TYPE_UE:
322 subtype = evt.u.ue_error.ue_error_type < 321 subtype = evt.u.ue_error.ue_error_type <
323 ARRAY_SIZE(opal_mc_ue_types) ? 322 ARRAY_SIZE(opal_mc_ue_types) ?
324 opal_mc_ue_types[evt.u.ue_error.ue_error_type] 323 opal_mc_ue_types[evt.u.ue_error.ue_error_type]
@@ -331,7 +330,7 @@ int opal_machine_check(struct pt_regs *regs)
331 printk("%s Physial address: %016llx\n", 330 printk("%s Physial address: %016llx\n",
332 level, evt.u.ue_error.physical_address); 331 level, evt.u.ue_error.physical_address);
333 break; 332 break;
334 case OpalMCE_ERROR_TYPE_SLB: 333 case MCE_ERROR_TYPE_SLB:
335 subtype = evt.u.slb_error.slb_error_type < 334 subtype = evt.u.slb_error.slb_error_type <
336 ARRAY_SIZE(opal_mc_slb_types) ? 335 ARRAY_SIZE(opal_mc_slb_types) ?
337 opal_mc_slb_types[evt.u.slb_error.slb_error_type] 336 opal_mc_slb_types[evt.u.slb_error.slb_error_type]
@@ -341,7 +340,7 @@ int opal_machine_check(struct pt_regs *regs)
341 printk("%s Effective address: %016llx\n", 340 printk("%s Effective address: %016llx\n",
342 level, evt.u.slb_error.effective_address); 341 level, evt.u.slb_error.effective_address);
343 break; 342 break;
344 case OpalMCE_ERROR_TYPE_ERAT: 343 case MCE_ERROR_TYPE_ERAT:
345 subtype = evt.u.erat_error.erat_error_type < 344 subtype = evt.u.erat_error.erat_error_type <
346 ARRAY_SIZE(opal_mc_erat_types) ? 345 ARRAY_SIZE(opal_mc_erat_types) ?
347 opal_mc_erat_types[evt.u.erat_error.erat_error_type] 346 opal_mc_erat_types[evt.u.erat_error.erat_error_type]
@@ -351,7 +350,7 @@ int opal_machine_check(struct pt_regs *regs)
351 printk("%s Effective address: %016llx\n", 350 printk("%s Effective address: %016llx\n",
352 level, evt.u.erat_error.effective_address); 351 level, evt.u.erat_error.effective_address);
353 break; 352 break;
354 case OpalMCE_ERROR_TYPE_TLB: 353 case MCE_ERROR_TYPE_TLB:
355 subtype = evt.u.tlb_error.tlb_error_type < 354 subtype = evt.u.tlb_error.tlb_error_type <
356 ARRAY_SIZE(opal_mc_tlb_types) ? 355 ARRAY_SIZE(opal_mc_tlb_types) ?
357 opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type] 356 opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
@@ -362,11 +361,11 @@ int opal_machine_check(struct pt_regs *regs)
362 level, evt.u.tlb_error.effective_address); 361 level, evt.u.tlb_error.effective_address);
363 break; 362 break;
364 default: 363 default:
365 case OpalMCE_ERROR_TYPE_UNKNOWN: 364 case MCE_ERROR_TYPE_UNKNOWN:
366 printk("%s Error type: Unknown\n", level); 365 printk("%s Error type: Unknown\n", level);
367 break; 366 break;
368 } 367 }
369 return evt.severity == OpalMCE_SEV_FATAL ? 0 : 1; 368 return evt.severity == MCE_SEV_FATAL ? 0 : 1;
370} 369}
371 370
372static irqreturn_t opal_interrupt(int irq, void *data) 371static irqreturn_t opal_interrupt(int irq, void *data)