aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMahesh Salgaonkar <mahesh@linux.vnet.ibm.com>2013-10-30 10:35:49 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2013-12-05 00:05:21 -0500
commitb5ff4211a8294be2ddbaf963fa3666fa042292a8 (patch)
treec87b212ebfddc4c42c54c61ee1b3fb3d49e0c941 /arch
parent36df96f8acaf51992177645eb2d781f766ce97dc (diff)
powerpc/book3s: Queue up and process delayed MCE events.
When the machine check real mode handler cannot continue into the host kernel in V mode, it returns from the interrupt and we lose the MCE event, which never gets logged. In such a situation, queue up the MCE event so that we can log it later when we get back into the host kernel with r1 pointing to the kernel stack, e.g. during syscall exit. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/include/asm/mce.h3
-rw-r--r--arch/powerpc/kernel/entry_64.S5
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S7
-rw-r--r--arch/powerpc/kernel/mce.c154
-rw-r--r--arch/powerpc/platforms/powernv/opal.c97
5 files changed, 168 insertions, 98 deletions
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 87cad2a808c2..3276b409299c 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -190,5 +190,8 @@ extern void save_mce_event(struct pt_regs *regs, long handled,
190 struct mce_error_info *mce_err, uint64_t addr); 190 struct mce_error_info *mce_err, uint64_t addr);
191extern int get_mce_event(struct machine_check_event *mce, bool release); 191extern int get_mce_event(struct machine_check_event *mce, bool release);
192extern void release_mce_event(void); 192extern void release_mce_event(void);
193extern void machine_check_queue_event(void);
194extern void machine_check_process_queued_event(void);
195extern void machine_check_print_event_info(struct machine_check_event *evt);
193 196
194#endif /* __ASM_PPC64_MCE_H__ */ 197#endif /* __ASM_PPC64_MCE_H__ */
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bbfb0294b354..770d6d65c47b 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -184,6 +184,11 @@ syscall_exit:
184 bl .do_show_syscall_exit 184 bl .do_show_syscall_exit
185 ld r3,RESULT(r1) 185 ld r3,RESULT(r1)
186#endif 186#endif
187#ifdef CONFIG_PPC_BOOK3S_64
188BEGIN_FTR_SECTION
189 bl .machine_check_process_queued_event
190END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
191#endif
187 CURRENT_THREAD_INFO(r12, r1) 192 CURRENT_THREAD_INFO(r12, r1)
188 193
189 ld r8,_MSR(r1) 194 ld r8,_MSR(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1aec3025eeee..862b9dd4a9db 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -855,7 +855,8 @@ BEGIN_FTR_SECTION
855 /* Supervisor state loss */ 855 /* Supervisor state loss */
856 li r0,1 856 li r0,1
857 stb r0,PACA_NAPSTATELOST(r13) 857 stb r0,PACA_NAPSTATELOST(r13)
8583: MACHINE_CHECK_HANDLER_WINDUP 8583: bl .machine_check_queue_event
859 MACHINE_CHECK_HANDLER_WINDUP
859 GET_PACA(r13) 860 GET_PACA(r13)
860 ld r1,PACAR1(r13) 861 ld r1,PACAR1(r13)
861 b .power7_enter_nap_mode 862 b .power7_enter_nap_mode
@@ -895,8 +896,10 @@ BEGIN_FTR_SECTION
8952: 8962:
896 /* 897 /*
897 * Return from MC interrupt. 898 * Return from MC interrupt.
898 * TODO: Queue up the MCE event so that we can log it later. 899 * Queue up the MCE event so that we can log it later, while
900 * returning from kernel or opal call.
899 */ 901 */
902 bl .machine_check_queue_event
900 MACHINE_CHECK_HANDLER_WINDUP 903 MACHINE_CHECK_HANDLER_WINDUP
901 rfid 904 rfid
9029: 9059:
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index aeecdf1ba897..1c6d15701c56 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -31,6 +31,10 @@
31static DEFINE_PER_CPU(int, mce_nest_count); 31static DEFINE_PER_CPU(int, mce_nest_count);
32static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event); 32static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
33 33
34/* Queue for delayed MCE events. */
35static DEFINE_PER_CPU(int, mce_queue_count);
36static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
37
34static void mce_set_error_info(struct machine_check_event *mce, 38static void mce_set_error_info(struct machine_check_event *mce,
35 struct mce_error_info *mce_err) 39 struct mce_error_info *mce_err)
36{ 40{
@@ -162,3 +166,153 @@ void release_mce_event(void)
162{ 166{
163 get_mce_event(NULL, true); 167 get_mce_event(NULL, true);
164} 168}
169
170/*
171 * Queue up the MCE event which then can be handled later.
172 */
173void machine_check_queue_event(void)
174{
175 int index;
176 struct machine_check_event evt;
177
178 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
179 return;
180
181 index = __get_cpu_var(mce_queue_count)++;
182 /* If queue is full, just return for now. */
183 if (index >= MAX_MC_EVT) {
184 __get_cpu_var(mce_queue_count)--;
185 return;
186 }
187 __get_cpu_var(mce_event_queue[index]) = evt;
188}
189
190/*
191 * process pending MCE event from the mce event queue. This function will be
192 * called during syscall exit.
193 */
194void machine_check_process_queued_event(void)
195{
196 int index;
197
198 preempt_disable();
199 /*
200 * For now just print it to console.
201 * TODO: log this error event to FSP or nvram.
202 */
203 while (__get_cpu_var(mce_queue_count) > 0) {
204 index = __get_cpu_var(mce_queue_count) - 1;
205 machine_check_print_event_info(
206 &__get_cpu_var(mce_event_queue[index]));
207 __get_cpu_var(mce_queue_count)--;
208 }
209 preempt_enable();
210}
211
212void machine_check_print_event_info(struct machine_check_event *evt)
213{
214 const char *level, *sevstr, *subtype;
215 static const char *mc_ue_types[] = {
216 "Indeterminate",
217 "Instruction fetch",
218 "Page table walk ifetch",
219 "Load/Store",
220 "Page table walk Load/Store",
221 };
222 static const char *mc_slb_types[] = {
223 "Indeterminate",
224 "Parity",
225 "Multihit",
226 };
227 static const char *mc_erat_types[] = {
228 "Indeterminate",
229 "Parity",
230 "Multihit",
231 };
232 static const char *mc_tlb_types[] = {
233 "Indeterminate",
234 "Parity",
235 "Multihit",
236 };
237
238 /* Print things out */
239 if (evt->version != MCE_V1) {
240 pr_err("Machine Check Exception, Unknown event version %d !\n",
241 evt->version);
242 return;
243 }
244 switch (evt->severity) {
245 case MCE_SEV_NO_ERROR:
246 level = KERN_INFO;
247 sevstr = "Harmless";
248 break;
249 case MCE_SEV_WARNING:
250 level = KERN_WARNING;
251 sevstr = "";
252 break;
253 case MCE_SEV_ERROR_SYNC:
254 level = KERN_ERR;
255 sevstr = "Severe";
256 break;
257 case MCE_SEV_FATAL:
258 default:
259 level = KERN_ERR;
260 sevstr = "Fatal";
261 break;
262 }
263
264 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
265 evt->disposition == MCE_DISPOSITION_RECOVERED ?
266 "Recovered" : "[Not recovered");
267 printk("%s Initiator: %s\n", level,
268 evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
269 switch (evt->error_type) {
270 case MCE_ERROR_TYPE_UE:
271 subtype = evt->u.ue_error.ue_error_type <
272 ARRAY_SIZE(mc_ue_types) ?
273 mc_ue_types[evt->u.ue_error.ue_error_type]
274 : "Unknown";
275 printk("%s Error type: UE [%s]\n", level, subtype);
276 if (evt->u.ue_error.effective_address_provided)
277 printk("%s Effective address: %016llx\n",
278 level, evt->u.ue_error.effective_address);
279 if (evt->u.ue_error.physical_address_provided)
280 printk("%s Physial address: %016llx\n",
281 level, evt->u.ue_error.physical_address);
282 break;
283 case MCE_ERROR_TYPE_SLB:
284 subtype = evt->u.slb_error.slb_error_type <
285 ARRAY_SIZE(mc_slb_types) ?
286 mc_slb_types[evt->u.slb_error.slb_error_type]
287 : "Unknown";
288 printk("%s Error type: SLB [%s]\n", level, subtype);
289 if (evt->u.slb_error.effective_address_provided)
290 printk("%s Effective address: %016llx\n",
291 level, evt->u.slb_error.effective_address);
292 break;
293 case MCE_ERROR_TYPE_ERAT:
294 subtype = evt->u.erat_error.erat_error_type <
295 ARRAY_SIZE(mc_erat_types) ?
296 mc_erat_types[evt->u.erat_error.erat_error_type]
297 : "Unknown";
298 printk("%s Error type: ERAT [%s]\n", level, subtype);
299 if (evt->u.erat_error.effective_address_provided)
300 printk("%s Effective address: %016llx\n",
301 level, evt->u.erat_error.effective_address);
302 break;
303 case MCE_ERROR_TYPE_TLB:
304 subtype = evt->u.tlb_error.tlb_error_type <
305 ARRAY_SIZE(mc_tlb_types) ?
306 mc_tlb_types[evt->u.tlb_error.tlb_error_type]
307 : "Unknown";
308 printk("%s Error type: TLB [%s]\n", level, subtype);
309 if (evt->u.tlb_error.effective_address_provided)
310 printk("%s Effective address: %016llx\n",
311 level, evt->u.tlb_error.effective_address);
312 break;
313 default:
314 case MCE_ERROR_TYPE_UNKNOWN:
315 printk("%s Error type: Unknown\n", level);
316 break;
317 }
318}
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index c5e71d773f47..245096f90437 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -258,29 +258,6 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
258int opal_machine_check(struct pt_regs *regs) 258int opal_machine_check(struct pt_regs *regs)
259{ 259{
260 struct machine_check_event evt; 260 struct machine_check_event evt;
261 const char *level, *sevstr, *subtype;
262 static const char *opal_mc_ue_types[] = {
263 "Indeterminate",
264 "Instruction fetch",
265 "Page table walk ifetch",
266 "Load/Store",
267 "Page table walk Load/Store",
268 };
269 static const char *opal_mc_slb_types[] = {
270 "Indeterminate",
271 "Parity",
272 "Multihit",
273 };
274 static const char *opal_mc_erat_types[] = {
275 "Indeterminate",
276 "Parity",
277 "Multihit",
278 };
279 static const char *opal_mc_tlb_types[] = {
280 "Indeterminate",
281 "Parity",
282 "Multihit",
283 };
284 261
285 if (!get_mce_event(&evt, MCE_EVENT_RELEASE)) 262 if (!get_mce_event(&evt, MCE_EVENT_RELEASE))
286 return 0; 263 return 0;
@@ -291,80 +268,8 @@ int opal_machine_check(struct pt_regs *regs)
291 evt.version); 268 evt.version);
292 return 0; 269 return 0;
293 } 270 }
294 switch(evt.severity) { 271 machine_check_print_event_info(&evt);
295 case MCE_SEV_NO_ERROR:
296 level = KERN_INFO;
297 sevstr = "Harmless";
298 break;
299 case MCE_SEV_WARNING:
300 level = KERN_WARNING;
301 sevstr = "";
302 break;
303 case MCE_SEV_ERROR_SYNC:
304 level = KERN_ERR;
305 sevstr = "Severe";
306 break;
307 case MCE_SEV_FATAL:
308 default:
309 level = KERN_ERR;
310 sevstr = "Fatal";
311 break;
312 }
313 272
314 printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
315 evt.disposition == MCE_DISPOSITION_RECOVERED ?
316 "Recovered" : "[Not recovered");
317 printk("%s Initiator: %s\n", level,
318 evt.initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
319 switch(evt.error_type) {
320 case MCE_ERROR_TYPE_UE:
321 subtype = evt.u.ue_error.ue_error_type <
322 ARRAY_SIZE(opal_mc_ue_types) ?
323 opal_mc_ue_types[evt.u.ue_error.ue_error_type]
324 : "Unknown";
325 printk("%s Error type: UE [%s]\n", level, subtype);
326 if (evt.u.ue_error.effective_address_provided)
327 printk("%s Effective address: %016llx\n",
328 level, evt.u.ue_error.effective_address);
329 if (evt.u.ue_error.physical_address_provided)
330 printk("%s Physial address: %016llx\n",
331 level, evt.u.ue_error.physical_address);
332 break;
333 case MCE_ERROR_TYPE_SLB:
334 subtype = evt.u.slb_error.slb_error_type <
335 ARRAY_SIZE(opal_mc_slb_types) ?
336 opal_mc_slb_types[evt.u.slb_error.slb_error_type]
337 : "Unknown";
338 printk("%s Error type: SLB [%s]\n", level, subtype);
339 if (evt.u.slb_error.effective_address_provided)
340 printk("%s Effective address: %016llx\n",
341 level, evt.u.slb_error.effective_address);
342 break;
343 case MCE_ERROR_TYPE_ERAT:
344 subtype = evt.u.erat_error.erat_error_type <
345 ARRAY_SIZE(opal_mc_erat_types) ?
346 opal_mc_erat_types[evt.u.erat_error.erat_error_type]
347 : "Unknown";
348 printk("%s Error type: ERAT [%s]\n", level, subtype);
349 if (evt.u.erat_error.effective_address_provided)
350 printk("%s Effective address: %016llx\n",
351 level, evt.u.erat_error.effective_address);
352 break;
353 case MCE_ERROR_TYPE_TLB:
354 subtype = evt.u.tlb_error.tlb_error_type <
355 ARRAY_SIZE(opal_mc_tlb_types) ?
356 opal_mc_tlb_types[evt.u.tlb_error.tlb_error_type]
357 : "Unknown";
358 printk("%s Error type: TLB [%s]\n", level, subtype);
359 if (evt.u.tlb_error.effective_address_provided)
360 printk("%s Effective address: %016llx\n",
361 level, evt.u.tlb_error.effective_address);
362 break;
363 default:
364 case MCE_ERROR_TYPE_UNKNOWN:
365 printk("%s Error type: Unknown\n", level);
366 break;
367 }
368 return evt.severity == MCE_SEV_FATAL ? 0 : 1; 273 return evt.severity == MCE_SEV_FATAL ? 0 : 1;
369} 274}
370 275