diff options
author | Anton Blanchard <anton@samba.org> | 2011-01-11 14:49:19 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2011-01-20 22:08:38 -0500 |
commit | d47d1d8af52e37bcf9059dd86878474e5ccc9c2a (patch) | |
tree | c8af975e293d17c96f06913db78cac6041e01a47 /arch/powerpc/platforms/pseries | |
parent | e49b1fae0ba4d06b29bd753a961abb447566bf4a (diff) |
powerpc: Rework pseries machine check handler
Rework pseries machine check handler:
- If MSR_RI isn't set, we cannot recover even if the machine check was fully
recovered
- Rename nonfatal to recovered
- Handle RTAS_DISP_LIMITED_RECOVERY
- Use BUS_MCEERR_AR instead of BUS_ADRERR
- Don't check all the RTAS error log fields when receiving a synchronous
machine check. Recent versions of the pseries firmware do not fill them
in during a machine check and instead send a follow-up error log with
the detailed information. If we see a synchronous machine check and we
came from userspace, then kill the task.
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 48 |
1 files changed, 30 insertions, 18 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 048e25711c6b..d194150cf342 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -259,31 +259,43 @@ int pSeries_system_reset_exception(struct pt_regs *regs) | |||
259 | * Return 1 if corrected (or delivered a signal). | 259 | * Return 1 if corrected (or delivered a signal). |
260 | * Return 0 if there is nothing we can do. | 260 | * Return 0 if there is nothing we can do. |
261 | */ | 261 | */ |
262 | static int recover_mce(struct pt_regs *regs, struct rtas_error_log * err) | 262 | static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) |
263 | { | 263 | { |
264 | int nonfatal = 0; | 264 | int recovered = 0; |
265 | 265 | ||
266 | if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { | 266 | if (!(regs->msr & MSR_RI)) { |
267 | /* If MSR_RI isn't set, we cannot recover */ | ||
268 | recovered = 0; | ||
269 | |||
270 | } else if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { | ||
267 | /* Platform corrected itself */ | 271 | /* Platform corrected itself */ |
268 | nonfatal = 1; | 272 | recovered = 1; |
269 | } else if ((regs->msr & MSR_RI) && | 273 | |
270 | user_mode(regs) && | 274 | } else if (err->disposition == RTAS_DISP_LIMITED_RECOVERY) { |
271 | err->severity == RTAS_SEVERITY_ERROR_SYNC && | 275 | /* Platform corrected itself but could be degraded */ |
272 | err->disposition == RTAS_DISP_NOT_RECOVERED && | 276 | printk(KERN_ERR "MCE: limited recovery, system may " |
273 | err->target == RTAS_TARGET_MEMORY && | 277 | "be degraded\n"); |
274 | err->type == RTAS_TYPE_ECC_UNCORR && | 278 | recovered = 1; |
275 | !(current->pid == 0 || is_global_init(current))) { | 279 | |
276 | /* Kill off a user process with an ECC error */ | 280 | } else if (user_mode(regs) && !is_global_init(current) && |
277 | printk(KERN_ERR "MCE: uncorrectable ecc error for pid %d\n", | 281 | err->severity == RTAS_SEVERITY_ERROR_SYNC) { |
278 | current->pid); | 282 | |
279 | /* XXX something better for ECC error? */ | 283 | /* |
280 | _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); | 284 | * If we received a synchronous error when in userspace |
281 | nonfatal = 1; | 285 | * kill the task. Firmware may report details of the fail |
286 | * asynchronously, so we can't rely on the target and type | ||
287 | * fields being valid here. | ||
288 | */ | ||
289 | printk(KERN_ERR "MCE: uncorrectable error, killing task " | ||
290 | "%s:%d\n", current->comm, current->pid); | ||
291 | |||
292 | _exception(SIGBUS, regs, BUS_MCEERR_AR, regs->nip); | ||
293 | recovered = 1; | ||
282 | } | 294 | } |
283 | 295 | ||
284 | log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); | 296 | log_error((char *)err, ERR_TYPE_RTAS_LOG, 0); |
285 | 297 | ||
286 | return nonfatal; | 298 | return recovered; |
287 | } | 299 | } |
288 | 300 | ||
289 | /* | 301 | /* |