aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/mca_drv.c
diff options
context:
space:
mode:
authorRuss Anderson <rja@sgi.com>2006-03-24 12:49:52 -0500
committerTony Luck <tony.luck@intel.com>2006-03-24 12:49:52 -0500
commitd2a28ad9fa7bf16761d070d8a3338375e1574b32 (patch)
tree9da2c18c91a4ee12fa6a9b2d23dcb55d13dd0ca8 /arch/ia64/kernel/mca_drv.c
parenta5b00bb4fe60796c791238cf5653b82110031c93 (diff)
[IA64] MCA recovery: kernel context recovery table
Memory errors encountered by user applications may surface when the CPU is running in kernel context. The current code will not attempt recovery if the MCA surfaces in kernel context (privilage mode 0). This patch adds a check for cases where the user initiated the load that surfaces in kernel interrupt code. An example is a user process lauching a load from memory and the data in memory had bad ECC. Before the bad data gets to the CPU register, and interrupt comes in. The code jumps to the IVT interrupt entry point and begins execution in kernel context. The process of saving the user registers (SAVE_REST) causes the bad data to be loaded into a CPU register, triggering the MCA. The MCA surfaces in kernel context, even though the load was initiated from user context. As suggested by David and Tony, this patch uses an exception table like approach, puting the tagged recovery addresses in a searchable table. One difference from the exception table is that MCAs do not surface in precise places (such as with a TLB miss), so instead of tagging specific instructions, address ranges are registers. A single macro is used to do the tagging, with the input parameter being the label of the starting address and the macro being the ending address. This limits clutter in the code. This patch only tags one spot, the interrupt ivt entry. Testing showed that spot to be a "heavy hitter" with MCAs surfacing while saving user registers. Other spots can be added as needed by adding a single macro. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca_drv.c')
-rw-r--r--arch/ia64/kernel/mca_drv.c22
1 files changed, 15 insertions, 7 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index e883d85906db..37c88eb55873 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -6,6 +6,7 @@
6 * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) 6 * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
7 * Copyright (C) 2005 Silicon Graphics, Inc 7 * Copyright (C) 2005 Silicon Graphics, Inc
8 * Copyright (C) 2005 Keith Owens <kaos@sgi.com> 8 * Copyright (C) 2005 Keith Owens <kaos@sgi.com>
9 * Copyright (C) 2006 Russ Anderson <rja@sgi.com>
9 */ 10 */
10#include <linux/config.h> 11#include <linux/config.h>
11#include <linux/types.h> 12#include <linux/types.h>
@@ -121,11 +122,12 @@ mca_page_isolate(unsigned long paddr)
121 */ 122 */
122 123
123void 124void
124mca_handler_bh(unsigned long paddr) 125mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr)
125{ 126{
126 printk(KERN_ERR 127 printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, "
127 "OS_MCA: process [pid: %d](%s) encounters MCA (paddr=%lx)\n", 128 "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n",
128 current->pid, current->comm, paddr); 129 raw_smp_processor_id(), current->pid, current->uid,
130 iip, ipsr, paddr, current->comm);
129 131
130 spin_lock(&mca_bh_lock); 132 spin_lock(&mca_bh_lock);
131 switch (mca_page_isolate(paddr)) { 133 switch (mca_page_isolate(paddr)) {
@@ -442,21 +444,26 @@ recover_from_read_error(slidx_table_t *slidx,
442 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) 444 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
443 return 0; 445 return 0;
444 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); 446 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
447 psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
445 448
446 /* 449 /*
447 * Check the privilege level of interrupted context. 450 * Check the privilege level of interrupted context.
448 * If it is user-mode, then terminate affected process. 451 * If it is user-mode, then terminate affected process.
449 */ 452 */
450 if (psr1->cpl != 0) { 453
454 pmsa = sos->pal_min_state;
455 if (psr1->cpl != 0 ||
456 ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
451 smei = peidx_bus_check(peidx, 0); 457 smei = peidx_bus_check(peidx, 0);
452 if (smei->valid.target_identifier) { 458 if (smei->valid.target_identifier) {
453 /* 459 /*
454 * setup for resume to bottom half of MCA, 460 * setup for resume to bottom half of MCA,
455 * "mca_handler_bhhook" 461 * "mca_handler_bhhook"
456 */ 462 */
457 pmsa = sos->pal_min_state; 463 /* pass to bhhook as argument (gr8, ...) */
458 /* pass to bhhook as 1st argument (gr8) */
459 pmsa->pmsa_gr[8-1] = smei->target_identifier; 464 pmsa->pmsa_gr[8-1] = smei->target_identifier;
465 pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
466 pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
460 /* set interrupted return address (but no use) */ 467 /* set interrupted return address (but no use) */
461 pmsa->pmsa_br0 = pmsa->pmsa_iip; 468 pmsa->pmsa_br0 = pmsa->pmsa_iip;
462 /* change resume address to bottom half */ 469 /* change resume address to bottom half */
@@ -466,6 +473,7 @@ recover_from_read_error(slidx_table_t *slidx,
466 psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; 473 psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
467 psr2->cpl = 0; 474 psr2->cpl = 0;
468 psr2->ri = 0; 475 psr2->ri = 0;
476 psr2->bn = 1;
469 psr2->i = 0; 477 psr2->i = 0;
470 478
471 return 1; 479 return 1;