aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRuss Anderson <rja@sgi.com>2006-10-25 18:59:47 -0400
committerTony Luck <tony.luck@intel.com>2006-10-31 17:30:34 -0500
commit264b0f99308436deaee38bab99e586612d012fc1 (patch)
treea262e6f320c19d47e5842dfe8ccdd3823704072f
parent5ee7737379b1d7f0c977c0f1661fbaf01a8d4721 (diff)
[IA64] MCA recovery: Montecito support
The information in MCA records is filled in slightly differently on Montecito than on Madison/McKinley. Usually, the cache check and bus check target identifiers have the same address. On Montecito, the cache check and bus check target identifiers can be different if a corrected error (i.e. SBE or unconsumed poison data) was encountered and then an uncorrected error (i.e. DBE) was consumed. In that case, the cache check target identifier is the physical address of the DBE (that caused the MCA to surface) while the bus check target identifier is the physical address of the SBE. This patch correctly finds the target identifier that triggered the MCA. If there are multiple valid cache target identifiers in the same error record then use the one with the lowest cache level. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/kernel/mca_drv.c95
1 files changed, 68 insertions, 27 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index a45009d2bc90..afc1403799c9 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -435,6 +435,50 @@ is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci,
435} 435}
436 436
437/** 437/**
438 * get_target_identifier - Get the valid Cache or Bus check target identifier.
439 * @peidx: pointer of index of processor error section
440 *
441 * Return value:
442 * target address on Success / 0 on Failure
443 */
444static u64
445get_target_identifier(peidx_table_t *peidx)
446{
447 u64 target_address = 0;
448 sal_log_mod_error_info_t *smei;
449 pal_cache_check_info_t *pcci;
450 int i, level = 9;
451
452 /*
453 * Look through the cache checks for a valid target identifier
454 * If more than one valid target identifier, return the one
455 * with the lowest cache level.
456 */
457 for (i = 0; i < peidx_cache_check_num(peidx); i++) {
458 smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i);
459 if (smei->valid.target_identifier && smei->target_identifier) {
460 pcci = (pal_cache_check_info_t *)&(smei->check_info);
461 if (!target_address || (pcci->level < level)) {
462 target_address = smei->target_identifier;
463 level = pcci->level;
464 continue;
465 }
466 }
467 }
468 if (target_address)
469 return target_address;
470
471 /*
472 * Look at the bus check for a valid target identifier
473 */
474 smei = peidx_bus_check(peidx, 0);
475 if (smei && smei->valid.target_identifier)
476 return smei->target_identifier;
477
478 return 0;
479}
480
481/**
438 * recover_from_read_error - Try to recover the errors which type are "read"s. 482 * recover_from_read_error - Try to recover the errors which type are "read"s.
439 * @slidx: pointer of index of SAL error record 483 * @slidx: pointer of index of SAL error record
440 * @peidx: pointer of index of processor error section 484 * @peidx: pointer of index of processor error section
@@ -450,13 +494,14 @@ recover_from_read_error(slidx_table_t *slidx,
450 peidx_table_t *peidx, pal_bus_check_info_t *pbci, 494 peidx_table_t *peidx, pal_bus_check_info_t *pbci,
451 struct ia64_sal_os_state *sos) 495 struct ia64_sal_os_state *sos)
452{ 496{
453 sal_log_mod_error_info_t *smei; 497 u64 target_identifier;
454 pal_min_state_area_t *pmsa; 498 pal_min_state_area_t *pmsa;
455 struct ia64_psr *psr1, *psr2; 499 struct ia64_psr *psr1, *psr2;
456 ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; 500 ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
457 501
458 /* Is target address valid? */ 502 /* Is target address valid? */
459 if (!pbci->tv) 503 target_identifier = get_target_identifier(peidx);
504 if (!target_identifier)
460 return fatal_mca("target address not valid"); 505 return fatal_mca("target address not valid");
461 506
462 /* 507 /*
@@ -487,32 +532,28 @@ recover_from_read_error(slidx_table_t *slidx,
487 pmsa = sos->pal_min_state; 532 pmsa = sos->pal_min_state;
488 if (psr1->cpl != 0 || 533 if (psr1->cpl != 0 ||
489 ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { 534 ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) {
490 smei = peidx_bus_check(peidx, 0); 535 /*
491 if (smei->valid.target_identifier) { 536 * setup for resume to bottom half of MCA,
492 /* 537 * "mca_handler_bhhook"
493 * setup for resume to bottom half of MCA, 538 */
494 * "mca_handler_bhhook" 539 /* pass to bhhook as argument (gr8, ...) */
495 */ 540 pmsa->pmsa_gr[8-1] = target_identifier;
496 /* pass to bhhook as argument (gr8, ...) */ 541 pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip;
497 pmsa->pmsa_gr[8-1] = smei->target_identifier; 542 pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr;
498 pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; 543 /* set interrupted return address (but no use) */
499 pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; 544 pmsa->pmsa_br0 = pmsa->pmsa_iip;
500 /* set interrupted return address (but no use) */ 545 /* change resume address to bottom half */
501 pmsa->pmsa_br0 = pmsa->pmsa_iip; 546 pmsa->pmsa_iip = mca_hdlr_bh->fp;
502 /* change resume address to bottom half */ 547 pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
503 pmsa->pmsa_iip = mca_hdlr_bh->fp; 548 /* set cpl with kernel mode */
504 pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; 549 psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
505 /* set cpl with kernel mode */ 550 psr2->cpl = 0;
506 psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; 551 psr2->ri = 0;
507 psr2->cpl = 0; 552 psr2->bn = 1;
508 psr2->ri = 0; 553 psr2->i = 0;
509 psr2->bn = 1; 554
510 psr2->i = 0; 555 return mca_recovered("user memory corruption. "
511
512 return mca_recovered("user memory corruption. "
513 "kill affected process - recovered."); 556 "kill affected process - recovered.");
514 }
515
516 } 557 }
517 558
518 return fatal_mca("kernel context not recovered, iip 0x%lx\n", 559 return fatal_mca("kernel context not recovered, iip 0x%lx\n",