aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorRuss Anderson <rja@efs.americas.sgi.com>2006-04-27 11:07:08 -0400
committerTony Luck <tony.luck@intel.com>2006-04-27 17:34:01 -0400
commit189979619f90fd2eb168fbb9c262569176160624 (patch)
tree8f274464ac1604d927351448d1e5148c199960b7 /arch
parentcda3d4a069b915cf46e640bb6872a9d9aefeaabe (diff)
[IA64] Add mca recovery failure messages
When the mca recovery code encounters a condition that makes the MCA non-recoverable, print the reason it could not recover. This will make it easier to identify why the recovery code did not recover. Signed-off-by: Russ Anderson <rja@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kernel/mca_drv.c54
1 files changed, 36 insertions, 18 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index 37c88eb55873..ca6666b51ccb 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -62,6 +62,11 @@ typedef enum {
62 ISOLATE_NONE 62 ISOLATE_NONE
63} isolate_status_t; 63} isolate_status_t;
64 64
65typedef enum {
66 MCA_NOT_RECOVERED = 0,
67 MCA_RECOVERED = 1
68} recovery_status_t;
69
65/* 70/*
66 * This pool keeps pointers to the section part of SAL error record 71 * This pool keeps pointers to the section part of SAL error record
67 */ 72 */
@@ -71,6 +76,18 @@ static struct {
71 int max_idx; /* Maximum index of section pointer list pool */ 76 int max_idx; /* Maximum index of section pointer list pool */
72} slidx_pool; 77} slidx_pool;
73 78
79static int
80fatal_mca(const char *fmt, ...)
81{
82 va_list args;
83
84 va_start(args, fmt);
85 vprintk(fmt, args);
86 va_end(args);
87
88 return MCA_NOT_RECOVERED;
89}
90
74/** 91/**
75 * mca_page_isolate - isolate a poisoned page in order not to use it later 92 * mca_page_isolate - isolate a poisoned page in order not to use it later
76 * @paddr: poisoned memory location 93 * @paddr: poisoned memory location
@@ -424,7 +441,7 @@ recover_from_read_error(slidx_table_t *slidx,
424 441
425 /* Is target address valid? */ 442 /* Is target address valid? */
426 if (!pbci->tv) 443 if (!pbci->tv)
427 return 0; 444 return fatal_mca(KERN_ALERT "MCA: target address not valid\n");
428 445
429 /* 446 /*
430 * cpu read or memory-mapped io read 447 * cpu read or memory-mapped io read
@@ -442,7 +459,7 @@ recover_from_read_error(slidx_table_t *slidx,
442 459
443 /* Is minstate valid? */ 460 /* Is minstate valid? */
444 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) 461 if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate))
445 return 0; 462 return fatal_mca(KERN_ALERT "MCA: minstate not valid\n");
446 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); 463 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
447 psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); 464 psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr);
448 465
@@ -476,12 +493,13 @@ recover_from_read_error(slidx_table_t *slidx,
476 psr2->bn = 1; 493 psr2->bn = 1;
477 psr2->i = 0; 494 psr2->i = 0;
478 495
479 return 1; 496 return MCA_RECOVERED;
480 } 497 }
481 498
482 } 499 }
483 500
484 return 0; 501 return fatal_mca(KERN_ALERT "MCA: kernel context not recovered,"
502 " iip 0x%lx\n", pmsa->pmsa_iip);
485} 503}
486 504
487/** 505/**
@@ -567,13 +585,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
567 * The machine check is corrected. 585 * The machine check is corrected.
568 */ 586 */
569 if (psp->cm == 1) 587 if (psp->cm == 1)
570 return 1; 588 return MCA_RECOVERED;
571 589
572 /* 590 /*
573 * The error was not contained. Software must be reset. 591 * The error was not contained. Software must be reset.
574 */ 592 */
575 if (psp->us || psp->ci == 0) 593 if (psp->us || psp->ci == 0)
576 return 0; 594 return fatal_mca(KERN_ALERT "MCA: error not contained\n");
577 595
578 /* 596 /*
579 * The cache check and bus check bits have four possible states 597 * The cache check and bus check bits have four possible states
@@ -584,20 +602,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
584 * 1 1 Memory error, attempt recovery 602 * 1 1 Memory error, attempt recovery
585 */ 603 */
586 if (psp->bc == 0 || pbci == NULL) 604 if (psp->bc == 0 || pbci == NULL)
587 return 0; 605 return fatal_mca(KERN_ALERT "MCA: No bus check\n");
588 606
589 /* 607 /*
590 * Sorry, we cannot handle so many. 608 * Sorry, we cannot handle so many.
591 */ 609 */
592 if (peidx_bus_check_num(peidx) > 1) 610 if (peidx_bus_check_num(peidx) > 1)
593 return 0; 611 return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n");
594 /* 612 /*
595 * Well, here is only one bus error. 613 * Well, here is only one bus error.
596 */ 614 */
597 if (pbci->ib || pbci->cc) 615 if (pbci->ib)
598 return 0; 616 return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n");
617 if (pbci->cc)
618 return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n");
599 if (pbci->eb && pbci->bsi > 0) 619 if (pbci->eb && pbci->bsi > 0)
600 return 0; 620 return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n");
601 621
602 /* 622 /*
603 * This is a local MCA and estimated as recoverble external bus error. 623 * This is a local MCA and estimated as recoverble external bus error.
@@ -609,7 +629,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
609 /* 629 /*
610 * On account of strange SAL error record, we cannot recover. 630 * On account of strange SAL error record, we cannot recover.
611 */ 631 */
612 return 0; 632 return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n");
613} 633}
614 634
615/** 635/**
@@ -638,12 +658,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
638 658
639 /* Now, OS can recover when there is one processor error section */ 659 /* Now, OS can recover when there is one processor error section */
640 if (n_proc_err > 1) 660 if (n_proc_err > 1)
641 return 0; 661 return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n");
642 else if (n_proc_err == 0) { 662 else if (n_proc_err == 0)
643 /* Weird SAL record ... We need not to recover */ 663 /* Weird SAL record ... We need not to recover */
644 664 return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n");
645 return 1;
646 }
647 665
648 /* Make index of processor error section */ 666 /* Make index of processor error section */
649 mca_make_peidx((sal_log_processor_info_t*) 667 mca_make_peidx((sal_log_processor_info_t*)
@@ -654,7 +672,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos)
654 672
655 /* Check whether MCA is global or not */ 673 /* Check whether MCA is global or not */
656 if (is_mca_global(&peidx, &pbci, sos)) 674 if (is_mca_global(&peidx, &pbci, sos))
657 return 0; 675 return fatal_mca(KERN_ALERT "MCA: global MCA\n");
658 676
659 /* Try to recover a processor error */ 677 /* Try to recover a processor error */
660 return recover_from_processor_error(platform_err, &slidx, &peidx, 678 return recover_from_processor_error(platform_err, &slidx, &peidx,