diff options
author | Russ Anderson <rja@sgi.com> | 2006-12-20 12:32:27 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-03-08 12:44:45 -0500 |
commit | 396e8e76c5eda19184e7b14ba9f1dcd5c4ea8d7e (patch) | |
tree | 992cff98c27c43ed49e935eed513e66cd77cdb01 /arch/ia64/kernel | |
parent | 618b206f0b580d965eb26f704ed23beee2a8c25d (diff) |
[IA64] Cache error recovery
Similar to memory error recovery, when a cache error is consumed
by a user process, terminate that process instead of crashing the system.
Signed-off-by: Russ Anderson (rja@sgi.com)
Acked-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel')
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 32 |
1 file changed, 11 insertions, 21 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 5e2d18fbce40..832cf1e647e8 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c | |||
@@ -602,6 +602,8 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, | |||
602 | default: | 602 | default: |
603 | break; | 603 | break; |
604 | } | 604 | } |
605 | } else if (psp->cc && !psp->bc) { /* Cache error */ | ||
606 | status = recover_from_read_error(slidx, peidx, pbci, sos); | ||
605 | } | 607 | } |
606 | 608 | ||
607 | return status; | 609 | return status; |
@@ -645,13 +647,6 @@ recover_from_tlb_check(peidx_table_t *peidx) | |||
645 | * Return value: | 647 | * Return value: |
646 | * 1 on Success / 0 on Failure | 648 | * 1 on Success / 0 on Failure |
647 | */ | 649 | */ |
648 | /* | ||
649 | * Later we try to recover when below all conditions are satisfied. | ||
650 | * 1. Only one processor error section is exist. | ||
651 | * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) | ||
652 | * 3. The entry of BUS_CHECK_INFO is 1. | ||
653 | * 4. "External bus error" flag is set and the others are not set. | ||
654 | */ | ||
655 | 650 | ||
656 | static int | 651 | static int |
657 | recover_from_processor_error(int platform, slidx_table_t *slidx, | 652 | recover_from_processor_error(int platform, slidx_table_t *slidx, |
@@ -687,36 +682,31 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
687 | /* | 682 | /* |
688 | * The cache check and bus check bits have four possible states | 683 | * The cache check and bus check bits have four possible states |
689 | * cc bc | 684 | * cc bc |
690 | * 0 0 Weird record, not recovered | ||
691 | * 1 0 Cache error, not recovered | ||
692 | * 0 1 I/O error, attempt recovery | ||
693 | * 1 1 Memory error, attempt recovery | 685 | * 1 1 Memory error, attempt recovery |
686 | * 1 0 Cache error, attempt recovery | ||
687 | * 0 1 I/O error, attempt recovery | ||
688 | * 0 0 Other error type, not recovered | ||
694 | */ | 689 | */ |
695 | if (psp->bc == 0 || pbci == NULL) | 690 | if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL)) |
696 | return fatal_mca("No bus check"); | 691 | return fatal_mca("No cache or bus check"); |
697 | 692 | ||
698 | /* | 693 | /* |
699 | * Sorry, we cannot handle so many. | 694 | * Cannot handle more than one bus check. |
700 | */ | 695 | */ |
701 | if (peidx_bus_check_num(peidx) > 1) | 696 | if (peidx_bus_check_num(peidx) > 1) |
702 | return fatal_mca("Too many bus checks"); | 697 | return fatal_mca("Too many bus checks"); |
703 | /* | 698 | |
704 | * Well, here is only one bus error. | ||
705 | */ | ||
706 | if (pbci->ib) | 699 | if (pbci->ib) |
707 | return fatal_mca("Internal Bus error"); | 700 | return fatal_mca("Internal Bus error"); |
708 | if (pbci->cc) | ||
709 | return fatal_mca("Cache-cache error"); | ||
710 | if (pbci->eb && pbci->bsi > 0) | 701 | if (pbci->eb && pbci->bsi > 0) |
711 | return fatal_mca("External bus check fatal status"); | 702 | return fatal_mca("External bus check fatal status"); |
712 | 703 | ||
713 | /* | 704 | /* |
714 | * This is a local MCA and estimated as recoverble external bus error. | 705 | * This is a local MCA and estimated as a recoverble error. |
715 | * (e.g. a load from poisoned memory) | ||
716 | * This means "there are some platform errors". | ||
717 | */ | 706 | */ |
718 | if (platform) | 707 | if (platform) |
719 | return recover_from_platform_error(slidx, peidx, pbci, sos); | 708 | return recover_from_platform_error(slidx, peidx, pbci, sos); |
709 | |||
720 | /* | 710 | /* |
721 | * On account of strange SAL error record, we cannot recover. | 711 | * On account of strange SAL error record, we cannot recover. |
722 | */ | 712 | */ |