diff options
author | Russ Anderson <rja@sgi.com> | 2006-12-14 17:01:41 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-03-08 12:41:46 -0500 |
commit | 618b206f0b580d965eb26f704ed23beee2a8c25d (patch) | |
tree | a286eca054a9d5e8fbd54463647b68158f2a50d2 | |
parent | 908e0a8a265fe8057604a9a30aec3f0be7bb5ebb (diff) |
[IA64] Proper handling of TLB errors from duplicate itr.d dropins
Jack Steiner noticed that duplicate TLB DTC entries do not cause a
Linux panic. See the discussion at:
http://www.gelato.unsw.edu.au/archives/linux-ia64/0307/6108.html
The current TLB recovery code is recovering from the duplicate itr.d
dropins, masking the underlying problem. This change modifies
the MCA recovery code to look for the TLB check signature of the
duplicate TLB entry and panic in that case.
Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | arch/ia64/kernel/mca.c | 8 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 33 | ||||
-rw-r--r-- | include/asm-ia64/pal.h | 1 |
3 files changed, 36 insertions, 6 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index a76add3e76a..491687f84fb 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -1192,8 +1192,6 @@ void | |||
1192 | ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | 1192 | ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, |
1193 | struct ia64_sal_os_state *sos) | 1193 | struct ia64_sal_os_state *sos) |
1194 | { | 1194 | { |
1195 | pal_processor_state_info_t *psp = (pal_processor_state_info_t *) | ||
1196 | &sos->proc_state_param; | ||
1197 | int recover, cpu = smp_processor_id(); | 1195 | int recover, cpu = smp_processor_id(); |
1198 | struct task_struct *previous_current; | 1196 | struct task_struct *previous_current; |
1199 | struct ia64_mca_notify_die nd = | 1197 | struct ia64_mca_notify_die nd = |
@@ -1223,10 +1221,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1223 | /* Get the MCA error record and log it */ | 1221 | /* Get the MCA error record and log it */ |
1224 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); | 1222 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); |
1225 | 1223 | ||
1226 | /* TLB error is only exist in this SAL error record */ | 1224 | /* MCA error recovery */ |
1227 | recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) | 1225 | recover = (ia64_mca_ucmc_extension |
1228 | /* other error recovery */ | ||
1229 | || (ia64_mca_ucmc_extension | ||
1230 | && ia64_mca_ucmc_extension( | 1226 | && ia64_mca_ucmc_extension( |
1231 | IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), | 1227 | IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), |
1232 | sos)); | 1228 | sos)); |
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index afc1403799c..5e2d18fbce4 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c | |||
@@ -607,6 +607,33 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, | |||
607 | return status; | 607 | return status; |
608 | } | 608 | } |
609 | 609 | ||
610 | /* | ||
611 | * recover_from_tlb_check | ||
612 | * @peidx: pointer of index of processor error section | ||
613 | * | ||
614 | * Return value: | ||
615 | * 1 on Success / 0 on Failure | ||
616 | */ | ||
617 | static int | ||
618 | recover_from_tlb_check(peidx_table_t *peidx) | ||
619 | { | ||
620 | sal_log_mod_error_info_t *smei; | ||
621 | pal_tlb_check_info_t *ptci; | ||
622 | |||
623 | smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0); | ||
624 | ptci = (pal_tlb_check_info_t *)&(smei->check_info); | ||
625 | |||
626 | /* | ||
627 | * Look for signature of a duplicate TLB DTC entry, which is | ||
628 | * a SW bug and always fatal. | ||
629 | */ | ||
630 | if (ptci->op == PAL_TLB_CHECK_OP_PURGE | ||
631 | && !(ptci->itr || ptci->dtc || ptci->itc)) | ||
632 | return fatal_mca("Duplicate TLB entry"); | ||
633 | |||
634 | return mca_recovered("TLB check recovered"); | ||
635 | } | ||
636 | |||
610 | /** | 637 | /** |
611 | * recover_from_processor_error | 638 | * recover_from_processor_error |
612 | * @platform: whether there are some platform error section or not | 639 | * @platform: whether there are some platform error section or not |
@@ -652,6 +679,12 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
652 | return fatal_mca("error not contained"); | 679 | return fatal_mca("error not contained"); |
653 | 680 | ||
654 | /* | 681 | /* |
682 | * Look for recoverable TLB check | ||
683 | */ | ||
684 | if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) | ||
685 | return recover_from_tlb_check(peidx); | ||
686 | |||
687 | /* | ||
655 | * The cache check and bus check bits have four possible states | 688 | * The cache check and bus check bits have four possible states |
656 | * cc bc | 689 | * cc bc |
657 | * 0 0 Weird record, not recovered | 690 | * 0 0 Weird record, not recovered |
diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index e43021a99a2..67656ce767c 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h | |||
@@ -371,6 +371,7 @@ typedef u64 pal_mc_info_index_t; | |||
371 | * dependent | 371 | * dependent |
372 | */ | 372 | */ |
373 | 373 | ||
374 | #define PAL_TLB_CHECK_OP_PURGE 8 | ||
374 | 375 | ||
375 | typedef struct pal_process_state_info_s { | 376 | typedef struct pal_process_state_info_s { |
376 | u64 reserved1 : 2, | 377 | u64 reserved1 : 2, |