about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Russ Anderson <rja@sgi.com> 2006-12-14 17:01:41 -0500
committer: Tony Luck <tony.luck@intel.com> 2007-03-08 12:41:46 -0500
commit: 618b206f0b580d965eb26f704ed23beee2a8c25d (patch)
tree: a286eca054a9d5e8fbd54463647b68158f2a50d2
parent: 908e0a8a265fe8057604a9a30aec3f0be7bb5ebb (diff)
[IA64] Proper handling of TLB errors from duplicate itr.d dropins
Jack Steiner noticed that duplicate TLB DTC entries do not cause a Linux panic. See discussion:

http://www.gelato.unsw.edu.au/archives/linux-ia64/0307/6108.html

The current TLB recovery code is recovering from the duplicate itr.d dropins, masking the underlying problem. This change modifies the MCA recovery code to look for the TLB check signature of the duplicate TLB entry and panic in that case.

Signed-off-by: Russ Anderson (rja@sgi.com)
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- arch/ia64/kernel/mca.c | 8
-rw-r--r-- arch/ia64/kernel/mca_drv.c | 33
-rw-r--r-- include/asm-ia64/pal.h | 1
3 files changed, 36 insertions, 6 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index a76add3e76a2..491687f84fb5 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1192,8 +1192,6 @@ void
1192ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, 1192ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1193 struct ia64_sal_os_state *sos) 1193 struct ia64_sal_os_state *sos)
1194{ 1194{
1195 pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
1196 &sos->proc_state_param;
1197 int recover, cpu = smp_processor_id(); 1195 int recover, cpu = smp_processor_id();
1198 struct task_struct *previous_current; 1196 struct task_struct *previous_current;
1199 struct ia64_mca_notify_die nd = 1197 struct ia64_mca_notify_die nd =
@@ -1223,10 +1221,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1223 /* Get the MCA error record and log it */ 1221 /* Get the MCA error record and log it */
1224 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); 1222 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
1225 1223
1226 /* TLB error is only exist in this SAL error record */ 1224 /* MCA error recovery */
1227 recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) 1225 recover = (ia64_mca_ucmc_extension
1228 /* other error recovery */
1229 || (ia64_mca_ucmc_extension
1230 && ia64_mca_ucmc_extension( 1226 && ia64_mca_ucmc_extension(
1231 IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), 1227 IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
1232 sos)); 1228 sos));
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
index afc1403799c9..5e2d18fbce40 100644
--- a/arch/ia64/kernel/mca_drv.c
+++ b/arch/ia64/kernel/mca_drv.c
@@ -607,6 +607,33 @@ recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx,
607 return status; 607 return status;
608} 608}
609 609
610/*
611 * recover_from_tlb_check
612 * @peidx: pointer of index of processor error section
613 *
614 * Return value:
615 * 1 on Success / 0 on Failure
616 */
617static int
618recover_from_tlb_check(peidx_table_t *peidx)
619{
620 sal_log_mod_error_info_t *smei;
621 pal_tlb_check_info_t *ptci;
622
623 smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0);
624 ptci = (pal_tlb_check_info_t *)&(smei->check_info);
625
626 /*
627 * Look for signature of a duplicate TLB DTC entry, which is
628 * a SW bug and always fatal.
629 */
630 if (ptci->op == PAL_TLB_CHECK_OP_PURGE
631 && !(ptci->itr || ptci->dtc || ptci->itc))
632 return fatal_mca("Duplicate TLB entry");
633
634 return mca_recovered("TLB check recovered");
635}
636
610/** 637/**
611 * recover_from_processor_error 638 * recover_from_processor_error
612 * @platform: whether there are some platform error section or not 639 * @platform: whether there are some platform error section or not
@@ -652,6 +679,12 @@ recover_from_processor_error(int platform, slidx_table_t *slidx,
652 return fatal_mca("error not contained"); 679 return fatal_mca("error not contained");
653 680
654 /* 681 /*
682 * Look for recoverable TLB check
683 */
684 if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
685 return recover_from_tlb_check(peidx);
686
687 /*
655 * The cache check and bus check bits have four possible states 688 * The cache check and bus check bits have four possible states
656 * cc bc 689 * cc bc
657 * 0 0 Weird record, not recovered 690 * 0 0 Weird record, not recovered
diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h
index e43021a99a20..67656ce767c2 100644
--- a/include/asm-ia64/pal.h
+++ b/include/asm-ia64/pal.h
@@ -371,6 +371,7 @@ typedef u64 pal_mc_info_index_t;
371 * dependent 371 * dependent
372 */ 372 */
373 373
374#define PAL_TLB_CHECK_OP_PURGE 8
374 375
375typedef struct pal_process_state_info_s { 376typedef struct pal_process_state_info_s {
376 u64 reserved1 : 2, 377 u64 reserved1 : 2,