diff options
author | Russ Anderson <rja@efs.americas.sgi.com> | 2005-12-16 18:19:01 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2006-01-13 17:06:53 -0500 |
commit | 17e8ce0e9417eee1f57f9b3d4aad168425e043c3 (patch) | |
tree | 58bae85da8cedb339d945d43191ff910501bd175 /arch/ia64/sn | |
parent | 8a4b7b6f187f2967bff222e8c3758ab47efdb14f (diff) |
[IA64-SGI] Altix BTE error handling fixes
Altix (shub2) pushes the BTE clean-up into SAL.
This patch correctly interfaces with the now-implemented SAL call.
It also fixes a bug when delaying clean-up to allow busy BTEs to
complete (or error out).
Signed-off-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/sn')
-rw-r--r-- | arch/ia64/sn/kernel/bte_error.c | 58 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/huberror.c | 9 |
2 files changed, 53 insertions, 14 deletions
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c index fcbc748ae433..f1ec1370b3e3 100644 --- a/arch/ia64/sn/kernel/bte_error.c +++ b/arch/ia64/sn/kernel/bte_error.c | |||
@@ -33,7 +33,7 @@ void bte_error_handler(unsigned long); | |||
33 | * Wait until all BTE related CRBs are completed | 33 | * Wait until all BTE related CRBs are completed |
34 | * and then reset the interfaces. | 34 | * and then reset the interfaces. |
35 | */ | 35 | */ |
36 | void shub1_bte_error_handler(unsigned long _nodepda) | 36 | int shub1_bte_error_handler(unsigned long _nodepda) |
37 | { | 37 | { |
38 | struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; | 38 | struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; |
39 | struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; | 39 | struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; |
@@ -53,7 +53,7 @@ void shub1_bte_error_handler(unsigned long _nodepda) | |||
53 | (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { | 53 | (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) { |
54 | BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, | 54 | BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda, |
55 | smp_processor_id())); | 55 | smp_processor_id())); |
56 | return; | 56 | return 1; |
57 | } | 57 | } |
58 | 58 | ||
59 | /* Determine information about our hub */ | 59 | /* Determine information about our hub */ |
@@ -81,7 +81,7 @@ void shub1_bte_error_handler(unsigned long _nodepda) | |||
81 | mod_timer(recovery_timer, HZ * 5); | 81 | mod_timer(recovery_timer, HZ * 5); |
82 | BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, | 82 | BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, |
83 | smp_processor_id())); | 83 | smp_processor_id())); |
84 | return; | 84 | return 1; |
85 | } | 85 | } |
86 | if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { | 86 | if (icmr.ii_icmr_fld_s.i_crb_vld != 0) { |
87 | 87 | ||
@@ -99,7 +99,7 @@ void shub1_bte_error_handler(unsigned long _nodepda) | |||
99 | BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", | 99 | BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n", |
100 | err_nodepda, smp_processor_id(), | 100 | err_nodepda, smp_processor_id(), |
101 | i)); | 101 | i)); |
102 | return; | 102 | return 1; |
103 | } | 103 | } |
104 | } | 104 | } |
105 | } | 105 | } |
@@ -124,6 +124,42 @@ void shub1_bte_error_handler(unsigned long _nodepda) | |||
124 | REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); | 124 | REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval); |
125 | 125 | ||
126 | del_timer(recovery_timer); | 126 | del_timer(recovery_timer); |
127 | return 0; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Wait until all BTE related CRBs are completed | ||
132 | * and then reset the interfaces. | ||
133 | */ | ||
134 | int shub2_bte_error_handler(unsigned long _nodepda) | ||
135 | { | ||
136 | struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; | ||
137 | struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer; | ||
138 | struct bteinfo_s *bte; | ||
139 | nasid_t nasid; | ||
140 | u64 status; | ||
141 | int i; | ||
142 | |||
143 | nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); | ||
144 | |||
145 | /* | ||
146 | * Verify that all the BTEs are complete | ||
147 | */ | ||
148 | for (i = 0; i < BTES_PER_NODE; i++) { | ||
149 | bte = &err_nodepda->bte_if[i]; | ||
150 | status = BTE_LNSTAT_LOAD(bte); | ||
151 | if ((status & IBLS_ERROR) || !(status & IBLS_BUSY)) | ||
152 | continue; | ||
153 | mod_timer(recovery_timer, HZ * 5); | ||
154 | BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda, | ||
155 | smp_processor_id())); | ||
156 | return 1; | ||
157 | } | ||
158 | if (ia64_sn_bte_recovery(nasid)) | ||
159 | panic("bte_error_handler(): Fatal BTE Error"); | ||
160 | |||
161 | del_timer(recovery_timer); | ||
162 | return 0; | ||
127 | } | 163 | } |
128 | 164 | ||
129 | /* | 165 | /* |
@@ -135,7 +171,6 @@ void bte_error_handler(unsigned long _nodepda) | |||
135 | struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; | 171 | struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda; |
136 | spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; | 172 | spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock; |
137 | int i; | 173 | int i; |
138 | nasid_t nasid; | ||
139 | unsigned long irq_flags; | 174 | unsigned long irq_flags; |
140 | volatile u64 *notify; | 175 | volatile u64 *notify; |
141 | bte_result_t bh_error; | 176 | bte_result_t bh_error; |
@@ -160,12 +195,15 @@ void bte_error_handler(unsigned long _nodepda) | |||
160 | } | 195 | } |
161 | 196 | ||
162 | if (is_shub1()) { | 197 | if (is_shub1()) { |
163 | shub1_bte_error_handler(_nodepda); | 198 | if (shub1_bte_error_handler(_nodepda)) { |
199 | spin_unlock_irqrestore(recovery_lock, irq_flags); | ||
200 | return; | ||
201 | } | ||
164 | } else { | 202 | } else { |
165 | nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode); | 203 | if (shub2_bte_error_handler(_nodepda)) { |
166 | 204 | spin_unlock_irqrestore(recovery_lock, irq_flags); | |
167 | if (ia64_sn_bte_recovery(nasid)) | 205 | return; |
168 | panic("bte_error_handler(): Fatal BTE Error"); | 206 | } |
169 | } | 207 | } |
170 | 208 | ||
171 | for (i = 0; i < BTES_PER_NODE; i++) { | 209 | for (i = 0; i < BTES_PER_NODE; i++) { |
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 5c5eb01c50f0..56ab6bae00ee 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c | |||
@@ -32,13 +32,14 @@ static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep) | |||
32 | ret_stuff.v0 = 0; | 32 | ret_stuff.v0 = 0; |
33 | hubdev_info = (struct hubdev_info *)arg; | 33 | hubdev_info = (struct hubdev_info *)arg; |
34 | nasid = hubdev_info->hdi_nasid; | 34 | nasid = hubdev_info->hdi_nasid; |
35 | SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, | 35 | |
36 | if (is_shub1()) { | ||
37 | SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT, | ||
36 | (u64) nasid, 0, 0, 0, 0, 0, 0); | 38 | (u64) nasid, 0, 0, 0, 0, 0, 0); |
37 | 39 | ||
38 | if ((int)ret_stuff.v0) | 40 | if ((int)ret_stuff.v0) |
39 | panic("hubii_eint_handler(): Fatal TIO Error"); | 41 | panic("hubii_eint_handler(): Fatal TIO Error"); |
40 | 42 | ||
41 | if (is_shub1()) { | ||
42 | if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ | 43 | if (!(nasid & 1)) /* Not a TIO, handle CRB errors */ |
43 | (void)hubiio_crb_error_handler(hubdev_info); | 44 | (void)hubiio_crb_error_handler(hubdev_info); |
44 | } else | 45 | } else |