diff options
author | Dave Olson <dave.olson@qlogic.com> | 2007-07-20 17:41:26 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-07-30 16:16:46 -0400 |
commit | 78d1e02fac0595a8aa8a5064d1bd0c0ea55b22b0 (patch) | |
tree | 3c0ec43cacc0ec7ec2eea4ea831396995bbe1d03 /drivers/infiniband/hw | |
parent | 3810f2a84e994e295e181eb9bd4b8007f611b5eb (diff) |
IB/ipath: Workaround problem of errormask register being overwritten
On some systems, the chip's errormask register is fairly often
overwritten because of an iba6120 chip bug that is triggered by a
vendor-specific PCIe broadcast message. While the register holds the
wrong value we can miss error and hardware error interrupts. This
patch does not prevent the overwrite; it only checks the register
periodically (from the stats timer, every 5 seconds) and rewrites it
from the driver's shadow copy if it differs. Also, make
dd->ipath_errormask the single canonical source for kr_errormask, and
remove references to ipath_ignorederrs, which is currently unused.
Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_init_chip.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_intr.c | 25 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_kernel.h | 11 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_stats.c | 54 |
4 files changed, 66 insertions, 29 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 71e6c9d4a714..9dd0bacf8461 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c | |||
@@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit) | |||
851 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, | 851 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask, |
852 | dd->ipath_hwerrmask); | 852 | dd->ipath_hwerrmask); |
853 | 853 | ||
854 | dd->ipath_maskederrs = dd->ipath_ignorederrs; | ||
855 | /* clear all */ | 854 | /* clear all */ |
856 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); | 855 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); |
857 | /* enable errors that are masked, at least this first time. */ | 856 | /* enable errors that are masked, at least this first time. */ |
858 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | 857 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, |
859 | ~dd->ipath_maskederrs); | 858 | ~dd->ipath_maskederrs); |
860 | /* clear any interrups up to this point (ints still not enabled) */ | 859 | dd->ipath_errormask = ipath_read_kreg64(dd, |
860 | dd->ipath_kregs->kr_errormask); | ||
861 | /* clear any interrupts up to this point (ints still not enabled) */ | ||
861 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); | 862 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); |
862 | 863 | ||
863 | /* | 864 | /* |
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 0c075cf8316b..b29fe7e9b11a 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c | |||
@@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) | |||
517 | 517 | ||
518 | supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); | 518 | supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); |
519 | 519 | ||
520 | /* | 520 | /* don't report errors that are masked */ |
521 | * don't report errors that are masked (includes those always | ||
522 | * ignored) | ||
523 | */ | ||
524 | errs &= ~dd->ipath_maskederrs; | 521 | errs &= ~dd->ipath_maskederrs; |
525 | 522 | ||
526 | /* do these first, they are most important */ | 523 | /* do these first, they are most important */ |
@@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) | |||
566 | * ones on this particular interrupt, which also isn't great | 563 | * ones on this particular interrupt, which also isn't great |
567 | */ | 564 | */ |
568 | dd->ipath_maskederrs |= dd->ipath_lasterror | errs; | 565 | dd->ipath_maskederrs |= dd->ipath_lasterror | errs; |
566 | dd->ipath_errormask &= ~dd->ipath_maskederrs; | ||
569 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | 567 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, |
570 | ~dd->ipath_maskederrs); | 568 | dd->ipath_errormask); |
571 | s_iserr = ipath_decode_err(msg, sizeof msg, | 569 | s_iserr = ipath_decode_err(msg, sizeof msg, |
572 | (dd->ipath_maskederrs & ~dd-> | 570 | dd->ipath_maskederrs); |
573 | ipath_ignorederrs)); | ||
574 | 571 | ||
575 | if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & | 572 | if (dd->ipath_maskederrs & |
576 | ~(INFINIPATH_E_RRCVEGRFULL | | 573 | ~(INFINIPATH_E_RRCVEGRFULL | |
577 | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) | 574 | INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS)) |
578 | ipath_dev_err(dd, "Temporarily disabling " | 575 | ipath_dev_err(dd, "Temporarily disabling " |
579 | "error(s) %llx reporting; too frequent (%s)\n", | 576 | "error(s) %llx reporting; too frequent (%s)\n", |
580 | (unsigned long long) (dd->ipath_maskederrs & | 577 | (unsigned long long)dd->ipath_maskederrs, |
581 | ~dd->ipath_ignorederrs), msg); | 578 | msg); |
582 | else { | 579 | else { |
583 | /* | 580 | /* |
584 | * rcvegrfull and rcvhdrqfull are "normal", | 581 | * rcvegrfull and rcvhdrqfull are "normal", |
@@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd) | |||
793 | /* disable error interrupts, to avoid confusion */ | 790 | /* disable error interrupts, to avoid confusion */ |
794 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); | 791 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); |
795 | 792 | ||
793 | /* also disable interrupts; errormask is sometimes overwritten */ | ||
794 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); | ||
795 | |||
796 | /* | 796 | /* |
797 | * clear all sends, because they may have been | 797 | * clear all sends, because they may have been |
798 | * completed by usercode while in freeze mode, and | 798 | * completed by usercode while in freeze mode, and |
@@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd) | |||
817 | for (i = 0; i < dd->ipath_pioavregs; i++) { | 817 | for (i = 0; i < dd->ipath_pioavregs; i++) { |
818 | /* deal with 6110 chip bug */ | 818 | /* deal with 6110 chip bug */ |
819 | im = i > 3 ? ((i&1) ? i-1 : i+1) : i; | 819 | im = i > 3 ? ((i&1) ? i-1 : i+1) : i; |
820 | val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64))); | 820 | val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im); |
821 | dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] | 821 | dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i] |
822 | = le64_to_cpu(val); | 822 | = le64_to_cpu(val); |
823 | } | 823 | } |
@@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd) | |||
832 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, | 832 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, |
833 | E_SPKT_ERRS_IGNORE); | 833 | E_SPKT_ERRS_IGNORE); |
834 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | 834 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, |
835 | ~dd->ipath_maskederrs); | 835 | dd->ipath_errormask); |
836 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL); | ||
836 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); | 837 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); |
837 | } | 838 | } |
838 | 839 | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index ef773298b805..7a7966f7e4ff 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h | |||
@@ -261,18 +261,10 @@ struct ipath_devdata { | |||
261 | * limiting of hwerror reporting | 261 | * limiting of hwerror reporting |
262 | */ | 262 | */ |
263 | ipath_err_t ipath_lasthwerror; | 263 | ipath_err_t ipath_lasthwerror; |
264 | /* | 264 | /* errors masked because they occur too fast */ |
265 | * errors masked because they occur too fast, also includes errors | ||
266 | * that are always ignored (ipath_ignorederrs) | ||
267 | */ | ||
268 | ipath_err_t ipath_maskederrs; | 265 | ipath_err_t ipath_maskederrs; |
269 | /* time in jiffies at which to re-enable maskederrs */ | 266 | /* time in jiffies at which to re-enable maskederrs */ |
270 | unsigned long ipath_unmasktime; | 267 | unsigned long ipath_unmasktime; |
271 | /* | ||
272 | * errors always ignored (masked), at least for a given | ||
273 | * chip/device, because they are wrong or not useful | ||
274 | */ | ||
275 | ipath_err_t ipath_ignorederrs; | ||
276 | /* count of egrfull errors, combined for all ports */ | 268 | /* count of egrfull errors, combined for all ports */ |
277 | u64 ipath_last_tidfull; | 269 | u64 ipath_last_tidfull; |
278 | /* for ipath_qcheck() */ | 270 | /* for ipath_qcheck() */ |
@@ -436,6 +428,7 @@ struct ipath_devdata { | |||
436 | u64 ipath_lastibcstat; | 428 | u64 ipath_lastibcstat; |
437 | /* hwerrmask shadow */ | 429 | /* hwerrmask shadow */ |
438 | ipath_err_t ipath_hwerrmask; | 430 | ipath_err_t ipath_hwerrmask; |
431 | ipath_err_t ipath_errormask; /* errormask shadow */ | ||
439 | /* interrupt config reg shadow */ | 432 | /* interrupt config reg shadow */ |
440 | u64 ipath_intconfig; | 433 | u64 ipath_intconfig; |
441 | /* kr_sendpiobufbase value */ | 434 | /* kr_sendpiobufbase value */ |
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index 73ed17d03188..bae4f56f7271 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c | |||
@@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd) | |||
196 | } | 196 | } |
197 | } | 197 | } |
198 | 198 | ||
199 | static void ipath_chk_errormask(struct ipath_devdata *dd) | ||
200 | { | ||
201 | static u32 fixed; | ||
202 | u32 ctrl; | ||
203 | unsigned long errormask; | ||
204 | unsigned long hwerrs; | ||
205 | |||
206 | if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED)) | ||
207 | return; | ||
208 | |||
209 | errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask); | ||
210 | |||
211 | if (errormask == dd->ipath_errormask) | ||
212 | return; | ||
213 | fixed++; | ||
214 | |||
215 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); | ||
216 | ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); | ||
217 | |||
218 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | ||
219 | dd->ipath_errormask); | ||
220 | |||
221 | if ((hwerrs & dd->ipath_hwerrmask) || | ||
222 | (ctrl & INFINIPATH_C_FREEZEMODE)) { | ||
223 | /* force re-interrupt of pending events, just in case */ | ||
224 | ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL); | ||
225 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL); | ||
226 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL); | ||
227 | dev_info(&dd->pcidev->dev, | ||
228 | "errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n", | ||
229 | fixed, errormask, (unsigned long)dd->ipath_errormask, | ||
230 | ctrl, hwerrs); | ||
231 | } else | ||
232 | ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n", | ||
233 | fixed, errormask, | ||
234 | (unsigned long)dd->ipath_errormask); | ||
235 | } | ||
236 | |||
237 | |||
199 | /** | 238 | /** |
200 | * ipath_get_faststats - get word counters from chip before they overflow | 239 | * ipath_get_faststats - get word counters from chip before they overflow |
201 | * @opaque - contains a pointer to the infinipath device ipath_devdata | 240 | * @opaque - contains a pointer to the infinipath device ipath_devdata |
@@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque) | |||
251 | dd->ipath_lasterror = 0; | 290 | dd->ipath_lasterror = 0; |
252 | if (dd->ipath_lasthwerror) | 291 | if (dd->ipath_lasthwerror) |
253 | dd->ipath_lasthwerror = 0; | 292 | dd->ipath_lasthwerror = 0; |
254 | if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) | 293 | if (dd->ipath_maskederrs |
255 | && time_after(jiffies, dd->ipath_unmasktime)) { | 294 | && time_after(jiffies, dd->ipath_unmasktime)) { |
256 | char ebuf[256]; | 295 | char ebuf[256]; |
257 | int iserr; | 296 | int iserr; |
258 | iserr = ipath_decode_err(ebuf, sizeof ebuf, | 297 | iserr = ipath_decode_err(ebuf, sizeof ebuf, |
259 | (dd->ipath_maskederrs & ~dd-> | 298 | dd->ipath_maskederrs); |
260 | ipath_ignorederrs)); | 299 | if (dd->ipath_maskederrs & |
261 | if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & | ||
262 | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | | 300 | ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL | |
263 | INFINIPATH_E_PKTERRS )) | 301 | INFINIPATH_E_PKTERRS )) |
264 | ipath_dev_err(dd, "Re-enabling masked errors " | 302 | ipath_dev_err(dd, "Re-enabling masked errors " |
@@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque) | |||
278 | ipath_cdbg(ERRPKT, "Re-enabling packet" | 316 | ipath_cdbg(ERRPKT, "Re-enabling packet" |
279 | " problem interrupt (%s)\n", ebuf); | 317 | " problem interrupt (%s)\n", ebuf); |
280 | } | 318 | } |
281 | dd->ipath_maskederrs = dd->ipath_ignorederrs; | 319 | |
320 | /* re-enable masked errors */ | ||
321 | dd->ipath_errormask |= dd->ipath_maskederrs; | ||
282 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, | 322 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, |
283 | ~dd->ipath_maskederrs); | 323 | dd->ipath_errormask); |
324 | dd->ipath_maskederrs = 0; | ||
284 | } | 325 | } |
285 | 326 | ||
286 | /* limit qfull messages to ~one per minute per port */ | 327 | /* limit qfull messages to ~one per minute per port */ |
@@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque) | |||
294 | } | 335 | } |
295 | } | 336 | } |
296 | 337 | ||
338 | ipath_chk_errormask(dd); | ||
297 | done: | 339 | done: |
298 | mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); | 340 | mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5); |
299 | } | 341 | } |