aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorBryan O'Sullivan <bos@pathscale.com>2006-09-28 12:00:18 -0400
committerRoland Dreier <rolandd@cisco.com>2006-09-28 14:17:03 -0400
commit89d1e09b6a6d844ef327937f41658a426be42501 (patch)
tree5730241c737baf67b0b1ddf89ff38f6936d649c4 /drivers/infiniband
parent510847750c9d26052a71631e0fcad9e7f7a5f369 (diff)
IB/ipath: Fix and recover TXE piobuf and PBC parity errors
We can sometimes trigger parity errors due to processor speculative reads to our write-combined memory (mostly seen on Woodcrest). Add a stats counter for these. Factor out the sendbuffererror buffer cancellation code so it can be used in the new handling; suppress likely subsequent error messages if they occur within two jiffies of the cancellation. Also restore two dropped TXE lines in hwe_bitsextant, noticed while debugging. Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_common.h3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c32
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c37
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c98
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h6
5 files changed, 124 insertions, 52 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 382956d2ea4b..a9b109a353bc 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -141,8 +141,9 @@ struct infinipath_stats {
141 * packets if ipath not configured, etc.) 141 * packets if ipath not configured, etc.)
142 */ 142 */
143 __u64 sps_krdrops; 143 __u64 sps_krdrops;
144 __u64 sps_txeparity; /* PIO buffer parity error, recovered */
144 /* pad for future growth */ 145 /* pad for future growth */
145 __u64 __sps_pad[46]; 146 __u64 __sps_pad[45];
146}; 147};
147 148
148/* 149/*
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index fd49c9c32c68..9e4e8d4c6e20 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -451,7 +451,10 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
451 * make sure we get this much out, unless told to be quiet, 451 * make sure we get this much out, unless told to be quiet,
452 * or it's occurred within the last 5 seconds 452 * or it's occurred within the last 5 seconds
453 */ 453 */
454 if ((hwerrs & ~dd->ipath_lasthwerror) || 454 if ((hwerrs & ~(dd->ipath_lasthwerror |
455 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
456 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
457 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
455 (ipath_debug & __IPATH_VERBDBG)) 458 (ipath_debug & __IPATH_VERBDBG))
456 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 459 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
457 "(cleared)\n", (unsigned long long) hwerrs); 460 "(cleared)\n", (unsigned long long) hwerrs);
@@ -464,6 +467,33 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
464 467
465 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 468 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
466 if (ctrl & INFINIPATH_C_FREEZEMODE) { 469 if (ctrl & INFINIPATH_C_FREEZEMODE) {
470 /*
471 * parity errors in send memory are recoverable,
472 * just cancel the send (if indicated in sendbuffererror),
473 * count the occurrence, unfreeze (if no other handled
474 * hardware error bits are set), and continue. They can
475 * occur if a processor speculative read is done to the PIO
476 * buffer while we are sending a packet, for example.
477 */
478 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
479 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
480 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
481 ipath_stats.sps_txeparity++;
482 ipath_dbg("Recovering from TXE parity error (%llu), "
483 "hwerrstatus=%llx\n",
484 (unsigned long long) ipath_stats.sps_txeparity,
485 (unsigned long long) hwerrs);
486 ipath_disarm_senderrbufs(dd);
487 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
488 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
489 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
490 if (!hwerrs) { /* else leave in freeze mode */
491 ipath_write_kreg(dd,
492 dd->ipath_kregs->kr_control,
493 dd->ipath_control);
494 return;
495 }
496 }
467 if (hwerrs) { 497 if (hwerrs) {
468 /* 498 /*
469 * if any set that we aren't ignoring; only 499 * if any set that we aren't ignoring; only
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 08a44dd9ed6f..024b6aa320f1 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -370,7 +370,10 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
370 * make sure we get this much out, unless told to be quiet, 370 * make sure we get this much out, unless told to be quiet,
371 * or it's occurred within the last 5 seconds 371 * or it's occurred within the last 5 seconds
372 */ 372 */
373 if ((hwerrs & ~dd->ipath_lasthwerror) || 373 if ((hwerrs & ~(dd->ipath_lasthwerror |
374 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
375 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
376 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
374 (ipath_debug & __IPATH_VERBDBG)) 377 (ipath_debug & __IPATH_VERBDBG))
375 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 378 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
376 "(cleared)\n", (unsigned long long) hwerrs); 379 "(cleared)\n", (unsigned long long) hwerrs);
@@ -383,6 +386,33 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
383 386
384 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 387 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
385 if (ctrl & INFINIPATH_C_FREEZEMODE) { 388 if (ctrl & INFINIPATH_C_FREEZEMODE) {
389 /*
390 * parity errors in send memory are recoverable,
391 * just cancel the send (if indicated in sendbuffererror),
392 * count the occurrence, unfreeze (if no other handled
393 * hardware error bits are set), and continue. They can
394 * occur if a processor speculative read is done to the PIO
395 * buffer while we are sending a packet, for example.
396 */
397 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
398 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
399 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
400 ipath_stats.sps_txeparity++;
401 ipath_dbg("Recovering from TXE parity error (%llu), "
402 "hwerrstatus=%llx\n",
403 (unsigned long long) ipath_stats.sps_txeparity,
404 (unsigned long long) hwerrs);
405 ipath_disarm_senderrbufs(dd);
406 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
407 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
408 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
409 if (!hwerrs) { /* else leave in freeze mode */
410 ipath_write_kreg(dd,
411 dd->ipath_kregs->kr_control,
412 dd->ipath_control);
413 return;
414 }
415 }
386 if (hwerrs) { 416 if (hwerrs) {
387 /* 417 /*
388 * if any set that we aren't ignoring only make the 418 * if any set that we aren't ignoring only make the
@@ -406,9 +436,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
406 } else { 436 } else {
407 ipath_dbg("Clearing freezemode on ignored hardware " 437 ipath_dbg("Clearing freezemode on ignored hardware "
408 "error\n"); 438 "error\n");
409 ctrl &= ~INFINIPATH_C_FREEZEMODE;
410 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 439 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
411 ctrl); 440 dd->ipath_control);
412 } 441 }
413 } 442 }
414 443
@@ -880,6 +909,8 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
880 dd->ipath_hwe_bitsextant = 909 dd->ipath_hwe_bitsextant =
881 (INFINIPATH_HWE_RXEMEMPARITYERR_MASK << 910 (INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
882 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | 911 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) |
912 (INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
913 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) |
883 (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK << 914 (INFINIPATH_HWE_PCIEMEMPARITYERR_MASK <<
884 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) | 915 INFINIPATH_HWE_PCIEMEMPARITYERR_SHIFT) |
885 INFINIPATH_HWE_PCIE1PLLFAILED | 916 INFINIPATH_HWE_PCIE1PLLFAILED |
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index f4d8aafc6306..6bee53ce5f33 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -37,6 +37,50 @@
37#include "ipath_verbs.h" 37#include "ipath_verbs.h"
38#include "ipath_common.h" 38#include "ipath_common.h"
39 39
40/*
41 * Called when we might have an error that is specific to a particular
42 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
43 */
44void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
45{
46 u32 piobcnt;
47 unsigned long sbuf[4];
48 /*
49 * it's possible that sendbuffererror could have bits set; might
50 * have already done this as a result of hardware error handling
51 */
52 piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
53 /* read these before writing errorclear */
54 sbuf[0] = ipath_read_kreg64(
55 dd, dd->ipath_kregs->kr_sendbuffererror);
56 sbuf[1] = ipath_read_kreg64(
57 dd, dd->ipath_kregs->kr_sendbuffererror + 1);
58 if (piobcnt > 128) {
59 sbuf[2] = ipath_read_kreg64(
60 dd, dd->ipath_kregs->kr_sendbuffererror + 2);
61 sbuf[3] = ipath_read_kreg64(
62 dd, dd->ipath_kregs->kr_sendbuffererror + 3);
63 }
64
65 if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
66 int i;
67 if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
68 __IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
69 "SendbufErrs %lx %lx", sbuf[0],
70 sbuf[1]);
71 if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
72 printk(" %lx %lx ", sbuf[2], sbuf[3]);
73 printk("\n");
74 }
75
76 for (i = 0; i < piobcnt; i++)
77 if (test_bit(i, sbuf))
78 ipath_disarm_piobufs(dd, i, 1);
79 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
80 }
81}
82
83
40/* These are all rcv-related errors which we want to count for stats */ 84/* These are all rcv-related errors which we want to count for stats */
41#define E_SUM_PKTERRS \ 85#define E_SUM_PKTERRS \
42 (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ 86 (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
@@ -68,53 +112,9 @@
68 112
69static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) 113static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
70{ 114{
71 unsigned long sbuf[4];
72 u64 ignore_this_time = 0; 115 u64 ignore_this_time = 0;
73 u32 piobcnt;
74
75 /* if possible that sendbuffererror could be valid */
76 piobcnt = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
77 /* read these before writing errorclear */
78 sbuf[0] = ipath_read_kreg64(
79 dd, dd->ipath_kregs->kr_sendbuffererror);
80 sbuf[1] = ipath_read_kreg64(
81 dd, dd->ipath_kregs->kr_sendbuffererror + 1);
82 if (piobcnt > 128) {
83 sbuf[2] = ipath_read_kreg64(
84 dd, dd->ipath_kregs->kr_sendbuffererror + 2);
85 sbuf[3] = ipath_read_kreg64(
86 dd, dd->ipath_kregs->kr_sendbuffererror + 3);
87 }
88 116
89 if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) { 117 ipath_disarm_senderrbufs(dd);
90 int i;
91
92 ipath_cdbg(PKT, "SendbufErrs %lx %lx ", sbuf[0], sbuf[1]);
93 if (ipath_debug & __IPATH_PKTDBG && piobcnt > 128)
94 printk("%lx %lx ", sbuf[2], sbuf[3]);
95 for (i = 0; i < piobcnt; i++) {
96 if (test_bit(i, sbuf)) {
97 u32 __iomem *piobuf;
98 if (i < dd->ipath_piobcnt2k)
99 piobuf = (u32 __iomem *)
100 (dd->ipath_pio2kbase +
101 i * dd->ipath_palign);
102 else
103 piobuf = (u32 __iomem *)
104 (dd->ipath_pio4kbase +
105 (i - dd->ipath_piobcnt2k) *
106 dd->ipath_4kalign);
107
108 ipath_cdbg(PKT,
109 "PIObuf[%u] @%p pbc is %x; ",
110 i, piobuf, readl(piobuf));
111
112 ipath_disarm_piobufs(dd, i, 1);
113 }
114 }
115 if (ipath_debug & __IPATH_PKTDBG)
116 printk("\n");
117 }
118 if ((errs & E_SUM_LINK_PKTERRS) && 118 if ((errs & E_SUM_LINK_PKTERRS) &&
119 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 119 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
120 /* 120 /*
@@ -554,6 +554,14 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
554 ~(INFINIPATH_E_HARDWARE | 554 ~(INFINIPATH_E_HARDWARE |
555 INFINIPATH_E_IBSTATUSCHANGED); 555 INFINIPATH_E_IBSTATUSCHANGED);
556 } 556 }
557
558 /* likely due to cancel, so suppress */
559 if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
560 dd->ipath_lastcancel > jiffies) {
561 ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
562 errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
563 }
564
557 if (!errs) 565 if (!errs)
558 return 0; 566 return 0;
559 567
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 02134cb3e944..d7540b71b451 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -427,6 +427,9 @@ struct ipath_devdata {
427 unsigned long ipath_rcvctrl; 427 unsigned long ipath_rcvctrl;
428 /* shadow kr_sendctrl */ 428 /* shadow kr_sendctrl */
429 unsigned long ipath_sendctrl; 429 unsigned long ipath_sendctrl;
430 /* ports waiting for PIOavail intr */
431 unsigned long ipath_portpiowait;
432 unsigned long ipath_lastcancel; /* to not count armlaunch after cancel */
430 433
431 /* value we put in kr_rcvhdrcnt */ 434 /* value we put in kr_rcvhdrcnt */
432 u32 ipath_rcvhdrcnt; 435 u32 ipath_rcvhdrcnt;
@@ -490,8 +493,6 @@ struct ipath_devdata {
490 u32 ipath_htwidth; 493 u32 ipath_htwidth;
491 /* HT speed (200,400,800,1000) from HT config */ 494 /* HT speed (200,400,800,1000) from HT config */
492 u32 ipath_htspeed; 495 u32 ipath_htspeed;
493 /* ports waiting for PIOavail intr */
494 unsigned long ipath_portpiowait;
495 /* 496 /*
496 * number of sequential ibcstatus change for polling active/quiet 497 * number of sequential ibcstatus change for polling active/quiet
497 * (i.e., link not coming up). 498 * (i.e., link not coming up).
@@ -585,6 +586,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
585void ipath_disable_wc(struct ipath_devdata *dd); 586void ipath_disable_wc(struct ipath_devdata *dd);
586int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); 587int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
587void ipath_shutdown_device(struct ipath_devdata *); 588void ipath_shutdown_device(struct ipath_devdata *);
589void ipath_disarm_senderrbufs(struct ipath_devdata *);
588 590
589struct file_operations; 591struct file_operations;
590int ipath_cdev_init(int minor, char *name, struct file_operations *fops, 592int ipath_cdev_init(int minor, char *name, struct file_operations *fops,