aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Bryan O'Sullivan <bos@pathscale.com> 2007-03-15 17:44:55 -0400
committer Roland Dreier <rolandd@cisco.com> 2007-04-18 23:20:55 -0400
commit 8ec1077b35359c973f4b1de7c516be570a6df495 (patch)
tree 21ba8b8099034a35a1fb4f8bf301223b1be4af73
parent 6f5c407460bba332d6bee52e19f2305539395511 (diff)
IB/ipath: Change packet problems vs chip errors handling and reporting
Some types of packet errors are moderately common with longer IB cables and large clusters, and are not reported with prints by other IB HCA drivers. This suppresses those messages unless the new __IPATH_ERRPKTDBG bit is set in ipath_debug. Reporting of temporarily disabled frequent error interrupts was also made clearer. We also distinguish between chip errors and bad packets sent or received in the wording of the messages. Signed-off-by: Dave Olson <dave.olson@qlogic.com> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_debug.h 1
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_driver.c 53
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c 57
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_registers.h 9
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_stats.c 14
6 files changed, 99 insertions, 37 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index df69f0d80b8b..42bfbdb0d3e6 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -57,6 +57,7 @@
57#define __IPATH_PROCDBG 0x100 57#define __IPATH_PROCDBG 0x100
58/* print mmap/nopage stuff, not using VDBG any more */ 58/* print mmap/nopage stuff, not using VDBG any more */
59#define __IPATH_MMDBG 0x200 59#define __IPATH_MMDBG 0x200
60#define __IPATH_ERRPKTDBG 0x400
60#define __IPATH_USER_SEND 0x1000 /* use user mode send */ 61#define __IPATH_USER_SEND 0x1000 /* use user mode send */
61#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */ 62#define __IPATH_KERNEL_SEND 0x2000 /* use kernel mode send */
62#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */ 63#define __IPATH_EPKTDBG 0x4000 /* print ethernet packet data */
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 53eb4550bcd3..cf40cf2d1fbb 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -754,9 +754,42 @@ static int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state,
754 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT; 754 return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
755} 755}
756 756
757void ipath_decode_err(char *buf, size_t blen, ipath_err_t err) 757/*
758 * Decode the error status into strings, deciding whether to always
759 * print * it or not depending on "normal packet errors" vs everything
760 * else. Return 1 if "real" errors, otherwise 0 if only packet
761 * errors, so caller can decide what to print with the string.
762 */
763int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
758{ 764{
765 int iserr = 1;
759 *buf = '\0'; 766 *buf = '\0';
767 if (err & INFINIPATH_E_PKTERRS) {
768 if (!(err & ~INFINIPATH_E_PKTERRS))
769 iserr = 0; // if only packet errors.
770 if (ipath_debug & __IPATH_ERRPKTDBG) {
771 if (err & INFINIPATH_E_REBP)
772 strlcat(buf, "EBP ", blen);
773 if (err & INFINIPATH_E_RVCRC)
774 strlcat(buf, "VCRC ", blen);
775 if (err & INFINIPATH_E_RICRC) {
776 strlcat(buf, "CRC ", blen);
777 // clear for check below, so only once
778 err &= INFINIPATH_E_RICRC;
779 }
780 if (err & INFINIPATH_E_RSHORTPKTLEN)
781 strlcat(buf, "rshortpktlen ", blen);
782 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
783 strlcat(buf, "sdroppeddatapkt ", blen);
784 if (err & INFINIPATH_E_SPKTLEN)
785 strlcat(buf, "spktlen ", blen);
786 }
787 if ((err & INFINIPATH_E_RICRC) &&
788 !(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
789 strlcat(buf, "CRC ", blen);
790 if (!iserr)
791 goto done;
792 }
760 if (err & INFINIPATH_E_RHDRLEN) 793 if (err & INFINIPATH_E_RHDRLEN)
761 strlcat(buf, "rhdrlen ", blen); 794 strlcat(buf, "rhdrlen ", blen);
762 if (err & INFINIPATH_E_RBADTID) 795 if (err & INFINIPATH_E_RBADTID)
@@ -767,12 +800,12 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
767 strlcat(buf, "rhdr ", blen); 800 strlcat(buf, "rhdr ", blen);
768 if (err & INFINIPATH_E_RLONGPKTLEN) 801 if (err & INFINIPATH_E_RLONGPKTLEN)
769 strlcat(buf, "rlongpktlen ", blen); 802 strlcat(buf, "rlongpktlen ", blen);
770 if (err & INFINIPATH_E_RSHORTPKTLEN)
771 strlcat(buf, "rshortpktlen ", blen);
772 if (err & INFINIPATH_E_RMAXPKTLEN) 803 if (err & INFINIPATH_E_RMAXPKTLEN)
773 strlcat(buf, "rmaxpktlen ", blen); 804 strlcat(buf, "rmaxpktlen ", blen);
774 if (err & INFINIPATH_E_RMINPKTLEN) 805 if (err & INFINIPATH_E_RMINPKTLEN)
775 strlcat(buf, "rminpktlen ", blen); 806 strlcat(buf, "rminpktlen ", blen);
807 if (err & INFINIPATH_E_SMINPKTLEN)
808 strlcat(buf, "sminpktlen ", blen);
776 if (err & INFINIPATH_E_RFORMATERR) 809 if (err & INFINIPATH_E_RFORMATERR)
777 strlcat(buf, "rformaterr ", blen); 810 strlcat(buf, "rformaterr ", blen);
778 if (err & INFINIPATH_E_RUNSUPVL) 811 if (err & INFINIPATH_E_RUNSUPVL)
@@ -781,32 +814,20 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
781 strlcat(buf, "runexpchar ", blen); 814 strlcat(buf, "runexpchar ", blen);
782 if (err & INFINIPATH_E_RIBFLOW) 815 if (err & INFINIPATH_E_RIBFLOW)
783 strlcat(buf, "ribflow ", blen); 816 strlcat(buf, "ribflow ", blen);
784 if (err & INFINIPATH_E_REBP)
785 strlcat(buf, "EBP ", blen);
786 if (err & INFINIPATH_E_SUNDERRUN) 817 if (err & INFINIPATH_E_SUNDERRUN)
787 strlcat(buf, "sunderrun ", blen); 818 strlcat(buf, "sunderrun ", blen);
788 if (err & INFINIPATH_E_SPIOARMLAUNCH) 819 if (err & INFINIPATH_E_SPIOARMLAUNCH)
789 strlcat(buf, "spioarmlaunch ", blen); 820 strlcat(buf, "spioarmlaunch ", blen);
790 if (err & INFINIPATH_E_SUNEXPERRPKTNUM) 821 if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
791 strlcat(buf, "sunexperrpktnum ", blen); 822 strlcat(buf, "sunexperrpktnum ", blen);
792 if (err & INFINIPATH_E_SDROPPEDDATAPKT)
793 strlcat(buf, "sdroppeddatapkt ", blen);
794 if (err & INFINIPATH_E_SDROPPEDSMPPKT) 823 if (err & INFINIPATH_E_SDROPPEDSMPPKT)
795 strlcat(buf, "sdroppedsmppkt ", blen); 824 strlcat(buf, "sdroppedsmppkt ", blen);
796 if (err & INFINIPATH_E_SMAXPKTLEN) 825 if (err & INFINIPATH_E_SMAXPKTLEN)
797 strlcat(buf, "smaxpktlen ", blen); 826 strlcat(buf, "smaxpktlen ", blen);
798 if (err & INFINIPATH_E_SMINPKTLEN)
799 strlcat(buf, "sminpktlen ", blen);
800 if (err & INFINIPATH_E_SUNSUPVL) 827 if (err & INFINIPATH_E_SUNSUPVL)
801 strlcat(buf, "sunsupVL ", blen); 828 strlcat(buf, "sunsupVL ", blen);
802 if (err & INFINIPATH_E_SPKTLEN)
803 strlcat(buf, "spktlen ", blen);
804 if (err & INFINIPATH_E_INVALIDADDR) 829 if (err & INFINIPATH_E_INVALIDADDR)
805 strlcat(buf, "invalidaddr ", blen); 830 strlcat(buf, "invalidaddr ", blen);
806 if (err & INFINIPATH_E_RICRC)
807 strlcat(buf, "CRC ", blen);
808 if (err & INFINIPATH_E_RVCRC)
809 strlcat(buf, "VCRC ", blen);
810 if (err & INFINIPATH_E_RRCVEGRFULL) 831 if (err & INFINIPATH_E_RRCVEGRFULL)
811 strlcat(buf, "rcvegrfull ", blen); 832 strlcat(buf, "rcvegrfull ", blen);
812 if (err & INFINIPATH_E_RRCVHDRFULL) 833 if (err & INFINIPATH_E_RRCVHDRFULL)
@@ -819,6 +840,8 @@ void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
819 strlcat(buf, "hardware ", blen); 840 strlcat(buf, "hardware ", blen);
820 if (err & INFINIPATH_E_RESET) 841 if (err & INFINIPATH_E_RESET)
821 strlcat(buf, "reset ", blen); 842 strlcat(buf, "reset ", blen);
843done:
844 return iserr;
822} 845}
823 846
824/** 847/**
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 72b9e279d19d..037b8e276429 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -403,10 +403,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
403 * happens so often we never want to count it. 403 * happens so often we never want to count it.
404 */ 404 */
405 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) { 405 if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
406 ipath_decode_err(msg, sizeof msg, dd->ipath_lasterror & 406 int iserr;
407 ~INFINIPATH_E_IBSTATUSCHANGED); 407 iserr = ipath_decode_err(msg, sizeof msg,
408 dd->ipath_lasterror &
409 ~INFINIPATH_E_IBSTATUSCHANGED);
408 if (dd->ipath_lasterror & 410 if (dd->ipath_lasterror &
409 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 411 ~(INFINIPATH_E_RRCVEGRFULL |
412 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
410 ipath_dev_err(dd, "Suppressed %u messages for " 413 ipath_dev_err(dd, "Suppressed %u messages for "
411 "fast-repeating errors (%s) (%llx)\n", 414 "fast-repeating errors (%s) (%llx)\n",
412 supp_msgs, msg, 415 supp_msgs, msg,
@@ -420,8 +423,13 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
420 * them. So only complain about these at debug 423 * them. So only complain about these at debug
421 * level. 424 * level.
422 */ 425 */
423 ipath_dbg("Suppressed %u messages for %s\n", 426 if (iserr)
424 supp_msgs, msg); 427 ipath_dbg("Suppressed %u messages for %s\n",
428 supp_msgs, msg);
429 else
430 ipath_cdbg(ERRPKT,
431 "Suppressed %u messages for %s\n",
432 supp_msgs, msg);
425 } 433 }
426 } 434 }
427} 435}
@@ -462,7 +470,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
462{ 470{
463 char msg[512]; 471 char msg[512];
464 u64 ignore_this_time = 0; 472 u64 ignore_this_time = 0;
465 int i; 473 int i, iserr = 0;
466 int chkerrpkts = 0, noprint = 0; 474 int chkerrpkts = 0, noprint = 0;
467 unsigned supp_msgs; 475 unsigned supp_msgs;
468 476
@@ -502,6 +510,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
502 } 510 }
503 511
504 if (supp_msgs == 250000) { 512 if (supp_msgs == 250000) {
513 int s_iserr;
505 /* 514 /*
506 * It's not entirely reasonable assuming that the errors set 515 * It's not entirely reasonable assuming that the errors set
507 * in the last clear period are all responsible for the 516 * in the last clear period are all responsible for the
@@ -511,17 +520,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
511 dd->ipath_maskederrs |= dd->ipath_lasterror | errs; 520 dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
512 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 521 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
513 ~dd->ipath_maskederrs); 522 ~dd->ipath_maskederrs);
514 ipath_decode_err(msg, sizeof msg, 523 s_iserr = ipath_decode_err(msg, sizeof msg,
515 (dd->ipath_maskederrs & ~dd-> 524 (dd->ipath_maskederrs & ~dd->
516 ipath_ignorederrs)); 525 ipath_ignorederrs));
517 526
518 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 527 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
519 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 528 ~(INFINIPATH_E_RRCVEGRFULL |
520 ipath_dev_err(dd, "Disabling error(s) %llx because " 529 INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
521 "occurring too frequently (%s)\n", 530 ipath_dev_err(dd, "Temporarily disabling "
522 (unsigned long long) 531 "error(s) %llx reporting; too frequent (%s)\n",
523 (dd->ipath_maskederrs & 532 (unsigned long long) (dd->ipath_maskederrs &
524 ~dd->ipath_ignorederrs), msg); 533 ~dd->ipath_ignorederrs), msg);
525 else { 534 else {
526 /* 535 /*
527 * rcvegrfull and rcvhdrqfull are "normal", 536 * rcvegrfull and rcvhdrqfull are "normal",
@@ -530,8 +539,15 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
530 * processing them. So only complain about 539 * processing them. So only complain about
531 * these at debug level. 540 * these at debug level.
532 */ 541 */
533 ipath_dbg("Disabling frequent queue full errors " 542 if (s_iserr)
534 "(%s)\n", msg); 543 ipath_dbg("Temporarily disabling reporting "
544 "too frequent queue full errors (%s)\n",
545 msg);
546 else
547 ipath_cdbg(ERRPKT,
548 "Temporarily disabling reporting too"
549 " frequent packet errors (%s)\n",
550 msg);
535 } 551 }
536 552
537 /* 553 /*
@@ -589,6 +605,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
589 ipath_stats.sps_crcerrs++; 605 ipath_stats.sps_crcerrs++;
590 chkerrpkts = 1; 606 chkerrpkts = 1;
591 } 607 }
608 iserr = errs & ~(E_SUM_PKTERRS | INFINIPATH_E_PKTERRS);
609
592 610
593 /* 611 /*
594 * We don't want to print these two as they happen, or we can make 612 * We don't want to print these two as they happen, or we can make
@@ -677,8 +695,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
677 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF; 695 *dd->ipath_statusp &= ~IPATH_STATUS_IB_CONF;
678 } 696 }
679 697
680 if (!noprint && *msg) 698 if (!noprint && *msg) {
681 ipath_dev_err(dd, "%s error\n", msg); 699 if (iserr)
700 ipath_dev_err(dd, "%s error\n", msg);
701 else
702 dev_info(&dd->pcidev->dev, "%s packet problems\n",
703 msg);
704 }
682 if (dd->ipath_state_wanted & dd->ipath_flags) { 705 if (dd->ipath_state_wanted & dd->ipath_flags) {
683 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, " 706 ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
684 "waking\n", dd->ipath_state_wanted, 707 "waking\n", dd->ipath_state_wanted,
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index c8df65a4d19d..a2162853f5be 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -611,7 +611,7 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
611extern int ipath_diag_inuse; 611extern int ipath_diag_inuse;
612 612
613irqreturn_t ipath_intr(int irq, void *devid); 613irqreturn_t ipath_intr(int irq, void *devid);
614void ipath_decode_err(char *buf, size_t blen, ipath_err_t err); 614int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
615#if __IPATH_INFO || __IPATH_DBG 615#if __IPATH_INFO || __IPATH_DBG
616extern const char *ipath_ibcstatus_str[]; 616extern const char *ipath_ibcstatus_str[];
617#endif 617#endif
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index e0b20529da8b..6e99eafdfd73 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -126,6 +126,15 @@
126#define INFINIPATH_E_RESET 0x0004000000000000ULL 126#define INFINIPATH_E_RESET 0x0004000000000000ULL
127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL 127#define INFINIPATH_E_HARDWARE 0x0008000000000000ULL
128 128
129/*
130 * this is used to print "common" packet errors only when the
131 * __IPATH_ERRPKTDBG bit is set in ipath_debug.
132 */
133#define INFINIPATH_E_PKTERRS ( INFINIPATH_E_SPKTLEN \
134 | INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_RVCRC \
135 | INFINIPATH_E_RICRC | INFINIPATH_E_RSHORTPKTLEN \
136 | INFINIPATH_E_REBP )
137
129/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */ 138/* kr_hwerrclear, kr_hwerrmask, kr_hwerrstatus, bits */
130/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo 139/* TXEMEMPARITYERR bit 0: PIObuf, 1: PIOpbc, 2: launchfifo
131 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID 140 * RXEMEMPARITYERR bit 0: rcvbuf, 1: lookupq, 2: expTID, 3: eagerTID
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index 30a825928fcf..a627342a969c 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -237,11 +237,13 @@ void ipath_get_faststats(unsigned long opaque)
237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) 237 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
238 && time_after(jiffies, dd->ipath_unmasktime)) { 238 && time_after(jiffies, dd->ipath_unmasktime)) {
239 char ebuf[256]; 239 char ebuf[256];
240 ipath_decode_err(ebuf, sizeof ebuf, 240 int iserr;
241 iserr = ipath_decode_err(ebuf, sizeof ebuf,
241 (dd->ipath_maskederrs & ~dd-> 242 (dd->ipath_maskederrs & ~dd->
242 ipath_ignorederrs)); 243 ipath_ignorederrs));
243 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) & 244 if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
244 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL)) 245 ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
246 INFINIPATH_E_PKTERRS ))
245 ipath_dev_err(dd, "Re-enabling masked errors " 247 ipath_dev_err(dd, "Re-enabling masked errors "
246 "(%s)\n", ebuf); 248 "(%s)\n", ebuf);
247 else { 249 else {
@@ -252,8 +254,12 @@ void ipath_get_faststats(unsigned long opaque)
252 * them. So only complain about these at debug 254 * them. So only complain about these at debug
253 * level. 255 * level.
254 */ 256 */
255 ipath_dbg("Disabling frequent queue full errors " 257 if (iserr)
256 "(%s)\n", ebuf); 258 ipath_dbg("Re-enabling queue full errors (%s)\n",
259 ebuf);
260 else
261 ipath_cdbg(ERRPKT, "Re-enabling packet"
262 " problem interrupt (%s)\n", ebuf);
257 } 263 }
258 dd->ipath_maskederrs = dd->ipath_ignorederrs; 264 dd->ipath_maskederrs = dd->ipath_ignorederrs;
259 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 265 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,