aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath/ipath_iba6110.c
diff options
context:
space:
mode:
author Bryan O'Sullivan <bos@pathscale.com> 2007-03-15 17:45:07 -0400
committer Roland Dreier <rolandd@cisco.com> 2007-04-18 23:20:58 -0400
commit 9783ab405844202b452ac673677e6c8f8c9a6a99 (patch)
tree 32aac9ac3ff1089a7ecb05c4ef0b825a95227694 /drivers/infiniband/hw/ipath/ipath_iba6110.c
parent 820054b7ca7a54ba94d89db4b3c53a24d2d66633 (diff)
IB/ipath: Improve handling and reporting of parity errors
Mostly cleanup.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath/ipath_iba6110.c')
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_iba6110.c 138
1 files changed, 83 insertions, 55 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index b50436c56638..8e0794d316fb 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -284,6 +284,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
286 286
287
288/* TID entries (memory), HT-only */
289#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
290#define INFINIPATH_RT_VALID 0x8000000000000000ULL
291#define INFINIPATH_RT_ADDR_SHIFT 0
292#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
293#define INFINIPATH_RT_BUFSIZE_SHIFT 48
294
287/* 295/*
288 * masks and bits that are different in different chips, or present only 296 * masks and bits that are different in different chips, or present only
289 * in one 297 * in one
@@ -402,6 +410,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
402 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 410 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
403}; 411};
404 412
413#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
414 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
415 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
416#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
417 << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
418
419static int ipath_ht_txe_recover(struct ipath_devdata *);
420
405/** 421/**
406 * ipath_ht_handle_hwerrors - display hardware errors. 422 * ipath_ht_handle_hwerrors - display hardware errors.
407 * @dd: the infinipath device 423 * @dd: the infinipath device
@@ -450,13 +466,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
450 466
451 /* 467 /*
452 * make sure we get this much out, unless told to be quiet, 468 * make sure we get this much out, unless told to be quiet,
469 * it's a parity error we may recover from,
453 * or it's occurred within the last 5 seconds 470 * or it's occurred within the last 5 seconds
454 */ 471 */
455 if ((hwerrs & ~(dd->ipath_lasthwerror | 472 if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
456 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 473 RXE_EAGER_PARITY)) ||
457 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 474 (ipath_debug & __IPATH_VERBDBG))
458 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
459 (ipath_debug & __IPATH_VERBDBG))
460 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 475 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
461 "(cleared)\n", (unsigned long long) hwerrs); 476 "(cleared)\n", (unsigned long long) hwerrs);
462 dd->ipath_lasthwerror |= hwerrs; 477 dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +482,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
467 (hwerrs & ~dd->ipath_hwe_bitsextant)); 482 (hwerrs & ~dd->ipath_hwe_bitsextant));
468 483
469 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 484 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
470 if (ctrl & INFINIPATH_C_FREEZEMODE) { 485 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
471 /* 486 /*
472 * parity errors in send memory are recoverable, 487 * parity errors in send memory are recoverable,
473 * just cancel the send (if indicated in * sendbuffererror), 488 * just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +491,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
476 * occur if a processor speculative read is done to the PIO 491 * occur if a processor speculative read is done to the PIO
477 * buffer while we are sending a packet, for example. 492 * buffer while we are sending a packet, for example.
478 */ 493 */
479 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 494 if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
480 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 495 hwerrs &= ~TXE_PIO_PARITY;
481 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { 496 if (hwerrs & RXE_EAGER_PARITY)
482 ipath_stats.sps_txeparity++; 497 ipath_dev_err(dd, "RXE parity, Eager TID error is not "
483 ipath_dbg("Recovering from TXE parity error (%llu), " 498 "recoverable\n");
484 "hwerrstatus=%llx\n", 499 if (!hwerrs) {
485 (unsigned long long) ipath_stats.sps_txeparity, 500 ipath_dbg("Clearing freezemode on ignored or "
486 (unsigned long long) hwerrs); 501 "recovered hardware error\n");
487 ipath_disarm_senderrbufs(dd);
488 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
489 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
490 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
491 if (!hwerrs) { /* else leave in freeze mode */
492 ipath_write_kreg(dd,
493 dd->ipath_kregs->kr_control,
494 dd->ipath_control);
495 return;
496 }
497 }
498 if (hwerrs) {
499 /*
500 * if any set that we aren't ignoring; only
501 * make the complaint once, in case it's stuck
502 * or recurring, and we get here multiple
503 * times.
504 */
505 if (dd->ipath_flags & IPATH_INITTED) {
506 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
507 "mode), no longer usable, SN %.16s\n",
508 dd->ipath_serial);
509 isfatal = 1;
510 }
511 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
512 /* mark as having had error */
513 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
514 /*
515 * mark as not usable, at a minimum until driver
516 * is reloaded, probably until reboot, since no
517 * other reset is possible.
518 */
519 dd->ipath_flags &= ~IPATH_INITTED;
520 } else {
521 ipath_dbg("Clearing freezemode on ignored hardware "
522 "error\n");
523 ctrl &= ~INFINIPATH_C_FREEZEMODE; 502 ctrl &= ~INFINIPATH_C_FREEZEMODE;
524 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 503 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
525 ctrl); 504 ctrl);
@@ -587,7 +566,32 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
587 dd->ipath_hwerrmask); 566 dd->ipath_hwerrmask);
588 } 567 }
589 568
590 ipath_dev_err(dd, "%s hardware error\n", msg); 569 if (hwerrs) {
570 /*
571 * if any set that we aren't ignoring; only
572 * make the complaint once, in case it's stuck
573 * or recurring, and we get here multiple
574 * times.
575 */
576 ipath_dev_err(dd, "%s hardware error\n", msg);
577 if (dd->ipath_flags & IPATH_INITTED) {
578 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
579 "mode), no longer usable, SN %.16s\n",
580 dd->ipath_serial);
581 isfatal = 1;
582 }
583 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
584 /* mark as having had error */
585 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
586 /*
587 * mark as not usable, at a minimum until driver
588 * is reloaded, probably until reboot, since no
589 * other reset is possible.
590 */
591 dd->ipath_flags &= ~IPATH_INITTED;
592 }
593 else
594 *msg = 0; /* recovered from all of them */
591 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) 595 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
592 /* 596 /*
593 * for status file; if no trailing brace is copied, 597 * for status file; if no trailing brace is copied,
@@ -658,7 +662,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
658 if (n) 662 if (n)
659 snprintf(name, namelen, "%s", n); 663 snprintf(name, namelen, "%s", n);
660 664
661 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { 665 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
666 dd->ipath_minrev > 3)) {
662 /* 667 /*
663 * This version of the driver only supports Rev 3.2 and 3.3 668 * This version of the driver only supports Rev 3.2 and 3.3
664 */ 669 */
@@ -1163,6 +1168,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1163 1168
1164 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 1169 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
1165 ipath_dev_err(dd, "MemBIST did not complete!\n"); 1170 ipath_dev_err(dd, "MemBIST did not complete!\n");
1171 if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
1172 ipath_dbg("MemBIST corrected\n");
1166 1173
1167 ipath_check_htlink(dd); 1174 ipath_check_htlink(dd);
1168 1175
@@ -1366,6 +1373,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1366 u64 __iomem *tidptr, u32 type, 1373 u64 __iomem *tidptr, u32 type,
1367 unsigned long pa) 1374 unsigned long pa)
1368{ 1375{
1376 if (!dd->ipath_kregbase)
1377 return;
1378
1369 if (pa != dd->ipath_tidinvalid) { 1379 if (pa != dd->ipath_tidinvalid) {
1370 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { 1380 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
1371 dev_info(&dd->pcidev->dev, 1381 dev_info(&dd->pcidev->dev,
@@ -1382,10 +1392,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1382 pa |= lenvalid | INFINIPATH_RT_VALID; 1392 pa |= lenvalid | INFINIPATH_RT_VALID;
1383 } 1393 }
1384 } 1394 }
1385 if (dd->ipath_kregbase) 1395 writeq(pa, tidptr);
1386 writeq(pa, tidptr);
1387} 1396}
1388 1397
1398
1389/** 1399/**
1390 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager 1400 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
1391 * @dd: the infinipath device 1401 * @dd: the infinipath device
@@ -1515,7 +1525,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1515 INFINIPATH_S_ABORT); 1525 INFINIPATH_S_ABORT);
1516 1526
1517 ipath_get_eeprom_info(dd); 1527 ipath_get_eeprom_info(dd);
1518 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1528 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
1519 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { 1529 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1520 /* 1530 /*
1521 * Later production QHT7040 has same changes as QHT7140, so 1531 * Later production QHT7040 has same changes as QHT7140, so
@@ -1528,6 +1538,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1528 return 0; 1538 return 0;
1529} 1539}
1530 1540
1541
1542static int ipath_ht_txe_recover(struct ipath_devdata *dd)
1543{
1544 int cnt = ++ipath_stats.sps_txeparity;
1545 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1546 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1547 ipath_dev_err(dd,
1548 "Too many attempts to recover from "
1549 "TXE parity, giving up\n");
1550 return 0;
1551 }
1552 dev_info(&dd->pcidev->dev,
1553 "Recovering from TXE PIO parity error\n");
1554 ipath_disarm_senderrbufs(dd, 1);
1555 return 1;
1556}
1557
1558
1531/** 1559/**
1532 * ipath_init_ht_get_base_info - set chip-specific flags for user code 1560 * ipath_init_ht_get_base_info - set chip-specific flags for user code
1533 * @dd: the infinipath device 1561 * @dd: the infinipath device