aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBryan O'Sullivan <bos@pathscale.com>2007-03-15 17:45:07 -0400
committerRoland Dreier <rolandd@cisco.com>2007-04-18 23:20:58 -0400
commit9783ab405844202b452ac673677e6c8f8c9a6a99 (patch)
tree32aac9ac3ff1089a7ecb05c4ef0b825a95227694
parent820054b7ca7a54ba94d89db4b3c53a24d2d66633 (diff)
IB/ipath: Improve handling and reporting of parity errors
Mostly cleanup. Signed-off-by: Dave Olson <dave.olson@qlogic.com> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c138
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c58
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_registers.h7
8 files changed, 170 insertions, 86 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 056e10663289..13b9785e684c 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -605,8 +605,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd)
605 605
606 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", 606 ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
607 dd->ipath_pageshadow); 607 dd->ipath_pageshadow);
608 vfree(dd->ipath_pageshadow); 608 tmpp = dd->ipath_pageshadow;
609 dd->ipath_pageshadow = NULL; 609 dd->ipath_pageshadow = NULL;
610 vfree(tmpp);
610 } 611 }
611 612
612 /* 613 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index a4019a6b7560..030185f90ee2 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
626 } else 626 } else
627 memcpy(dd->ipath_serial, ifp->if_serial, 627 memcpy(dd->ipath_serial, ifp->if_serial,
628 sizeof ifp->if_serial); 628 sizeof ifp->if_serial);
629 if (!strstr(ifp->if_comment, "Tested successfully"))
630 ipath_dev_err(dd, "Board SN %s did not pass functional "
631 "test: %s\n", dd->ipath_serial,
632 ifp->if_comment);
629 633
630 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", 634 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
631 (unsigned long long) be64_to_cpu(dd->ipath_guid)); 635 (unsigned long long) be64_to_cpu(dd->ipath_guid));
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index b50436c56638..8e0794d316fb 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -284,6 +284,14 @@ static const struct ipath_cregs ipath_ht_cregs = {
284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 284#define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000
285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 285#define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000
286 286
287
288/* TID entries (memory), HT-only */
289#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
290#define INFINIPATH_RT_VALID 0x8000000000000000ULL
291#define INFINIPATH_RT_ADDR_SHIFT 0
292#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL
293#define INFINIPATH_RT_BUFSIZE_SHIFT 48
294
287/* 295/*
288 * masks and bits that are different in different chips, or present only 296 * masks and bits that are different in different chips, or present only
289 * in one 297 * in one
@@ -402,6 +410,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = {
402 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 410 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
403}; 411};
404 412
413#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
414 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
415 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
416#define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \
417 << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT)
418
419static int ipath_ht_txe_recover(struct ipath_devdata *);
420
405/** 421/**
406 * ipath_ht_handle_hwerrors - display hardware errors. 422 * ipath_ht_handle_hwerrors - display hardware errors.
407 * @dd: the infinipath device 423 * @dd: the infinipath device
@@ -450,13 +466,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
450 466
451 /* 467 /*
452 * make sure we get this much out, unless told to be quiet, 468 * make sure we get this much out, unless told to be quiet,
469 * it's a parity error we may recover from,
453 * or it's occurred within the last 5 seconds 470 * or it's occurred within the last 5 seconds
454 */ 471 */
455 if ((hwerrs & ~(dd->ipath_lasthwerror | 472 if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY |
456 ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 473 RXE_EAGER_PARITY)) ||
457 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 474 (ipath_debug & __IPATH_VERBDBG))
458 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) ||
459 (ipath_debug & __IPATH_VERBDBG))
460 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " 475 dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx "
461 "(cleared)\n", (unsigned long long) hwerrs); 476 "(cleared)\n", (unsigned long long) hwerrs);
462 dd->ipath_lasthwerror |= hwerrs; 477 dd->ipath_lasthwerror |= hwerrs;
@@ -467,7 +482,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
467 (hwerrs & ~dd->ipath_hwe_bitsextant)); 482 (hwerrs & ~dd->ipath_hwe_bitsextant));
468 483
469 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 484 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
470 if (ctrl & INFINIPATH_C_FREEZEMODE) { 485 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
471 /* 486 /*
472 * parity errors in send memory are recoverable, 487 * parity errors in send memory are recoverable,
473 * just cancel the send (if indicated in * sendbuffererror), 488 * just cancel the send (if indicated in * sendbuffererror),
@@ -476,50 +491,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
476 * occur if a processor speculative read is done to the PIO 491 * occur if a processor speculative read is done to the PIO
477 * buffer while we are sending a packet, for example. 492 * buffer while we are sending a packet, for example.
478 */ 493 */
479 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 494 if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd))
480 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 495 hwerrs &= ~TXE_PIO_PARITY;
481 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { 496 if (hwerrs & RXE_EAGER_PARITY)
482 ipath_stats.sps_txeparity++; 497 ipath_dev_err(dd, "RXE parity, Eager TID error is not "
483 ipath_dbg("Recovering from TXE parity error (%llu), " 498 "recoverable\n");
484 "hwerrstatus=%llx\n", 499 if (!hwerrs) {
485 (unsigned long long) ipath_stats.sps_txeparity, 500 ipath_dbg("Clearing freezemode on ignored or "
486 (unsigned long long) hwerrs); 501 "recovered hardware error\n");
487 ipath_disarm_senderrbufs(dd);
488 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
489 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
490 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
491 if (!hwerrs) { /* else leave in freeze mode */
492 ipath_write_kreg(dd,
493 dd->ipath_kregs->kr_control,
494 dd->ipath_control);
495 return;
496 }
497 }
498 if (hwerrs) {
499 /*
500 * if any set that we aren't ignoring; only
501 * make the complaint once, in case it's stuck
502 * or recurring, and we get here multiple
503 * times.
504 */
505 if (dd->ipath_flags & IPATH_INITTED) {
506 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
507 "mode), no longer usable, SN %.16s\n",
508 dd->ipath_serial);
509 isfatal = 1;
510 }
511 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
512 /* mark as having had error */
513 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
514 /*
515 * mark as not usable, at a minimum until driver
516 * is reloaded, probably until reboot, since no
517 * other reset is possible.
518 */
519 dd->ipath_flags &= ~IPATH_INITTED;
520 } else {
521 ipath_dbg("Clearing freezemode on ignored hardware "
522 "error\n");
523 ctrl &= ~INFINIPATH_C_FREEZEMODE; 502 ctrl &= ~INFINIPATH_C_FREEZEMODE;
524 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 503 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
525 ctrl); 504 ctrl);
@@ -587,7 +566,32 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
587 dd->ipath_hwerrmask); 566 dd->ipath_hwerrmask);
588 } 567 }
589 568
590 ipath_dev_err(dd, "%s hardware error\n", msg); 569 if (hwerrs) {
570 /*
571 * if any set that we aren't ignoring; only
572 * make the complaint once, in case it's stuck
573 * or recurring, and we get here multiple
574 * times.
575 */
576 ipath_dev_err(dd, "%s hardware error\n", msg);
577 if (dd->ipath_flags & IPATH_INITTED) {
578 ipath_dev_err(dd, "Fatal Hardware Error (freeze "
579 "mode), no longer usable, SN %.16s\n",
580 dd->ipath_serial);
581 isfatal = 1;
582 }
583 *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
584 /* mark as having had error */
585 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
586 /*
587 * mark as not usable, at a minimum until driver
588 * is reloaded, probably until reboot, since no
589 * other reset is possible.
590 */
591 dd->ipath_flags &= ~IPATH_INITTED;
592 }
593 else
594 *msg = 0; /* recovered from all of them */
591 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) 595 if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg)
592 /* 596 /*
593 * for status file; if no trailing brace is copied, 597 * for status file; if no trailing brace is copied,
@@ -658,7 +662,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
658 if (n) 662 if (n)
659 snprintf(name, namelen, "%s", n); 663 snprintf(name, namelen, "%s", n);
660 664
661 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { 665 if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
666 dd->ipath_minrev > 3)) {
662 /* 667 /*
663 * This version of the driver only supports Rev 3.2 and 3.3 668 * This version of the driver only supports Rev 3.2 and 3.3
664 */ 669 */
@@ -1163,6 +1168,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd)
1163 1168
1164 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 1169 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
1165 ipath_dev_err(dd, "MemBIST did not complete!\n"); 1170 ipath_dev_err(dd, "MemBIST did not complete!\n");
1171 if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT)
1172 ipath_dbg("MemBIST corrected\n");
1166 1173
1167 ipath_check_htlink(dd); 1174 ipath_check_htlink(dd);
1168 1175
@@ -1366,6 +1373,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1366 u64 __iomem *tidptr, u32 type, 1373 u64 __iomem *tidptr, u32 type,
1367 unsigned long pa) 1374 unsigned long pa)
1368{ 1375{
1376 if (!dd->ipath_kregbase)
1377 return;
1378
1369 if (pa != dd->ipath_tidinvalid) { 1379 if (pa != dd->ipath_tidinvalid) {
1370 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { 1380 if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) {
1371 dev_info(&dd->pcidev->dev, 1381 dev_info(&dd->pcidev->dev,
@@ -1382,10 +1392,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
1382 pa |= lenvalid | INFINIPATH_RT_VALID; 1392 pa |= lenvalid | INFINIPATH_RT_VALID;
1383 } 1393 }
1384 } 1394 }
1385 if (dd->ipath_kregbase) 1395 writeq(pa, tidptr);
1386 writeq(pa, tidptr);
1387} 1396}
1388 1397
1398
1389/** 1399/**
1390 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager 1400 * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager
1391 * @dd: the infinipath device 1401 * @dd: the infinipath device
@@ -1515,7 +1525,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1515 INFINIPATH_S_ABORT); 1525 INFINIPATH_S_ABORT);
1516 1526
1517 ipath_get_eeprom_info(dd); 1527 ipath_get_eeprom_info(dd);
1518 if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && 1528 if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
1519 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { 1529 dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') {
1520 /* 1530 /*
1521 * Later production QHT7040 has same changes as QHT7140, so 1531 * Later production QHT7040 has same changes as QHT7140, so
@@ -1528,6 +1538,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
1528 return 0; 1538 return 0;
1529} 1539}
1530 1540
1541
1542static int ipath_ht_txe_recover(struct ipath_devdata *dd)
1543{
1544 int cnt = ++ipath_stats.sps_txeparity;
1545 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1546 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1547 ipath_dev_err(dd,
1548 "Too many attempts to recover from "
1549 "TXE parity, giving up\n");
1550 return 0;
1551 }
1552 dev_info(&dd->pcidev->dev,
1553 "Recovering from TXE PIO parity error\n");
1554 ipath_disarm_senderrbufs(dd, 1);
1555 return 1;
1556}
1557
1558
1531/** 1559/**
1532 * ipath_init_ht_get_base_info - set chip-specific flags for user code 1560 * ipath_init_ht_get_base_info - set chip-specific flags for user code
1533 * @dd: the infinipath device 1561 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 5c50383880f2..aa2b51944331 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -321,6 +321,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
321 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), 321 INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"),
322}; 322};
323 323
324#define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \
325 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \
326 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)
327
328static int ipath_pe_txe_recover(struct ipath_devdata *);
329
324/** 330/**
325 * ipath_pe_handle_hwerrors - display hardware errors. 331 * ipath_pe_handle_hwerrors - display hardware errors.
326 * @dd: the infinipath device 332 * @dd: the infinipath device
@@ -394,25 +400,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
394 * occur if a processor speculative read is done to the PIO 400 * occur if a processor speculative read is done to the PIO
395 * buffer while we are sending a packet, for example. 401 * buffer while we are sending a packet, for example.
396 */ 402 */
397 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 403 if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd))
398 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 404 hwerrs &= ~TXE_PIO_PARITY;
399 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) {
400 ipath_stats.sps_txeparity++;
401 ipath_dbg("Recovering from TXE parity error (%llu), "
402 "hwerrstatus=%llx\n",
403 (unsigned long long) ipath_stats.sps_txeparity,
404 (unsigned long long) hwerrs);
405 ipath_disarm_senderrbufs(dd);
406 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
407 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
408 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
409 if (!hwerrs) { /* else leave in freeze mode */
410 ipath_write_kreg(dd,
411 dd->ipath_kregs->kr_control,
412 dd->ipath_control);
413 return;
414 }
415 }
416 if (hwerrs) { 405 if (hwerrs) {
417 /* 406 /*
418 * if any set that we aren't ignoring only make the 407 * if any set that we aren't ignoring only make the
@@ -581,6 +570,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd)
581 570
582 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) 571 if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST))
583 ipath_dev_err(dd, "MemBIST did not complete!\n"); 572 ipath_dev_err(dd, "MemBIST did not complete!\n");
573 if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND)
574 ipath_dbg("MemBIST corrected\n");
584 575
585 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ 576 val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */
586 577
@@ -1330,6 +1321,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd)
1330 dd->ipath_irq = 0; 1321 dd->ipath_irq = 0;
1331} 1322}
1332 1323
1324/*
1325 * On platforms using this chip, and not having ordered WC stores, we
1326 * can get TXE parity errors due to speculative reads to the PIO buffers,
1327 * and this, due to a chip bug can result in (many) false parity error
1328 * reports. So it's a debug print on those, and an info print on systems
1329 * where the speculative reads don't occur.
1330 * Because we can get lots of false errors, we have no upper limit
1331 * on recovery attempts on those platforms.
1332 */
1333static int ipath_pe_txe_recover(struct ipath_devdata *dd)
1334{
1335 if (ipath_unordered_wc())
1336 ipath_dbg("Recovering from TXE PIO parity error\n");
1337 else {
1338 int cnt = ++ipath_stats.sps_txeparity;
1339 if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) {
1340 if (cnt == IPATH_MAX_PARITY_ATTEMPTS)
1341 ipath_dev_err(dd,
1342 "Too many attempts to recover from "
1343 "TXE parity, giving up\n");
1344 return 0;
1345 }
1346 dev_info(&dd->pcidev->dev,
1347 "Recovering from TXE PIO parity error\n");
1348 }
1349 ipath_disarm_senderrbufs(dd, 1);
1350 return 1;
1351}
1352
1333/** 1353/**
1334 * ipath_init_iba6120_funcs - set up the chip-specific function pointers 1354 * ipath_init_iba6120_funcs - set up the chip-specific function pointers
1335 * @dd: the infinipath device 1355 * @dd: the infinipath device
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 1e77b55afe93..72caa9f091f6 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -590,6 +590,10 @@ static int init_housekeeping(struct ipath_devdata *dd,
590 goto done; 590 goto done;
591 } 591 }
592 592
593
594 /* clear diagctrl register, in case diags were running and crashed */
595 ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0);
596
593 /* clear the initial reset flag, in case first driver load */ 597 /* clear the initial reset flag, in case first driver load */
594 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 598 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
595 INFINIPATH_E_RESET); 599 INFINIPATH_E_RESET);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 24853310df1c..45d033169c6e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,10 +38,39 @@
38#include "ipath_common.h" 38#include "ipath_common.h"
39 39
40/* 40/*
41 * clear (write) a pio buffer, to clear a parity error. This routine
42 * should only be called when in freeze mode, and the buffer should be
43 * canceled afterwards.
44 */
45static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
46{
47 u32 __iomem *pbuf;
48 u32 dwcnt; /* dword count to write */
49 if (pnum < dd->ipath_piobcnt2k) {
50 pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
51 dd->ipath_palign);
52 dwcnt = dd->ipath_piosize2k >> 2;
53 }
54 else {
55 pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
56 (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
57 dwcnt = dd->ipath_piosize4k >> 2;
58 }
59 dev_info(&dd->pcidev->dev,
60 "Rewrite PIO buffer %u, to recover from parity error\n",
61 pnum);
62 *pbuf = dwcnt+1; /* no flush required, since already in freeze */
63 while(--dwcnt)
64 *pbuf++ = 0;
65}
66
67/*
41 * Called when we might have an error that is specific to a particular 68 * Called when we might have an error that is specific to a particular
42 * PIO buffer, and may need to cancel that buffer, so it can be re-used. 69 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
70 * If rewrite is true, and bits are set in the sendbuffererror registers,
71 * we'll write to the buffer, for error recovery on parity errors.
43 */ 72 */
44void ipath_disarm_senderrbufs(struct ipath_devdata *dd) 73void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
45{ 74{
46 u32 piobcnt; 75 u32 piobcnt;
47 unsigned long sbuf[4]; 76 unsigned long sbuf[4];
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
74 } 103 }
75 104
76 for (i = 0; i < piobcnt; i++) 105 for (i = 0; i < piobcnt; i++)
77 if (test_bit(i, sbuf)) 106 if (test_bit(i, sbuf)) {
107 if (rewrite)
108 ipath_clrpiobuf(dd, i);
78 ipath_disarm_piobufs(dd, i, 1); 109 ipath_disarm_piobufs(dd, i, 1);
110 }
79 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ 111 dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
80 } 112 }
81} 113}
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
114{ 146{
115 u64 ignore_this_time = 0; 147 u64 ignore_this_time = 0;
116 148
117 ipath_disarm_senderrbufs(dd); 149 ipath_disarm_senderrbufs(dd, 0);
118 if ((errs & E_SUM_LINK_PKTERRS) && 150 if ((errs & E_SUM_LINK_PKTERRS) &&
119 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 151 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
120 /* 152 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 5428c2619ba9..e900c2593f44 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd);
590void ipath_disable_wc(struct ipath_devdata *dd); 590void ipath_disable_wc(struct ipath_devdata *dd);
591int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); 591int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
592void ipath_shutdown_device(struct ipath_devdata *); 592void ipath_shutdown_device(struct ipath_devdata *);
593void ipath_disarm_senderrbufs(struct ipath_devdata *);
594 593
595struct file_operations; 594struct file_operations;
596int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, 595int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -713,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *);
713void ipath_init_iba6110_funcs(struct ipath_devdata *); 712void ipath_init_iba6110_funcs(struct ipath_devdata *);
714void ipath_get_eeprom_info(struct ipath_devdata *); 713void ipath_get_eeprom_info(struct ipath_devdata *);
715u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 714u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
715void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
716 716
717/* 717/*
718 * number of words used for protocol header if not set by ipath_userinit(); 718 * number of words used for protocol header if not set by ipath_userinit();
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int);
897 897
898extern unsigned ipath_debug; /* debugging bit mask */ 898extern unsigned ipath_debug; /* debugging bit mask */
899 899
900#define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */
901
900const char *ipath_get_unit_name(int unit); 902const char *ipath_get_unit_name(int unit);
901 903
902extern struct mutex ipath_mutex; 904extern struct mutex ipath_mutex;
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 6e99eafdfd73..c182bcd62098 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -308,13 +308,6 @@
308#define INFINIPATH_XGXS_RX_POL_SHIFT 19 308#define INFINIPATH_XGXS_RX_POL_SHIFT 19
309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL 309#define INFINIPATH_XGXS_RX_POL_MASK 0xfULL
310 310
311#define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */
312
313/* TID entries (memory), HT-only */
314#define INFINIPATH_RT_VALID 0x8000000000000000ULL
315#define INFINIPATH_RT_ADDR_SHIFT 0
316#define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF
317#define INFINIPATH_RT_BUFSIZE_SHIFT 48
318 311
319/* 312/*
320 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our 313 * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our