diff options
author | Bryan O'Sullivan <bos@pathscale.com> | 2007-03-15 17:45:07 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-04-18 23:20:58 -0400 |
commit | 9783ab405844202b452ac673677e6c8f8c9a6a99 (patch) | |
tree | 32aac9ac3ff1089a7ecb05c4ef0b825a95227694 /drivers/infiniband | |
parent | 820054b7ca7a54ba94d89db4b3c53a24d2d66633 (diff) |
IB/ipath: Improve handling and reporting of parity errors
Mostly cleanup.
Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_driver.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_eeprom.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_iba6110.c | 138 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_iba6120.c | 58 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_init_chip.c | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_intr.c | 38 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_kernel.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_registers.h | 7 |
8 files changed, 170 insertions, 86 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 056e10663289..13b9785e684c 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c | |||
@@ -605,8 +605,9 @@ static void __devexit cleanup_device(struct ipath_devdata *dd) | |||
605 | 605 | ||
606 | ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", | 606 | ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n", |
607 | dd->ipath_pageshadow); | 607 | dd->ipath_pageshadow); |
608 | vfree(dd->ipath_pageshadow); | 608 | tmpp = dd->ipath_pageshadow; |
609 | dd->ipath_pageshadow = NULL; | 609 | dd->ipath_pageshadow = NULL; |
610 | vfree(tmpp); | ||
610 | } | 611 | } |
611 | 612 | ||
612 | /* | 613 | /* |
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index a4019a6b7560..030185f90ee2 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c | |||
@@ -626,6 +626,10 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) | |||
626 | } else | 626 | } else |
627 | memcpy(dd->ipath_serial, ifp->if_serial, | 627 | memcpy(dd->ipath_serial, ifp->if_serial, |
628 | sizeof ifp->if_serial); | 628 | sizeof ifp->if_serial); |
629 | if (!strstr(ifp->if_comment, "Tested successfully")) | ||
630 | ipath_dev_err(dd, "Board SN %s did not pass functional " | ||
631 | "test: %s\n", dd->ipath_serial, | ||
632 | ifp->if_comment); | ||
629 | 633 | ||
630 | ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", | 634 | ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", |
631 | (unsigned long long) be64_to_cpu(dd->ipath_guid)); | 635 | (unsigned long long) be64_to_cpu(dd->ipath_guid)); |
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index b50436c56638..8e0794d316fb 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c | |||
@@ -284,6 +284,14 @@ static const struct ipath_cregs ipath_ht_cregs = { | |||
284 | #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 | 284 | #define INFINIPATH_EXTS_MEMBIST_ENDTEST 0x0000000000004000 |
285 | #define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 | 285 | #define INFINIPATH_EXTS_MEMBIST_CORRECT 0x0000000000008000 |
286 | 286 | ||
287 | |||
288 | /* TID entries (memory), HT-only */ | ||
289 | #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ | ||
290 | #define INFINIPATH_RT_VALID 0x8000000000000000ULL | ||
291 | #define INFINIPATH_RT_ADDR_SHIFT 0 | ||
292 | #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFFULL | ||
293 | #define INFINIPATH_RT_BUFSIZE_SHIFT 48 | ||
294 | |||
287 | /* | 295 | /* |
288 | * masks and bits that are different in different chips, or present only | 296 | * masks and bits that are different in different chips, or present only |
289 | * in one | 297 | * in one |
@@ -402,6 +410,14 @@ static const struct ipath_hwerror_msgs ipath_6110_hwerror_msgs[] = { | |||
402 | INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), | 410 | INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), |
403 | }; | 411 | }; |
404 | 412 | ||
413 | #define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ | ||
414 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ | ||
415 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | ||
416 | #define RXE_EAGER_PARITY (INFINIPATH_HWE_RXEMEMPARITYERR_EAGERTID \ | ||
417 | << INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT) | ||
418 | |||
419 | static int ipath_ht_txe_recover(struct ipath_devdata *); | ||
420 | |||
405 | /** | 421 | /** |
406 | * ipath_ht_handle_hwerrors - display hardware errors. | 422 | * ipath_ht_handle_hwerrors - display hardware errors. |
407 | * @dd: the infinipath device | 423 | * @dd: the infinipath device |
@@ -450,13 +466,12 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
450 | 466 | ||
451 | /* | 467 | /* |
452 | * make sure we get this much out, unless told to be quiet, | 468 | * make sure we get this much out, unless told to be quiet, |
469 | * it's a parity error we may recover from, | ||
453 | * or it's occurred within the last 5 seconds | 470 | * or it's occurred within the last 5 seconds |
454 | */ | 471 | */ |
455 | if ((hwerrs & ~(dd->ipath_lasthwerror | | 472 | if ((hwerrs & ~(dd->ipath_lasthwerror | TXE_PIO_PARITY | |
456 | ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | | 473 | RXE_EAGER_PARITY)) || |
457 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) | 474 | (ipath_debug & __IPATH_VERBDBG)) |
458 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT))) || | ||
459 | (ipath_debug & __IPATH_VERBDBG)) | ||
460 | dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " | 475 | dev_info(&dd->pcidev->dev, "Hardware error: hwerr=0x%llx " |
461 | "(cleared)\n", (unsigned long long) hwerrs); | 476 | "(cleared)\n", (unsigned long long) hwerrs); |
462 | dd->ipath_lasthwerror |= hwerrs; | 477 | dd->ipath_lasthwerror |= hwerrs; |
@@ -467,7 +482,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
467 | (hwerrs & ~dd->ipath_hwe_bitsextant)); | 482 | (hwerrs & ~dd->ipath_hwe_bitsextant)); |
468 | 483 | ||
469 | ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); | 484 | ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); |
470 | if (ctrl & INFINIPATH_C_FREEZEMODE) { | 485 | if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { |
471 | /* | 486 | /* |
472 | * parity errors in send memory are recoverable, | 487 | * parity errors in send memory are recoverable, |
473 | * just cancel the send (if indicated in sendbuffererror), | 488 | * just cancel the send (if indicated in sendbuffererror), |
@@ -476,50 +491,14 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
476 | * occur if a processor speculative read is done to the PIO | 491 | * occur if a processor speculative read is done to the PIO |
477 | * buffer while we are sending a packet, for example. | 492 | * buffer while we are sending a packet, for example. |
478 | */ | 493 | */ |
479 | if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | | 494 | if ((hwerrs & TXE_PIO_PARITY) && ipath_ht_txe_recover(dd)) |
480 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) | 495 | hwerrs &= ~TXE_PIO_PARITY; |
481 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { | 496 | if (hwerrs & RXE_EAGER_PARITY) |
482 | ipath_stats.sps_txeparity++; | 497 | ipath_dev_err(dd, "RXE parity, Eager TID error is not " |
483 | ipath_dbg("Recovering from TXE parity error (%llu), " | 498 | "recoverable\n"); |
484 | "hwerrstatus=%llx\n", | 499 | if (!hwerrs) { |
485 | (unsigned long long) ipath_stats.sps_txeparity, | 500 | ipath_dbg("Clearing freezemode on ignored or " |
486 | (unsigned long long) hwerrs); | 501 | "recovered hardware error\n"); |
487 | ipath_disarm_senderrbufs(dd); | ||
488 | hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | | ||
489 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) | ||
490 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); | ||
491 | if (!hwerrs) { /* else leave in freeze mode */ | ||
492 | ipath_write_kreg(dd, | ||
493 | dd->ipath_kregs->kr_control, | ||
494 | dd->ipath_control); | ||
495 | return; | ||
496 | } | ||
497 | } | ||
498 | if (hwerrs) { | ||
499 | /* | ||
500 | * if any set that we aren't ignoring; only | ||
501 | * make the complaint once, in case it's stuck | ||
502 | * or recurring, and we get here multiple | ||
503 | * times. | ||
504 | */ | ||
505 | if (dd->ipath_flags & IPATH_INITTED) { | ||
506 | ipath_dev_err(dd, "Fatal Hardware Error (freeze " | ||
507 | "mode), no longer usable, SN %.16s\n", | ||
508 | dd->ipath_serial); | ||
509 | isfatal = 1; | ||
510 | } | ||
511 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; | ||
512 | /* mark as having had error */ | ||
513 | *dd->ipath_statusp |= IPATH_STATUS_HWERROR; | ||
514 | /* | ||
515 | * mark as not usable, at a minimum until driver | ||
516 | * is reloaded, probably until reboot, since no | ||
517 | * other reset is possible. | ||
518 | */ | ||
519 | dd->ipath_flags &= ~IPATH_INITTED; | ||
520 | } else { | ||
521 | ipath_dbg("Clearing freezemode on ignored hardware " | ||
522 | "error\n"); | ||
523 | ctrl &= ~INFINIPATH_C_FREEZEMODE; | 502 | ctrl &= ~INFINIPATH_C_FREEZEMODE; |
524 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, | 503 | ipath_write_kreg(dd, dd->ipath_kregs->kr_control, |
525 | ctrl); | 504 | ctrl); |
@@ -587,7 +566,32 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
587 | dd->ipath_hwerrmask); | 566 | dd->ipath_hwerrmask); |
588 | } | 567 | } |
589 | 568 | ||
590 | ipath_dev_err(dd, "%s hardware error\n", msg); | 569 | if (hwerrs) { |
570 | /* | ||
571 | * if any set that we aren't ignoring; only | ||
572 | * make the complaint once, in case it's stuck | ||
573 | * or recurring, and we get here multiple | ||
574 | * times. | ||
575 | */ | ||
576 | ipath_dev_err(dd, "%s hardware error\n", msg); | ||
577 | if (dd->ipath_flags & IPATH_INITTED) { | ||
578 | ipath_dev_err(dd, "Fatal Hardware Error (freeze " | ||
579 | "mode), no longer usable, SN %.16s\n", | ||
580 | dd->ipath_serial); | ||
581 | isfatal = 1; | ||
582 | } | ||
583 | *dd->ipath_statusp &= ~IPATH_STATUS_IB_READY; | ||
584 | /* mark as having had error */ | ||
585 | *dd->ipath_statusp |= IPATH_STATUS_HWERROR; | ||
586 | /* | ||
587 | * mark as not usable, at a minimum until driver | ||
588 | * is reloaded, probably until reboot, since no | ||
589 | * other reset is possible. | ||
590 | */ | ||
591 | dd->ipath_flags &= ~IPATH_INITTED; | ||
592 | } | ||
593 | else | ||
594 | *msg = 0; /* recovered from all of them */ | ||
591 | if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) | 595 | if (isfatal && !ipath_diag_inuse && dd->ipath_freezemsg) |
592 | /* | 596 | /* |
593 | * for status file; if no trailing brace is copied, | 597 | * for status file; if no trailing brace is copied, |
@@ -658,7 +662,8 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name, | |||
658 | if (n) | 662 | if (n) |
659 | snprintf(name, namelen, "%s", n); | 663 | snprintf(name, namelen, "%s", n); |
660 | 664 | ||
661 | if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || dd->ipath_minrev > 3)) { | 665 | if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 || |
666 | dd->ipath_minrev > 3)) { | ||
662 | /* | 667 | /* |
663 | * This version of the driver only supports Rev 3.2 and 3.3 | 668 | * This version of the driver only supports Rev 3.2 and 3.3 |
664 | */ | 669 | */ |
@@ -1163,6 +1168,8 @@ static void ipath_ht_init_hwerrors(struct ipath_devdata *dd) | |||
1163 | 1168 | ||
1164 | if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) | 1169 | if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) |
1165 | ipath_dev_err(dd, "MemBIST did not complete!\n"); | 1170 | ipath_dev_err(dd, "MemBIST did not complete!\n"); |
1171 | if (extsval & INFINIPATH_EXTS_MEMBIST_CORRECT) | ||
1172 | ipath_dbg("MemBIST corrected\n"); | ||
1166 | 1173 | ||
1167 | ipath_check_htlink(dd); | 1174 | ipath_check_htlink(dd); |
1168 | 1175 | ||
@@ -1366,6 +1373,9 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, | |||
1366 | u64 __iomem *tidptr, u32 type, | 1373 | u64 __iomem *tidptr, u32 type, |
1367 | unsigned long pa) | 1374 | unsigned long pa) |
1368 | { | 1375 | { |
1376 | if (!dd->ipath_kregbase) | ||
1377 | return; | ||
1378 | |||
1369 | if (pa != dd->ipath_tidinvalid) { | 1379 | if (pa != dd->ipath_tidinvalid) { |
1370 | if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { | 1380 | if (unlikely((pa & ~INFINIPATH_RT_ADDR_MASK))) { |
1371 | dev_info(&dd->pcidev->dev, | 1381 | dev_info(&dd->pcidev->dev, |
@@ -1382,10 +1392,10 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd, | |||
1382 | pa |= lenvalid | INFINIPATH_RT_VALID; | 1392 | pa |= lenvalid | INFINIPATH_RT_VALID; |
1383 | } | 1393 | } |
1384 | } | 1394 | } |
1385 | if (dd->ipath_kregbase) | 1395 | writeq(pa, tidptr); |
1386 | writeq(pa, tidptr); | ||
1387 | } | 1396 | } |
1388 | 1397 | ||
1398 | |||
1389 | /** | 1399 | /** |
1390 | * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager | 1400 | * ipath_ht_clear_tid - clear all TID entries for a port, expected and eager |
1391 | * @dd: the infinipath device | 1401 | * @dd: the infinipath device |
@@ -1515,7 +1525,7 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) | |||
1515 | INFINIPATH_S_ABORT); | 1525 | INFINIPATH_S_ABORT); |
1516 | 1526 | ||
1517 | ipath_get_eeprom_info(dd); | 1527 | ipath_get_eeprom_info(dd); |
1518 | if(dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && | 1528 | if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' && |
1519 | dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { | 1529 | dd->ipath_serial[1] == '2' && dd->ipath_serial[2] == '8') { |
1520 | /* | 1530 | /* |
1521 | * Later production QHT7040 has same changes as QHT7140, so | 1531 | * Later production QHT7040 has same changes as QHT7140, so |
@@ -1528,6 +1538,24 @@ static int ipath_ht_early_init(struct ipath_devdata *dd) | |||
1528 | return 0; | 1538 | return 0; |
1529 | } | 1539 | } |
1530 | 1540 | ||
1541 | |||
1542 | static int ipath_ht_txe_recover(struct ipath_devdata *dd) | ||
1543 | { | ||
1544 | int cnt = ++ipath_stats.sps_txeparity; | ||
1545 | if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { | ||
1546 | if (cnt == IPATH_MAX_PARITY_ATTEMPTS) | ||
1547 | ipath_dev_err(dd, | ||
1548 | "Too many attempts to recover from " | ||
1549 | "TXE parity, giving up\n"); | ||
1550 | return 0; | ||
1551 | } | ||
1552 | dev_info(&dd->pcidev->dev, | ||
1553 | "Recovering from TXE PIO parity error\n"); | ||
1554 | ipath_disarm_senderrbufs(dd, 1); | ||
1555 | return 1; | ||
1556 | } | ||
1557 | |||
1558 | |||
1531 | /** | 1559 | /** |
1532 | * ipath_init_ht_get_base_info - set chip-specific flags for user code | 1560 | * ipath_init_ht_get_base_info - set chip-specific flags for user code |
1533 | * @dd: the infinipath device | 1561 | * @dd: the infinipath device |
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 5c50383880f2..aa2b51944331 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c | |||
@@ -321,6 +321,12 @@ static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = { | |||
321 | INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), | 321 | INFINIPATH_HWE_MSG(SERDESPLLFAILED, "SerDes PLL"), |
322 | }; | 322 | }; |
323 | 323 | ||
324 | #define TXE_PIO_PARITY ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | \ | ||
325 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) \ | ||
326 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT) | ||
327 | |||
328 | static int ipath_pe_txe_recover(struct ipath_devdata *); | ||
329 | |||
324 | /** | 330 | /** |
325 | * ipath_pe_handle_hwerrors - display hardware errors. | 331 | * ipath_pe_handle_hwerrors - display hardware errors. |
326 | * @dd: the infinipath device | 332 | * @dd: the infinipath device |
@@ -394,25 +400,8 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
394 | * occur if a processor speculative read is done to the PIO | 400 | * occur if a processor speculative read is done to the PIO |
395 | * buffer while we are sending a packet, for example. | 401 | * buffer while we are sending a packet, for example. |
396 | */ | 402 | */ |
397 | if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | | 403 | if ((hwerrs & TXE_PIO_PARITY) && ipath_pe_txe_recover(dd)) |
398 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) | 404 | hwerrs &= ~TXE_PIO_PARITY; |
399 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT)) { | ||
400 | ipath_stats.sps_txeparity++; | ||
401 | ipath_dbg("Recovering from TXE parity error (%llu), " | ||
402 | "hwerrstatus=%llx\n", | ||
403 | (unsigned long long) ipath_stats.sps_txeparity, | ||
404 | (unsigned long long) hwerrs); | ||
405 | ipath_disarm_senderrbufs(dd); | ||
406 | hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | | ||
407 | INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) | ||
408 | << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); | ||
409 | if (!hwerrs) { /* else leave in freeze mode */ | ||
410 | ipath_write_kreg(dd, | ||
411 | dd->ipath_kregs->kr_control, | ||
412 | dd->ipath_control); | ||
413 | return; | ||
414 | } | ||
415 | } | ||
416 | if (hwerrs) { | 405 | if (hwerrs) { |
417 | /* | 406 | /* |
418 | * if any set that we aren't ignoring only make the | 407 | * if any set that we aren't ignoring only make the |
@@ -581,6 +570,8 @@ static void ipath_pe_init_hwerrors(struct ipath_devdata *dd) | |||
581 | 570 | ||
582 | if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) | 571 | if (!(extsval & INFINIPATH_EXTS_MEMBIST_ENDTEST)) |
583 | ipath_dev_err(dd, "MemBIST did not complete!\n"); | 572 | ipath_dev_err(dd, "MemBIST did not complete!\n"); |
573 | if (extsval & INFINIPATH_EXTS_MEMBIST_FOUND) | ||
574 | ipath_dbg("MemBIST corrected\n"); | ||
584 | 575 | ||
585 | val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ | 576 | val = ~0ULL; /* barring bugs, all hwerrors become interrupts, */ |
586 | 577 | ||
@@ -1330,6 +1321,35 @@ static void ipath_pe_free_irq(struct ipath_devdata *dd) | |||
1330 | dd->ipath_irq = 0; | 1321 | dd->ipath_irq = 0; |
1331 | } | 1322 | } |
1332 | 1323 | ||
1324 | /* | ||
1325 | * On platforms using this chip, and not having ordered WC stores, we | ||
1326 | * can get TXE parity errors due to speculative reads to the PIO buffers, | ||
1327 | * and this, due to a chip bug can result in (many) false parity error | ||
1328 | * reports. So it's a debug print on those, and an info print on systems | ||
1329 | * where the speculative reads don't occur. | ||
1330 | * Because we can get lots of false errors, we have no upper limit | ||
1331 | * on recovery attempts on those platforms. | ||
1332 | */ | ||
1333 | static int ipath_pe_txe_recover(struct ipath_devdata *dd) | ||
1334 | { | ||
1335 | if (ipath_unordered_wc()) | ||
1336 | ipath_dbg("Recovering from TXE PIO parity error\n"); | ||
1337 | else { | ||
1338 | int cnt = ++ipath_stats.sps_txeparity; | ||
1339 | if (cnt >= IPATH_MAX_PARITY_ATTEMPTS) { | ||
1340 | if (cnt == IPATH_MAX_PARITY_ATTEMPTS) | ||
1341 | ipath_dev_err(dd, | ||
1342 | "Too many attempts to recover from " | ||
1343 | "TXE parity, giving up\n"); | ||
1344 | return 0; | ||
1345 | } | ||
1346 | dev_info(&dd->pcidev->dev, | ||
1347 | "Recovering from TXE PIO parity error\n"); | ||
1348 | } | ||
1349 | ipath_disarm_senderrbufs(dd, 1); | ||
1350 | return 1; | ||
1351 | } | ||
1352 | |||
1333 | /** | 1353 | /** |
1334 | * ipath_init_iba6120_funcs - set up the chip-specific function pointers | 1354 | * ipath_init_iba6120_funcs - set up the chip-specific function pointers |
1335 | * @dd: the infinipath device | 1355 | * @dd: the infinipath device |
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index 1e77b55afe93..72caa9f091f6 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c | |||
@@ -590,6 +590,10 @@ static int init_housekeeping(struct ipath_devdata *dd, | |||
590 | goto done; | 590 | goto done; |
591 | } | 591 | } |
592 | 592 | ||
593 | |||
594 | /* clear diagctrl register, in case diags were running and crashed */ | ||
595 | ipath_write_kreg (dd, dd->ipath_kregs->kr_hwdiagctrl, 0); | ||
596 | |||
593 | /* clear the initial reset flag, in case first driver load */ | 597 | /* clear the initial reset flag, in case first driver load */ |
594 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, | 598 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, |
595 | INFINIPATH_E_RESET); | 599 | INFINIPATH_E_RESET); |
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index 24853310df1c..45d033169c6e 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c | |||
@@ -38,10 +38,39 @@ | |||
38 | #include "ipath_common.h" | 38 | #include "ipath_common.h" |
39 | 39 | ||
40 | /* | 40 | /* |
41 | * clear (write) a pio buffer, to clear a parity error. This routine | ||
42 | * should only be called when in freeze mode, and the buffer should be | ||
43 | * canceled afterwards. | ||
44 | */ | ||
45 | static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum) | ||
46 | { | ||
47 | u32 __iomem *pbuf; | ||
48 | u32 dwcnt; /* dword count to write */ | ||
49 | if (pnum < dd->ipath_piobcnt2k) { | ||
50 | pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum * | ||
51 | dd->ipath_palign); | ||
52 | dwcnt = dd->ipath_piosize2k >> 2; | ||
53 | } | ||
54 | else { | ||
55 | pbuf = (u32 __iomem *) (dd->ipath_pio4kbase + | ||
56 | (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign); | ||
57 | dwcnt = dd->ipath_piosize4k >> 2; | ||
58 | } | ||
59 | dev_info(&dd->pcidev->dev, | ||
60 | "Rewrite PIO buffer %u, to recover from parity error\n", | ||
61 | pnum); | ||
62 | *pbuf = dwcnt+1; /* no flush required, since already in freeze */ | ||
63 | while(--dwcnt) | ||
64 | *pbuf++ = 0; | ||
65 | } | ||
66 | |||
67 | /* | ||
41 | * Called when we might have an error that is specific to a particular | 68 | * Called when we might have an error that is specific to a particular |
42 | * PIO buffer, and may need to cancel that buffer, so it can be re-used. | 69 | * PIO buffer, and may need to cancel that buffer, so it can be re-used. |
70 | * If rewrite is true, and bits are set in the sendbuffererror registers, | ||
71 | * we'll write to the buffer, for error recovery on parity errors. | ||
43 | */ | 72 | */ |
44 | void ipath_disarm_senderrbufs(struct ipath_devdata *dd) | 73 | void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) |
45 | { | 74 | { |
46 | u32 piobcnt; | 75 | u32 piobcnt; |
47 | unsigned long sbuf[4]; | 76 | unsigned long sbuf[4]; |
@@ -74,8 +103,11 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd) | |||
74 | } | 103 | } |
75 | 104 | ||
76 | for (i = 0; i < piobcnt; i++) | 105 | for (i = 0; i < piobcnt; i++) |
77 | if (test_bit(i, sbuf)) | 106 | if (test_bit(i, sbuf)) { |
107 | if (rewrite) | ||
108 | ipath_clrpiobuf(dd, i); | ||
78 | ipath_disarm_piobufs(dd, i, 1); | 109 | ipath_disarm_piobufs(dd, i, 1); |
110 | } | ||
79 | dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ | 111 | dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */ |
80 | } | 112 | } |
81 | } | 113 | } |
@@ -114,7 +146,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) | |||
114 | { | 146 | { |
115 | u64 ignore_this_time = 0; | 147 | u64 ignore_this_time = 0; |
116 | 148 | ||
117 | ipath_disarm_senderrbufs(dd); | 149 | ipath_disarm_senderrbufs(dd, 0); |
118 | if ((errs & E_SUM_LINK_PKTERRS) && | 150 | if ((errs & E_SUM_LINK_PKTERRS) && |
119 | !(dd->ipath_flags & IPATH_LINKACTIVE)) { | 151 | !(dd->ipath_flags & IPATH_LINKACTIVE)) { |
120 | /* | 152 | /* |
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index 5428c2619ba9..e900c2593f44 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h | |||
@@ -590,7 +590,6 @@ int ipath_enable_wc(struct ipath_devdata *dd); | |||
590 | void ipath_disable_wc(struct ipath_devdata *dd); | 590 | void ipath_disable_wc(struct ipath_devdata *dd); |
591 | int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); | 591 | int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp); |
592 | void ipath_shutdown_device(struct ipath_devdata *); | 592 | void ipath_shutdown_device(struct ipath_devdata *); |
593 | void ipath_disarm_senderrbufs(struct ipath_devdata *); | ||
594 | 593 | ||
595 | struct file_operations; | 594 | struct file_operations; |
596 | int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, | 595 | int ipath_cdev_init(int minor, char *name, const struct file_operations *fops, |
@@ -713,6 +712,7 @@ void ipath_init_iba6120_funcs(struct ipath_devdata *); | |||
713 | void ipath_init_iba6110_funcs(struct ipath_devdata *); | 712 | void ipath_init_iba6110_funcs(struct ipath_devdata *); |
714 | void ipath_get_eeprom_info(struct ipath_devdata *); | 713 | void ipath_get_eeprom_info(struct ipath_devdata *); |
715 | u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); | 714 | u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); |
715 | void ipath_disarm_senderrbufs(struct ipath_devdata *, int); | ||
716 | 716 | ||
717 | /* | 717 | /* |
718 | * number of words used for protocol header if not set by ipath_userinit(); | 718 | * number of words used for protocol header if not set by ipath_userinit(); |
@@ -897,6 +897,8 @@ dma_addr_t ipath_map_single(struct pci_dev *, void *, size_t, int); | |||
897 | 897 | ||
898 | extern unsigned ipath_debug; /* debugging bit mask */ | 898 | extern unsigned ipath_debug; /* debugging bit mask */ |
899 | 899 | ||
900 | #define IPATH_MAX_PARITY_ATTEMPTS 10000 /* max times to try recovery */ | ||
901 | |||
900 | const char *ipath_get_unit_name(int unit); | 902 | const char *ipath_get_unit_name(int unit); |
901 | 903 | ||
902 | extern struct mutex ipath_mutex; | 904 | extern struct mutex ipath_mutex; |
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h index 6e99eafdfd73..c182bcd62098 100644 --- a/drivers/infiniband/hw/ipath/ipath_registers.h +++ b/drivers/infiniband/hw/ipath/ipath_registers.h | |||
@@ -308,13 +308,6 @@ | |||
308 | #define INFINIPATH_XGXS_RX_POL_SHIFT 19 | 308 | #define INFINIPATH_XGXS_RX_POL_SHIFT 19 |
309 | #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL | 309 | #define INFINIPATH_XGXS_RX_POL_MASK 0xfULL |
310 | 310 | ||
311 | #define INFINIPATH_RT_ADDR_MASK 0xFFFFFFFFFFULL /* 40 bits valid */ | ||
312 | |||
313 | /* TID entries (memory), HT-only */ | ||
314 | #define INFINIPATH_RT_VALID 0x8000000000000000ULL | ||
315 | #define INFINIPATH_RT_ADDR_SHIFT 0 | ||
316 | #define INFINIPATH_RT_BUFSIZE_MASK 0x3FFF | ||
317 | #define INFINIPATH_RT_BUFSIZE_SHIFT 48 | ||
318 | 311 | ||
319 | /* | 312 | /* |
320 | * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our | 313 | * IPATH_PIO_MAXIBHDR is the max IB header size allowed for in our |