diff options
author | Michael Albaugh <michael.albaugh@qlogic.com> | 2007-05-17 10:26:28 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2007-07-09 23:12:25 -0400 |
commit | aecd3b5ab19624ca9644b9df9c61615282d8923f (patch) | |
tree | d4f4b5367a40bd737899b7e3dbfcfab6021891e0 /drivers/infiniband/hw/ipath | |
parent | 8e9ab3f1c9e34d5c28446c3738983d33a3937fe0 (diff) |
IB/ipath: Log "active" time and some errors to EEPROM
We currently track various errors; now we enhance that capability by
logging some of them to EEPROM. We also now log a cumulative "active"
time, defined by traffic through the InfiniPath HCA beyond the normal SM
traffic.
Signed-off-by: Michael Albaugh <michael.albaugh@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_driver.c | 3 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_eeprom.c | 233 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_iba6110.c | 22 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_iba6120.c | 27 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_init_chip.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_intr.c | 8 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_kernel.h | 38 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_stats.c | 23 | ||||
-rw-r--r-- | drivers/infiniband/hw/ipath/ipath_sysfs.c | 22 |
9 files changed, 370 insertions, 8 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c index 097593286582..e9639860b48d 100644 --- a/drivers/infiniband/hw/ipath/ipath_driver.c +++ b/drivers/infiniband/hw/ipath/ipath_driver.c | |||
@@ -2005,6 +2005,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd) | |||
2005 | ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); | 2005 | ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); |
2006 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); | 2006 | ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); |
2007 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); | 2007 | ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); |
2008 | |||
2009 | ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n"); | ||
2010 | ipath_update_eeprom_log(dd); | ||
2008 | } | 2011 | } |
2009 | 2012 | ||
2010 | /** | 2013 | /** |
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c index 26daac9d8b63..9be1b9ac55f0 100644 --- a/drivers/infiniband/hw/ipath/ipath_eeprom.c +++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c | |||
@@ -367,8 +367,8 @@ bail: | |||
367 | * @len: number of bytes to receive | 367 | * @len: number of bytes to receive |
368 | */ | 368 | */ |
369 | 369 | ||
370 | int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, | 370 | static int ipath_eeprom_internal_read(struct ipath_devdata *dd, |
371 | void *buffer, int len) | 371 | u8 eeprom_offset, void *buffer, int len) |
372 | { | 372 | { |
373 | /* compiler complains unless initialized */ | 373 | /* compiler complains unless initialized */ |
374 | u8 single_byte = 0; | 374 | u8 single_byte = 0; |
@@ -418,6 +418,7 @@ bail: | |||
418 | return ret; | 418 | return ret; |
419 | } | 419 | } |
420 | 420 | ||
421 | |||
421 | /** | 422 | /** |
422 | * ipath_eeprom_write - writes data to the eeprom via I2C | 423 | * ipath_eeprom_write - writes data to the eeprom via I2C |
423 | * @dd: the infinipath device | 424 | * @dd: the infinipath device |
@@ -425,8 +426,8 @@ bail: | |||
425 | * @buffer: data to write | 426 | * @buffer: data to write |
426 | * @len: number of bytes to write | 427 | * @len: number of bytes to write |
427 | */ | 428 | */ |
428 | int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, | 429 | int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset, |
429 | const void *buffer, int len) | 430 | const void *buffer, int len) |
430 | { | 431 | { |
431 | u8 single_byte; | 432 | u8 single_byte; |
432 | int sub_len; | 433 | int sub_len; |
@@ -500,6 +501,38 @@ bail: | |||
500 | return ret; | 501 | return ret; |
501 | } | 502 | } |
502 | 503 | ||
504 | /* | ||
505 | * The public entry-points ipath_eeprom_read() and ipath_eeprom_write() | ||
506 | * are now just wrappers around the internal functions. | ||
507 | */ | ||
508 | int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, | ||
509 | void *buff, int len) | ||
510 | { | ||
511 | int ret; | ||
512 | |||
513 | ret = down_interruptible(&dd->ipath_eep_sem); | ||
514 | if (!ret) { | ||
515 | ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); | ||
516 | up(&dd->ipath_eep_sem); | ||
517 | } | ||
518 | |||
519 | return ret; | ||
520 | } | ||
521 | |||
522 | int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, | ||
523 | const void *buff, int len) | ||
524 | { | ||
525 | int ret; | ||
526 | |||
527 | ret = down_interruptible(&dd->ipath_eep_sem); | ||
528 | if (!ret) { | ||
529 | ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); | ||
530 | up(&dd->ipath_eep_sem); | ||
531 | } | ||
532 | |||
533 | return ret; | ||
534 | } | ||
535 | |||
503 | static u8 flash_csum(struct ipath_flash *ifp, int adjust) | 536 | static u8 flash_csum(struct ipath_flash *ifp, int adjust) |
504 | { | 537 | { |
505 | u8 *ip = (u8 *) ifp; | 538 | u8 *ip = (u8 *) ifp; |
@@ -527,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) | |||
527 | void *buf; | 560 | void *buf; |
528 | struct ipath_flash *ifp; | 561 | struct ipath_flash *ifp; |
529 | __be64 guid; | 562 | __be64 guid; |
530 | int len; | 563 | int len, eep_stat; |
531 | u8 csum, *bguid; | 564 | u8 csum, *bguid; |
532 | int t = dd->ipath_unit; | 565 | int t = dd->ipath_unit; |
533 | struct ipath_devdata *dd0 = ipath_lookup(0); | 566 | struct ipath_devdata *dd0 = ipath_lookup(0); |
@@ -571,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) | |||
571 | goto bail; | 604 | goto bail; |
572 | } | 605 | } |
573 | 606 | ||
574 | if (ipath_eeprom_read(dd, 0, buf, len)) { | 607 | down(&dd->ipath_eep_sem); |
608 | eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len); | ||
609 | up(&dd->ipath_eep_sem); | ||
610 | |||
611 | if (eep_stat) { | ||
575 | ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); | 612 | ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); |
576 | goto done; | 613 | goto done; |
577 | } | 614 | } |
@@ -646,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd) | |||
646 | ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", | 683 | ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", |
647 | (unsigned long long) be64_to_cpu(dd->ipath_guid)); | 684 | (unsigned long long) be64_to_cpu(dd->ipath_guid)); |
648 | 685 | ||
686 | memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT); | ||
687 | /* | ||
688 | * Power-on (actually "active") hours are kept as little-endian value | ||
689 | * in EEPROM, but as seconds in a (possibly as small as 24-bit) | ||
690 | * atomic_t while running. | ||
691 | */ | ||
692 | atomic_set(&dd->ipath_active_time, 0); | ||
693 | dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8); | ||
694 | |||
649 | done: | 695 | done: |
650 | vfree(buf); | 696 | vfree(buf); |
651 | 697 | ||
652 | bail:; | 698 | bail:; |
653 | } | 699 | } |
700 | |||
701 | /** | ||
702 | * ipath_update_eeprom_log - copy active-time and error counters to eeprom | ||
703 | * @dd: the infinipath device | ||
704 | * | ||
705 | * Although the time is kept as seconds in the ipath_devdata struct, it is | ||
706 | * rounded to hours for re-write, as we have only 16 bits in EEPROM. | ||
707 | * First-cut code reads whole (expected) struct ipath_flash, modifies, | ||
708 | * re-writes. Future direction: read/write only what we need, assuming | ||
709 | * that the EEPROM had to have been "good enough" for driver init, and | ||
710 | * if not, we aren't making it worse. | ||
711 | * | ||
712 | */ | ||
713 | |||
714 | int ipath_update_eeprom_log(struct ipath_devdata *dd) | ||
715 | { | ||
716 | void *buf; | ||
717 | struct ipath_flash *ifp; | ||
718 | int len, hi_water; | ||
719 | uint32_t new_time, new_hrs; | ||
720 | u8 csum; | ||
721 | int ret, idx; | ||
722 | unsigned long flags; | ||
723 | |||
724 | /* first, check if we actually need to do anything. */ | ||
725 | ret = 0; | ||
726 | for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { | ||
727 | if (dd->ipath_eep_st_new_errs[idx]) { | ||
728 | ret = 1; | ||
729 | break; | ||
730 | } | ||
731 | } | ||
732 | new_time = atomic_read(&dd->ipath_active_time); | ||
733 | |||
734 | if (ret == 0 && new_time < 3600) | ||
735 | return 0; | ||
736 | |||
737 | /* | ||
738 | * The quick-check above determined that there is something worthy | ||
739 | * of logging, so get current contents and take a more detailed look. | ||
740 | */ | ||
741 | len = offsetof(struct ipath_flash, if_future); | ||
742 | buf = vmalloc(len); | ||
743 | ret = 1; | ||
744 | if (!buf) { | ||
745 | ipath_dev_err(dd, "Couldn't allocate memory to read %u " | ||
746 | "bytes from eeprom for logging\n", len); | ||
747 | goto bail; | ||
748 | } | ||
749 | |||
750 | /* Grab semaphore and read current EEPROM. If we get an | ||
751 | * error, let go, but if not, keep it until we finish write. | ||
752 | */ | ||
753 | ret = down_interruptible(&dd->ipath_eep_sem); | ||
754 | if (ret) { | ||
755 | ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n"); | ||
756 | goto free_bail; | ||
757 | } | ||
758 | ret = ipath_eeprom_internal_read(dd, 0, buf, len); | ||
759 | if (ret) { | ||
760 | up(&dd->ipath_eep_sem); | ||
761 | ipath_dev_err(dd, "Unable read EEPROM for logging\n"); | ||
762 | goto free_bail; | ||
763 | } | ||
764 | ifp = (struct ipath_flash *)buf; | ||
765 | |||
766 | csum = flash_csum(ifp, 0); | ||
767 | if (csum != ifp->if_csum) { | ||
768 | up(&dd->ipath_eep_sem); | ||
769 | ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n", | ||
770 | csum, ifp->if_csum); | ||
771 | ret = 1; | ||
772 | goto free_bail; | ||
773 | } | ||
774 | hi_water = 0; | ||
775 | spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); | ||
776 | for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { | ||
777 | int new_val = dd->ipath_eep_st_new_errs[idx]; | ||
778 | if (new_val) { | ||
779 | /* | ||
780 | * If we have seen any errors, add to EEPROM values | ||
781 | * We need to saturate at 0xFF (255) and we also | ||
782 | * would need to adjust the checksum if we were | ||
783 | * trying to minimize EEPROM traffic | ||
784 | * Note that we add to actual current count in EEPROM, | ||
785 | * in case it was altered while we were running. | ||
786 | */ | ||
787 | new_val += ifp->if_errcntp[idx]; | ||
788 | if (new_val > 0xFF) | ||
789 | new_val = 0xFF; | ||
790 | if (ifp->if_errcntp[idx] != new_val) { | ||
791 | ifp->if_errcntp[idx] = new_val; | ||
792 | hi_water = offsetof(struct ipath_flash, | ||
793 | if_errcntp) + idx; | ||
794 | } | ||
795 | /* | ||
796 | * update our shadow (used to minimize EEPROM | ||
797 | * traffic), to match what we are about to write. | ||
798 | */ | ||
799 | dd->ipath_eep_st_errs[idx] = new_val; | ||
800 | dd->ipath_eep_st_new_errs[idx] = 0; | ||
801 | } | ||
802 | } | ||
803 | /* | ||
804 | * now update active-time. We would like to round to the nearest hour | ||
805 | * but unless atomic_t are sure to be proper signed ints we cannot, | ||
806 | * because we need to account for what we "transfer" to EEPROM and | ||
807 | * if we log an hour at 31 minutes, then we would need to set | ||
808 | * active_time to -29 to accurately count the _next_ hour. | ||
809 | */ | ||
810 | if (new_time > 3600) { | ||
811 | new_hrs = new_time / 3600; | ||
812 | atomic_sub((new_hrs * 3600), &dd->ipath_active_time); | ||
813 | new_hrs += dd->ipath_eep_hrs; | ||
814 | if (new_hrs > 0xFFFF) | ||
815 | new_hrs = 0xFFFF; | ||
816 | dd->ipath_eep_hrs = new_hrs; | ||
817 | if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) { | ||
818 | ifp->if_powerhour[0] = new_hrs & 0xFF; | ||
819 | hi_water = offsetof(struct ipath_flash, if_powerhour); | ||
820 | } | ||
821 | if ((new_hrs >> 8) != ifp->if_powerhour[1]) { | ||
822 | ifp->if_powerhour[1] = new_hrs >> 8; | ||
823 | hi_water = offsetof(struct ipath_flash, if_powerhour) | ||
824 | + 1; | ||
825 | } | ||
826 | } | ||
827 | /* | ||
828 | * There is a tiny possibility that we could somehow fail to write | ||
829 | * the EEPROM after updating our shadows, but problems from holding | ||
830 | * the spinlock too long are a much bigger issue. | ||
831 | */ | ||
832 | spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); | ||
833 | if (hi_water) { | ||
834 | /* we made some change to the data, update cksum and write */ | ||
835 | csum = flash_csum(ifp, 1); | ||
836 | ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); | ||
837 | } | ||
838 | up(&dd->ipath_eep_sem); | ||
839 | if (ret) | ||
840 | ipath_dev_err(dd, "Failed updating EEPROM\n"); | ||
841 | |||
842 | free_bail: | ||
843 | vfree(buf); | ||
844 | bail: | ||
845 | return ret; | ||
846 | |||
847 | } | ||
848 | |||
849 | /** | ||
850 | * ipath_inc_eeprom_err - increment one of the four error counters | ||
851 | * that are logged to EEPROM. | ||
852 | * @dd: the infinipath device | ||
853 | * @eidx: 0..3, the counter to increment | ||
854 | * @incr: how much to add | ||
855 | * | ||
856 | * Each counter is 8-bits, and saturates at 255 (0xFF). They | ||
857 | * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log() | ||
858 | * is called, but it can only be called in a context that allows sleep. | ||
859 | * This function can be called even at interrupt level. | ||
860 | */ | ||
861 | |||
862 | void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr) | ||
863 | { | ||
864 | uint new_val; | ||
865 | unsigned long flags; | ||
866 | |||
867 | spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); | ||
868 | new_val = dd->ipath_eep_st_new_errs[eidx] + incr; | ||
869 | if (new_val > 255) | ||
870 | new_val = 255; | ||
871 | dd->ipath_eep_st_new_errs[eidx] = new_val; | ||
872 | spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); | ||
873 | return; | ||
874 | } | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c index 8482ea366fb1..85f408de7bf7 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6110.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c | |||
@@ -440,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
440 | u32 bits, ctrl; | 440 | u32 bits, ctrl; |
441 | int isfatal = 0; | 441 | int isfatal = 0; |
442 | char bitsmsg[64]; | 442 | char bitsmsg[64]; |
443 | int log_idx; | ||
443 | 444 | ||
444 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); | 445 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); |
445 | 446 | ||
@@ -468,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
468 | 469 | ||
469 | hwerrs &= dd->ipath_hwerrmask; | 470 | hwerrs &= dd->ipath_hwerrmask; |
470 | 471 | ||
472 | /* We log some errors to EEPROM, check if we have any of those. */ | ||
473 | for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) | ||
474 | if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) | ||
475 | ipath_inc_eeprom_err(dd, log_idx, 1); | ||
476 | |||
471 | /* | 477 | /* |
472 | * make sure we get this much out, unless told to be quiet, | 478 | * make sure we get this much out, unless told to be quiet, |
473 | * it's a parity error we may recover from, | 479 | * it's a parity error we may recover from, |
@@ -1171,6 +1177,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd) | |||
1171 | 1177 | ||
1172 | dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; | 1178 | dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; |
1173 | dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; | 1179 | dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; |
1180 | |||
1181 | /* | ||
1182 | * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. | ||
1183 | * 2 is Some Misc, 3 is reserved for future. | ||
1184 | */ | ||
1185 | dd->ipath_eep_st_masks[0].hwerrs_to_log = | ||
1186 | INFINIPATH_HWE_TXEMEMPARITYERR_MASK << | ||
1187 | INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; | ||
1188 | |||
1189 | dd->ipath_eep_st_masks[1].hwerrs_to_log = | ||
1190 | INFINIPATH_HWE_RXEMEMPARITYERR_MASK << | ||
1191 | INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; | ||
1192 | |||
1193 | dd->ipath_eep_st_masks[2].errs_to_log = | ||
1194 | INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; | ||
1195 | |||
1174 | } | 1196 | } |
1175 | 1197 | ||
1176 | /** | 1198 | /** |
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c index 711590740118..207323a5b52b 100644 --- a/drivers/infiniband/hw/ipath/ipath_iba6120.c +++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c | |||
@@ -340,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
340 | u32 bits, ctrl; | 340 | u32 bits, ctrl; |
341 | int isfatal = 0; | 341 | int isfatal = 0; |
342 | char bitsmsg[64]; | 342 | char bitsmsg[64]; |
343 | int log_idx; | ||
343 | 344 | ||
344 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); | 345 | hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); |
345 | if (!hwerrs) { | 346 | if (!hwerrs) { |
@@ -367,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg, | |||
367 | 368 | ||
368 | hwerrs &= dd->ipath_hwerrmask; | 369 | hwerrs &= dd->ipath_hwerrmask; |
369 | 370 | ||
371 | /* We log some errors to EEPROM, check if we have any of those. */ | ||
372 | for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) | ||
373 | if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log) | ||
374 | ipath_inc_eeprom_err(dd, log_idx, 1); | ||
375 | |||
370 | /* | 376 | /* |
371 | * make sure we get this much out, unless told to be quiet, | 377 | * make sure we get this much out, unless told to be quiet, |
372 | * or it's occurred within the last 5 seconds | 378 | * or it's occurred within the last 5 seconds |
@@ -950,6 +956,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd) | |||
950 | 956 | ||
951 | dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; | 957 | dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; |
952 | dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; | 958 | dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; |
959 | |||
960 | /* | ||
961 | * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity. | ||
962 | * 2 is Some Misc, 3 is reserved for future. | ||
963 | */ | ||
964 | dd->ipath_eep_st_masks[0].hwerrs_to_log = | ||
965 | INFINIPATH_HWE_TXEMEMPARITYERR_MASK << | ||
966 | INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT; | ||
967 | |||
968 | /* Ignore errors in PIO/PBC on systems with unordered write-combining */ | ||
969 | if (ipath_unordered_wc()) | ||
970 | dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY; | ||
971 | |||
972 | dd->ipath_eep_st_masks[1].hwerrs_to_log = | ||
973 | INFINIPATH_HWE_RXEMEMPARITYERR_MASK << | ||
974 | INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT; | ||
975 | |||
976 | dd->ipath_eep_st_masks[2].errs_to_log = | ||
977 | INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET; | ||
978 | |||
979 | |||
953 | } | 980 | } |
954 | 981 | ||
955 | /* setup the MSI stuff again after a reset. I'd like to just call | 982 | /* setup the MSI stuff again after a reset. I'd like to just call |
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c index f6ee7a83595a..ee839346a3a4 100644 --- a/drivers/infiniband/hw/ipath/ipath_init_chip.c +++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c | |||
@@ -341,6 +341,8 @@ static int init_chip_first(struct ipath_devdata *dd, | |||
341 | spin_lock_init(&dd->ipath_tid_lock); | 341 | spin_lock_init(&dd->ipath_tid_lock); |
342 | 342 | ||
343 | spin_lock_init(&dd->ipath_gpio_lock); | 343 | spin_lock_init(&dd->ipath_gpio_lock); |
344 | spin_lock_init(&dd->ipath_eep_st_lock); | ||
345 | sema_init(&dd->ipath_eep_sem, 1); | ||
344 | 346 | ||
345 | done: | 347 | done: |
346 | *pdp = pd; | 348 | *pdp = pd; |
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c index a90d3b5699c4..d9cdd00c8233 100644 --- a/drivers/infiniband/hw/ipath/ipath_intr.c +++ b/drivers/infiniband/hw/ipath/ipath_intr.c | |||
@@ -505,6 +505,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) | |||
505 | int i, iserr = 0; | 505 | int i, iserr = 0; |
506 | int chkerrpkts = 0, noprint = 0; | 506 | int chkerrpkts = 0, noprint = 0; |
507 | unsigned supp_msgs; | 507 | unsigned supp_msgs; |
508 | int log_idx; | ||
508 | 509 | ||
509 | supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); | 510 | supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); |
510 | 511 | ||
@@ -518,6 +519,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs) | |||
518 | if (errs & INFINIPATH_E_HARDWARE) { | 519 | if (errs & INFINIPATH_E_HARDWARE) { |
519 | /* reuse same msg buf */ | 520 | /* reuse same msg buf */ |
520 | dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); | 521 | dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); |
522 | } else { | ||
523 | u64 mask; | ||
524 | for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) { | ||
525 | mask = dd->ipath_eep_st_masks[log_idx].errs_to_log; | ||
526 | if (errs & mask) | ||
527 | ipath_inc_eeprom_err(dd, log_idx, 1); | ||
528 | } | ||
521 | } | 529 | } |
522 | 530 | ||
523 | if (!noprint && (errs & ~dd->ipath_e_bitsextant)) | 531 | if (!noprint && (errs & ~dd->ipath_e_bitsextant)) |
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h index bd1088a99891..2a4414b948ee 100644 --- a/drivers/infiniband/hw/ipath/ipath_kernel.h +++ b/drivers/infiniband/hw/ipath/ipath_kernel.h | |||
@@ -57,6 +57,24 @@ | |||
57 | extern struct infinipath_stats ipath_stats; | 57 | extern struct infinipath_stats ipath_stats; |
58 | 58 | ||
59 | #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ | 59 | #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ |
60 | /* | ||
61 | * First-cut criterion for "device is active" is | ||
62 | * two thousand dwords combined Tx, Rx traffic per | ||
63 | * 5-second interval. SMA packets are 64 dwords, | ||
64 | * and occur "a few per second", presumably each way. | ||
65 | */ | ||
66 | #define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000) | ||
67 | /* | ||
68 | * Struct used to indicate which errors are logged in each of the | ||
69 | * error-counters that are logged to EEPROM. A counter is incremented | ||
70 | * _once_ (saturating at 255) for each event with any bits set in | ||
71 | * the error or hwerror register masks below. | ||
72 | */ | ||
73 | #define IPATH_EEP_LOG_CNT (4) | ||
74 | struct ipath_eep_log_mask { | ||
75 | u64 errs_to_log; | ||
76 | u64 hwerrs_to_log; | ||
77 | }; | ||
60 | 78 | ||
61 | struct ipath_portdata { | 79 | struct ipath_portdata { |
62 | void **port_rcvegrbuf; | 80 | void **port_rcvegrbuf; |
@@ -588,6 +606,24 @@ struct ipath_devdata { | |||
588 | /* Used to flash LEDs in override mode */ | 606 | /* Used to flash LEDs in override mode */ |
589 | struct timer_list ipath_led_override_timer; | 607 | struct timer_list ipath_led_override_timer; |
590 | 608 | ||
609 | /* Support (including locks) for EEPROM logging of errors and time */ | ||
610 | /* control access to actual counters, timer */ | ||
611 | spinlock_t ipath_eep_st_lock; | ||
612 | /* control high-level access to EEPROM */ | ||
613 | struct semaphore ipath_eep_sem; | ||
614 | /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */ | ||
615 | uint64_t ipath_traffic_wds; | ||
616 | /* active time is kept in seconds, but logged in hours */ | ||
617 | atomic_t ipath_active_time; | ||
618 | /* Below are nominal shadow of EEPROM, new since last EEPROM update */ | ||
619 | uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT]; | ||
620 | uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT]; | ||
621 | uint16_t ipath_eep_hrs; | ||
622 | /* | ||
623 | * masks for which bits of errs, hwerrs that cause | ||
624 | * each of the counters to increment. | ||
625 | */ | ||
626 | struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT]; | ||
591 | }; | 627 | }; |
592 | 628 | ||
593 | /* Private data for file operations */ | 629 | /* Private data for file operations */ |
@@ -726,6 +762,8 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); | |||
726 | void ipath_init_iba6120_funcs(struct ipath_devdata *); | 762 | void ipath_init_iba6120_funcs(struct ipath_devdata *); |
727 | void ipath_init_iba6110_funcs(struct ipath_devdata *); | 763 | void ipath_init_iba6110_funcs(struct ipath_devdata *); |
728 | void ipath_get_eeprom_info(struct ipath_devdata *); | 764 | void ipath_get_eeprom_info(struct ipath_devdata *); |
765 | int ipath_update_eeprom_log(struct ipath_devdata *dd); | ||
766 | void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); | ||
729 | u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); | 767 | u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); |
730 | void ipath_disarm_senderrbufs(struct ipath_devdata *, int); | 768 | void ipath_disarm_senderrbufs(struct ipath_devdata *, int); |
731 | 769 | ||
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c index d8b5e4cefe25..2955f368de0c 100644 --- a/drivers/infiniband/hw/ipath/ipath_stats.c +++ b/drivers/infiniband/hw/ipath/ipath_stats.c | |||
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) | |||
55 | u64 val64; | 55 | u64 val64; |
56 | unsigned long t0, t1; | 56 | unsigned long t0, t1; |
57 | u64 ret; | 57 | u64 ret; |
58 | unsigned long flags; | ||
58 | 59 | ||
59 | t0 = jiffies; | 60 | t0 = jiffies; |
60 | /* If fast increment counters are only 32 bits, snapshot them, | 61 | /* If fast increment counters are only 32 bits, snapshot them, |
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg) | |||
91 | if (creg == dd->ipath_cregs->cr_wordsendcnt) { | 92 | if (creg == dd->ipath_cregs->cr_wordsendcnt) { |
92 | if (val != dd->ipath_lastsword) { | 93 | if (val != dd->ipath_lastsword) { |
93 | dd->ipath_sword += val - dd->ipath_lastsword; | 94 | dd->ipath_sword += val - dd->ipath_lastsword; |
95 | spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); | ||
96 | dd->ipath_traffic_wds += val - dd->ipath_lastsword; | ||
97 | spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); | ||
94 | dd->ipath_lastsword = val; | 98 | dd->ipath_lastsword = val; |
95 | } | 99 | } |
96 | val64 = dd->ipath_sword; | 100 | val64 = dd->ipath_sword; |
97 | } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { | 101 | } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { |
98 | if (val != dd->ipath_lastrword) { | 102 | if (val != dd->ipath_lastrword) { |
99 | dd->ipath_rword += val - dd->ipath_lastrword; | 103 | dd->ipath_rword += val - dd->ipath_lastrword; |
104 | spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); | ||
105 | dd->ipath_traffic_wds += val - dd->ipath_lastrword; | ||
106 | spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); | ||
100 | dd->ipath_lastrword = val; | 107 | dd->ipath_lastrword = val; |
101 | } | 108 | } |
102 | val64 = dd->ipath_rword; | 109 | val64 = dd->ipath_rword; |
@@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque) | |||
200 | struct ipath_devdata *dd = (struct ipath_devdata *) opaque; | 207 | struct ipath_devdata *dd = (struct ipath_devdata *) opaque; |
201 | u32 val; | 208 | u32 val; |
202 | static unsigned cnt; | 209 | static unsigned cnt; |
210 | unsigned long flags; | ||
203 | 211 | ||
204 | /* | 212 | /* |
205 | * don't access the chip while running diags, or memory diags can | 213 | * don't access the chip while running diags, or memory diags can |
@@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque) | |||
210 | /* but re-arm the timer, for diags case; won't hurt other */ | 218 | /* but re-arm the timer, for diags case; won't hurt other */ |
211 | goto done; | 219 | goto done; |
212 | 220 | ||
221 | /* | ||
222 | * We now try to maintain an "active timer", based on traffic | ||
223 | * exceeding a threshold, so we need to check the word-counts | ||
224 | * even if they are 64-bit. | ||
225 | */ | ||
226 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); | ||
227 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); | ||
228 | spin_lock_irqsave(&dd->ipath_eep_st_lock, flags); | ||
229 | if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD) | ||
230 | atomic_add(5, &dd->ipath_active_time); /* S/B #define */ | ||
231 | dd->ipath_traffic_wds = 0; | ||
232 | spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags); | ||
233 | |||
213 | if (dd->ipath_flags & IPATH_32BITCOUNTERS) { | 234 | if (dd->ipath_flags & IPATH_32BITCOUNTERS) { |
214 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt); | ||
215 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt); | ||
216 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); | 235 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); |
217 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); | 236 | ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); |
218 | } | 237 | } |
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c index 17ec14571722..ab34d3e8b955 100644 --- a/drivers/infiniband/hw/ipath/ipath_sysfs.c +++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c | |||
@@ -613,6 +613,26 @@ static ssize_t store_led_override(struct device *dev, | |||
613 | return ret; | 613 | return ret; |
614 | } | 614 | } |
615 | 615 | ||
616 | static ssize_t show_logged_errs(struct device *dev, | ||
617 | struct device_attribute *attr, | ||
618 | char *buf) | ||
619 | { | ||
620 | struct ipath_devdata *dd = dev_get_drvdata(dev); | ||
621 | int idx, count; | ||
622 | |||
623 | /* force consistency with actual EEPROM */ | ||
624 | if (ipath_update_eeprom_log(dd) != 0) | ||
625 | return -ENXIO; | ||
626 | |||
627 | count = 0; | ||
628 | for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) { | ||
629 | count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c", | ||
630 | dd->ipath_eep_st_errs[idx], | ||
631 | idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); | ||
632 | } | ||
633 | |||
634 | return count; | ||
635 | } | ||
616 | 636 | ||
617 | static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); | 637 | static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); |
618 | static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); | 638 | static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); |
@@ -643,6 +663,7 @@ static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL); | |||
643 | static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); | 663 | static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); |
644 | static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); | 664 | static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); |
645 | static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); | 665 | static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); |
666 | static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL); | ||
646 | 667 | ||
647 | static struct attribute *dev_attributes[] = { | 668 | static struct attribute *dev_attributes[] = { |
648 | &dev_attr_guid.attr, | 669 | &dev_attr_guid.attr, |
@@ -660,6 +681,7 @@ static struct attribute *dev_attributes[] = { | |||
660 | &dev_attr_enabled.attr, | 681 | &dev_attr_enabled.attr, |
661 | &dev_attr_rx_pol_inv.attr, | 682 | &dev_attr_rx_pol_inv.attr, |
662 | &dev_attr_led_override.attr, | 683 | &dev_attr_led_override.attr, |
684 | &dev_attr_logged_errors.attr, | ||
663 | NULL | 685 | NULL |
664 | }; | 686 | }; |
665 | 687 | ||