aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath
diff options
context:
space:
mode:
authorMichael Albaugh <michael.albaugh@qlogic.com>2007-05-17 10:26:28 -0400
committerRoland Dreier <rolandd@cisco.com>2007-07-09 23:12:25 -0400
commitaecd3b5ab19624ca9644b9df9c61615282d8923f (patch)
treed4f4b5367a40bd737899b7e3dbfcfab6021891e0 /drivers/infiniband/hw/ipath
parent8e9ab3f1c9e34d5c28446c3738983d33a3937fe0 (diff)
IB/ipath: Log "active" time and some errors to EEPROM
We currently track various errors; now we enhance that capability by logging some of them to EEPROM. We also now log a cumulative "active" time defined by traffic through the InfiniPath HCA beyond the normal SM traffic. Signed-off-by: Michael Albaugh <michael.albaugh@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_eeprom.c233
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c22
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6120.c27
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h38
-rw-r--r--drivers/infiniband/hw/ipath/ipath_stats.c23
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c22
9 files changed, 370 insertions, 8 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 097593286582..e9639860b48d 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -2005,6 +2005,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
2005 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED); 2005 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
2006 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL); 2006 ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
2007 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL); 2007 ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
2008
2009 ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
2010 ipath_update_eeprom_log(dd);
2008} 2011}
2009 2012
2010/** 2013/**
diff --git a/drivers/infiniband/hw/ipath/ipath_eeprom.c b/drivers/infiniband/hw/ipath/ipath_eeprom.c
index 26daac9d8b63..9be1b9ac55f0 100644
--- a/drivers/infiniband/hw/ipath/ipath_eeprom.c
+++ b/drivers/infiniband/hw/ipath/ipath_eeprom.c
@@ -367,8 +367,8 @@ bail:
367 * @len: number of bytes to receive 367 * @len: number of bytes to receive
368 */ 368 */
369 369
370int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset, 370static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
371 void *buffer, int len) 371 u8 eeprom_offset, void *buffer, int len)
372{ 372{
373 /* compiler complains unless initialized */ 373 /* compiler complains unless initialized */
374 u8 single_byte = 0; 374 u8 single_byte = 0;
@@ -418,6 +418,7 @@ bail:
418 return ret; 418 return ret;
419} 419}
420 420
421
421/** 422/**
422 * ipath_eeprom_write - writes data to the eeprom via I2C 423 * ipath_eeprom_write - writes data to the eeprom via I2C
423 * @dd: the infinipath device 424 * @dd: the infinipath device
@@ -425,8 +426,8 @@ bail:
425 * @buffer: data to write 426 * @buffer: data to write
426 * @len: number of bytes to write 427 * @len: number of bytes to write
427 */ 428 */
428int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset, 429int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
429 const void *buffer, int len) 430 const void *buffer, int len)
430{ 431{
431 u8 single_byte; 432 u8 single_byte;
432 int sub_len; 433 int sub_len;
@@ -500,6 +501,38 @@ bail:
500 return ret; 501 return ret;
501} 502}
502 503
504/*
505 * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
506 * are wrappers that hold dd->ipath_eep_sem around the internal functions.
507 */
508int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
509 void *buff, int len)
510{
511 int ret;
512
513 ret = down_interruptible(&dd->ipath_eep_sem); /* nonzero: semaphore not taken, value is returned as-is */
514 if (!ret) {
515 ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len); /* result of the read becomes the return value */
516 up(&dd->ipath_eep_sem);
517 }
518
519 return ret;
520}
521
522int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
523 const void *buff, int len)
524{
525 int ret;
526
527 ret = down_interruptible(&dd->ipath_eep_sem); /* nonzero: semaphore not taken, value is returned as-is */
528 if (!ret) {
529 ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len); /* result of the write becomes the return value */
530 up(&dd->ipath_eep_sem);
531 }
532
533 return ret;
534}
535
503static u8 flash_csum(struct ipath_flash *ifp, int adjust) 536static u8 flash_csum(struct ipath_flash *ifp, int adjust)
504{ 537{
505 u8 *ip = (u8 *) ifp; 538 u8 *ip = (u8 *) ifp;
@@ -527,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
527 void *buf; 560 void *buf;
528 struct ipath_flash *ifp; 561 struct ipath_flash *ifp;
529 __be64 guid; 562 __be64 guid;
530 int len; 563 int len, eep_stat;
531 u8 csum, *bguid; 564 u8 csum, *bguid;
532 int t = dd->ipath_unit; 565 int t = dd->ipath_unit;
533 struct ipath_devdata *dd0 = ipath_lookup(0); 566 struct ipath_devdata *dd0 = ipath_lookup(0);
@@ -571,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
571 goto bail; 604 goto bail;
572 } 605 }
573 606
574 if (ipath_eeprom_read(dd, 0, buf, len)) { 607 down(&dd->ipath_eep_sem);
608 eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
609 up(&dd->ipath_eep_sem);
610
611 if (eep_stat) {
575 ipath_dev_err(dd, "Failed reading GUID from eeprom\n"); 612 ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
576 goto done; 613 goto done;
577 } 614 }
@@ -646,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
646 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n", 683 ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
647 (unsigned long long) be64_to_cpu(dd->ipath_guid)); 684 (unsigned long long) be64_to_cpu(dd->ipath_guid));
648 685
686 memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
687 /*
688 * Power-on (actually "active") hours are kept as little-endian value
689 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
690 * atomic_t while running.
691 */
692 atomic_set(&dd->ipath_active_time, 0);
693 dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
694
649done: 695done:
650 vfree(buf); 696 vfree(buf);
651 697
652bail:; 698bail:;
653} 699}
700
701/**
702 * ipath_update_eeprom_log - copy active-time and error counters to eeprom
703 * @dd: the infinipath device
704 *
705 * Although the time is kept as seconds in the ipath_devdata struct, it is
706 * truncated to whole hours for re-write, as we have only 16 bits in EEPROM.
707 * First-cut code reads whole (expected) struct ipath_flash, modifies,
708 * re-writes. Future direction: read/write only what we need, assuming
709 * that the EEPROM had to have been "good enough" for driver init, and
710 * if not, we aren't making it worse.
711 * Returns 0 if nothing needed logging or the update succeeded, nonzero on failure.
712 */
713
714int ipath_update_eeprom_log(struct ipath_devdata *dd)
715{
716 void *buf;
717 struct ipath_flash *ifp;
718 int len, hi_water;
719 uint32_t new_time, new_hrs;
720 u8 csum;
721 int ret, idx;
722 unsigned long flags;
723
724 /* first, check if we actually need to do anything. */
725 ret = 0;
726 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
727 if (dd->ipath_eep_st_new_errs[idx]) {
728 ret = 1;
729 break;
730 }
731 }
732 new_time = atomic_read(&dd->ipath_active_time);
733
734 if (ret == 0 && new_time < 3600)
735 return 0;
736
737 /*
738 * The quick-check above determined that there is something worthy
739 * of logging, so get current contents and take a more detailed look.
740 */
741 len = offsetof(struct ipath_flash, if_future);
742 buf = vmalloc(len);
743 ret = 1;
744 if (!buf) {
745 ipath_dev_err(dd, "Couldn't allocate memory to read %u "
746 "bytes from eeprom for logging\n", len);
747 goto bail;
748 }
749
750 /* Grab semaphore and read current EEPROM. If we get an
751 * error, let go, but if not, keep it until we finish write.
752 */
753 ret = down_interruptible(&dd->ipath_eep_sem);
754 if (ret) {
755 ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
756 goto free_bail;
757 }
758 ret = ipath_eeprom_internal_read(dd, 0, buf, len);
759 if (ret) {
760 up(&dd->ipath_eep_sem);
761 ipath_dev_err(dd, "Unable read EEPROM for logging\n");
762 goto free_bail;
763 }
764 ifp = (struct ipath_flash *)buf;
765
766 csum = flash_csum(ifp, 0);
767 if (csum != ifp->if_csum) {
768 up(&dd->ipath_eep_sem);
769 ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
770 csum, ifp->if_csum);
771 ret = 1;
772 goto free_bail;
773 }
774 hi_water = 0; /* offset of the highest byte changed in buf; 0 means nothing to write */
775 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
776 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
777 int new_val = dd->ipath_eep_st_new_errs[idx];
778 if (new_val) {
779 /*
780 * If we have seen any errors, add to EEPROM values
781 * We need to saturate at 0xFF (255) and we also
782 * would need to adjust the checksum if we were
783 * trying to minimize EEPROM traffic
784 * Note that we add to actual current count in EEPROM,
785 * in case it was altered while we were running.
786 */
787 new_val += ifp->if_errcntp[idx];
788 if (new_val > 0xFF)
789 new_val = 0xFF;
790 if (ifp->if_errcntp[idx] != new_val) {
791 ifp->if_errcntp[idx] = new_val;
792 hi_water = offsetof(struct ipath_flash,
793 if_errcntp) + idx;
794 }
795 /*
796 * update our shadow (used to minimize EEPROM
797 * traffic), to match what we are about to write.
798 */
799 dd->ipath_eep_st_errs[idx] = new_val;
800 dd->ipath_eep_st_new_errs[idx] = 0;
801 }
802 }
803 /*
804 * now update active-time. We would like to round to the nearest hour
805 * but unless atomic_t are sure to be proper signed ints we cannot,
806 * because we need to account for what we "transfer" to EEPROM and
807 * if we log an hour at 31 minutes, then we would need to set
808 * active_time to -29 to accurately count the _next_ hour.
809 */
810 if (new_time > 3600) { /* NOTE(review): the quick-check above uses "< 3600", so exactly 3600s reaches here but is not logged; ">=" looks intended - confirm */
811 new_hrs = new_time / 3600;
812 atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
813 new_hrs += dd->ipath_eep_hrs;
814 if (new_hrs > 0xFFFF)
815 new_hrs = 0xFFFF;
816 dd->ipath_eep_hrs = new_hrs;
817 if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
818 ifp->if_powerhour[0] = new_hrs & 0xFF;
819 hi_water = offsetof(struct ipath_flash, if_powerhour); /* NOTE(review): overwrites hi_water from the error-count loop; assumes if_powerhour follows if_errcntp in struct ipath_flash - confirm */
820 }
821 if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
822 ifp->if_powerhour[1] = new_hrs >> 8;
823 hi_water = offsetof(struct ipath_flash, if_powerhour)
824 + 1;
825 }
826 }
827 /*
828 * There is a tiny possibility that we could somehow fail to write
829 * the EEPROM after updating our shadows, but problems from holding
830 * the spinlock too long are a much bigger issue.
831 */
832 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
833 if (hi_water) {
834 /* we made some change to the data, update cksum and write */
835 csum = flash_csum(ifp, 1); /* return value is not used below; presumably adjust=1 stores the new checksum in ifp->if_csum - confirm in flash_csum() */
836 ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1); /* rewrite bytes 0..hi_water of the image */
837 }
838 up(&dd->ipath_eep_sem);
839 if (ret)
840 ipath_dev_err(dd, "Failed updating EEPROM\n");
841
842free_bail:
843 vfree(buf);
844bail:
845 return ret;
846
847}
848
849/**
850 * ipath_inc_eeprom_err - increment one of the four error counters
851 * that are logged to EEPROM.
852 * @dd: the infinipath device
853 * @eidx: 0..3, the counter to increment
854 * @incr: how much to add
855 *
856 * Each counter is 8-bits, and saturates at 255 (0xFF). The pending counts
857 * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
858 * is called, but that function can only be called in a context that allows
859 * sleep. This function can be called even at interrupt level.
860 */
861
862void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
863{
864 uint new_val;
865 unsigned long flags;
866
867 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
868 new_val = dd->ipath_eep_st_new_errs[eidx] + incr; /* NOTE(review): eidx is not range-checked here; callers must pass a value below IPATH_EEP_LOG_CNT */
869 if (new_val > 255)
870 new_val = 255;
871 dd->ipath_eep_st_new_errs[eidx] = new_val;
872 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
873 return;
874}
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 8482ea366fb1..85f408de7bf7 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -440,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
440 u32 bits, ctrl; 440 u32 bits, ctrl;
441 int isfatal = 0; 441 int isfatal = 0;
442 char bitsmsg[64]; 442 char bitsmsg[64];
443 int log_idx;
443 444
444 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); 445 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
445 446
@@ -468,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
468 469
469 hwerrs &= dd->ipath_hwerrmask; 470 hwerrs &= dd->ipath_hwerrmask;
470 471
472 /* We log some errors to EEPROM, check if we have any of those. */
473 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
474 if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
475 ipath_inc_eeprom_err(dd, log_idx, 1);
476
471 /* 477 /*
472 * make sure we get this much out, unless told to be quiet, 478 * make sure we get this much out, unless told to be quiet,
473 * it's a parity error we may recover from, 479 * it's a parity error we may recover from,
@@ -1171,6 +1177,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
1171 1177
1172 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; 1178 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
1173 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; 1179 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
1180
1181 /*
1182 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
1183 * 2 is Some Misc, 3 is reserved for future.
1184 */
1185 dd->ipath_eep_st_masks[0].hwerrs_to_log =
1186 INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
1187 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
1188
1189 dd->ipath_eep_st_masks[1].hwerrs_to_log =
1190 INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
1191 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
1192
1193 dd->ipath_eep_st_masks[2].errs_to_log =
1194 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
1195
1174} 1196}
1175 1197
1176/** 1198/**
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6120.c b/drivers/infiniband/hw/ipath/ipath_iba6120.c
index 711590740118..207323a5b52b 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6120.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6120.c
@@ -340,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
340 u32 bits, ctrl; 340 u32 bits, ctrl;
341 int isfatal = 0; 341 int isfatal = 0;
342 char bitsmsg[64]; 342 char bitsmsg[64];
343 int log_idx;
343 344
344 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus); 345 hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
345 if (!hwerrs) { 346 if (!hwerrs) {
@@ -367,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
367 368
368 hwerrs &= dd->ipath_hwerrmask; 369 hwerrs &= dd->ipath_hwerrmask;
369 370
371 /* We log some errors to EEPROM, check if we have any of those. */
372 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
373 if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
374 ipath_inc_eeprom_err(dd, log_idx, 1);
375
370 /* 376 /*
371 * make sure we get this much out, unless told to be quiet, 377 * make sure we get this much out, unless told to be quiet,
372 * or it's occurred within the last 5 seconds 378 * or it's occurred within the last 5 seconds
@@ -950,6 +956,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
950 956
951 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK; 957 dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
952 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK; 958 dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
959
960 /*
961 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
962 * 2 is Some Misc, 3 is reserved for future.
963 */
964 dd->ipath_eep_st_masks[0].hwerrs_to_log =
965 INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
966 INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
967
968 /* Ignore errors in PIO/PBC on systems with unordered write-combining */
969 if (ipath_unordered_wc())
970 dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
971
972 dd->ipath_eep_st_masks[1].hwerrs_to_log =
973 INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
974 INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
975
976 dd->ipath_eep_st_masks[2].errs_to_log =
977 INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
978
979
953} 980}
954 981
955/* setup the MSI stuff again after a reset. I'd like to just call 982/* setup the MSI stuff again after a reset. I'd like to just call
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index f6ee7a83595a..ee839346a3a4 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -341,6 +341,8 @@ static int init_chip_first(struct ipath_devdata *dd,
341 spin_lock_init(&dd->ipath_tid_lock); 341 spin_lock_init(&dd->ipath_tid_lock);
342 342
343 spin_lock_init(&dd->ipath_gpio_lock); 343 spin_lock_init(&dd->ipath_gpio_lock);
344 spin_lock_init(&dd->ipath_eep_st_lock);
345 sema_init(&dd->ipath_eep_sem, 1);
344 346
345done: 347done:
346 *pdp = pd; 348 *pdp = pd;
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index a90d3b5699c4..d9cdd00c8233 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -505,6 +505,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
505 int i, iserr = 0; 505 int i, iserr = 0;
506 int chkerrpkts = 0, noprint = 0; 506 int chkerrpkts = 0, noprint = 0;
507 unsigned supp_msgs; 507 unsigned supp_msgs;
508 int log_idx;
508 509
509 supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint); 510 supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
510 511
@@ -518,6 +519,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
518 if (errs & INFINIPATH_E_HARDWARE) { 519 if (errs & INFINIPATH_E_HARDWARE) {
519 /* reuse same msg buf */ 520 /* reuse same msg buf */
520 dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg); 521 dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
522 } else {
523 u64 mask;
524 for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
525 mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
526 if (errs & mask)
527 ipath_inc_eeprom_err(dd, log_idx, 1);
528 }
521 } 529 }
522 530
523 if (!noprint && (errs & ~dd->ipath_e_bitsextant)) 531 if (!noprint && (errs & ~dd->ipath_e_bitsextant))
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index bd1088a99891..2a4414b948ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -57,6 +57,24 @@
57extern struct infinipath_stats ipath_stats; 57extern struct infinipath_stats ipath_stats;
58 58
59#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ 59#define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
60/*
61 * First-cut criterion for "device is active" is
62 * two thousand dwords combined Tx, Rx traffic per
63 * 5-second interval. SMA packets are 64 dwords,
64 * and occur "a few per second", presumably each way.
65 */
66#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
67/*
68 * Struct used to indicate which errors are logged in each of the
69 * error-counters that are logged to EEPROM. A counter is incremented
70 * _once_ (saturating at 255) for each event with any bits set in
71 * the error or hwerror register masks below.
72 */
73#define IPATH_EEP_LOG_CNT (4)
74struct ipath_eep_log_mask {
75 u64 errs_to_log;
76 u64 hwerrs_to_log;
77};
60 78
61struct ipath_portdata { 79struct ipath_portdata {
62 void **port_rcvegrbuf; 80 void **port_rcvegrbuf;
@@ -588,6 +606,24 @@ struct ipath_devdata {
588 /* Used to flash LEDs in override mode */ 606 /* Used to flash LEDs in override mode */
589 struct timer_list ipath_led_override_timer; 607 struct timer_list ipath_led_override_timer;
590 608
609 /* Support (including locks) for EEPROM logging of errors and time */
610 /* control access to actual counters, timer */
611 spinlock_t ipath_eep_st_lock;
612 /* control high-level access to EEPROM */
613 struct semaphore ipath_eep_sem;
614 /* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
615 uint64_t ipath_traffic_wds;
616 /* active time is kept in seconds, but logged in hours */
617 atomic_t ipath_active_time;
618 /* Below are nominal shadow of EEPROM, new since last EEPROM update */
619 uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
620 uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
621 uint16_t ipath_eep_hrs;
622 /*
623 * masks for which bits of errs, hwerrs that cause
624 * each of the counters to increment.
625 */
626 struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
591}; 627};
592 628
593/* Private data for file operations */ 629/* Private data for file operations */
@@ -726,6 +762,8 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
726void ipath_init_iba6120_funcs(struct ipath_devdata *); 762void ipath_init_iba6120_funcs(struct ipath_devdata *);
727void ipath_init_iba6110_funcs(struct ipath_devdata *); 763void ipath_init_iba6110_funcs(struct ipath_devdata *);
728void ipath_get_eeprom_info(struct ipath_devdata *); 764void ipath_get_eeprom_info(struct ipath_devdata *);
765int ipath_update_eeprom_log(struct ipath_devdata *dd);
766void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
729u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 767u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
730void ipath_disarm_senderrbufs(struct ipath_devdata *, int); 768void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
731 769
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index d8b5e4cefe25..2955f368de0c 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
55 u64 val64; 55 u64 val64;
56 unsigned long t0, t1; 56 unsigned long t0, t1;
57 u64 ret; 57 u64 ret;
58 unsigned long flags;
58 59
59 t0 = jiffies; 60 t0 = jiffies;
60 /* If fast increment counters are only 32 bits, snapshot them, 61 /* If fast increment counters are only 32 bits, snapshot them,
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
91 if (creg == dd->ipath_cregs->cr_wordsendcnt) { 92 if (creg == dd->ipath_cregs->cr_wordsendcnt) {
92 if (val != dd->ipath_lastsword) { 93 if (val != dd->ipath_lastsword) {
93 dd->ipath_sword += val - dd->ipath_lastsword; 94 dd->ipath_sword += val - dd->ipath_lastsword;
95 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
96 dd->ipath_traffic_wds += val - dd->ipath_lastsword;
97 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
94 dd->ipath_lastsword = val; 98 dd->ipath_lastsword = val;
95 } 99 }
96 val64 = dd->ipath_sword; 100 val64 = dd->ipath_sword;
97 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) { 101 } else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
98 if (val != dd->ipath_lastrword) { 102 if (val != dd->ipath_lastrword) {
99 dd->ipath_rword += val - dd->ipath_lastrword; 103 dd->ipath_rword += val - dd->ipath_lastrword;
104 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
105 dd->ipath_traffic_wds += val - dd->ipath_lastrword;
106 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
100 dd->ipath_lastrword = val; 107 dd->ipath_lastrword = val;
101 } 108 }
102 val64 = dd->ipath_rword; 109 val64 = dd->ipath_rword;
@@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
200 struct ipath_devdata *dd = (struct ipath_devdata *) opaque; 207 struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
201 u32 val; 208 u32 val;
202 static unsigned cnt; 209 static unsigned cnt;
210 unsigned long flags;
203 211
204 /* 212 /*
205 * don't access the chip while running diags, or memory diags can 213 * don't access the chip while running diags, or memory diags can
@@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque)
210 /* but re-arm the timer, for diags case; won't hurt other */ 218 /* but re-arm the timer, for diags case; won't hurt other */
211 goto done; 219 goto done;
212 220
221 /*
222 * We now try to maintain an "active timer", based on traffic
223 * exceeding a threshold, so we need to check the word-counts
224 * even if they are 64-bit.
225 */
226 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
227 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
228 spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
229 if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
230 atomic_add(5, &dd->ipath_active_time); /* S/B #define */
231 dd->ipath_traffic_wds = 0;
232 spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
233
213 if (dd->ipath_flags & IPATH_32BITCOUNTERS) { 234 if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
214 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
215 ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
216 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt); 235 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
217 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt); 236 ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
218 } 237 }
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 17ec14571722..ab34d3e8b955 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -613,6 +613,26 @@ static ssize_t store_led_override(struct device *dev,
613 return ret; 613 return ret;
614} 614}
615 615
616static ssize_t show_logged_errs(struct device *dev,
617 struct device_attribute *attr,
618 char *buf)
619{
620 struct ipath_devdata *dd = dev_get_drvdata(dev);
621 int idx, count;
622
623 /* flush pending counts first, so the shadow values printed below match the EEPROM */
624 if (ipath_update_eeprom_log(dd) != 0)
625 return -ENXIO; /* every nonzero result from the update is reported as -ENXIO */
626
627 count = 0;
628 for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
629 count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
630 dd->ipath_eep_st_errs[idx],
631 idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' '); /* space-separated, newline after the last counter */
632 }
633
634 return count;
635}
616 636
617static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL); 637static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
618static DRIVER_ATTR(version, S_IRUGO, show_version, NULL); 638static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -643,6 +663,7 @@ static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
643static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL); 663static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
644static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv); 664static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
645static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override); 665static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
666static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
646 667
647static struct attribute *dev_attributes[] = { 668static struct attribute *dev_attributes[] = {
648 &dev_attr_guid.attr, 669 &dev_attr_guid.attr,
@@ -660,6 +681,7 @@ static struct attribute *dev_attributes[] = {
660 &dev_attr_enabled.attr, 681 &dev_attr_enabled.attr,
661 &dev_attr_rx_pol_inv.attr, 682 &dev_attr_rx_pol_inv.attr,
662 &dev_attr_led_override.attr, 683 &dev_attr_led_override.attr,
684 &dev_attr_logged_errors.attr,
663 NULL 685 NULL
664}; 686};
665 687