author     Dean Luick <dean.luick@intel.com>       2013-07-11 15:32:14 -0400
committer  Roland Dreier <roland@purestorage.com>  2013-07-11 19:47:06 -0400
commit     0b3ddf380ca7aa6a009cc3e1944933fff8113b6a (patch)
tree       521f32edddde43fe80b81ec6f16002d5ab9d4ecd /drivers/infiniband
parent     308c813b19cb676df7e5e70b5f014fa56e918677 (diff)
IB/qib: Log all SDMA errors unconditionally
This patch adds code to log SDMA errors for supportability purposes.

Signed-off-by: Dean Luick <dean.luick@intel.com>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/hw/qib/qib.h            2
-rw-r--r--  drivers/infiniband/hw/qib/qib_iba7322.c  114
-rw-r--r--  drivers/infiniband/hw/qib/qib_sdma.c      56
3 files changed, 171 insertions(+), 1 deletion(-)
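A note on how the new helpers are meant to be called: both dump routines added below document that ppd->sdma_lock must be held by the caller. The snippet here is a minimal sketch of that calling pattern, mirroring the hardware-error path added in qib_iba7322.c; it assumes the qib driver context (qib.h and the qib_7322 internals), and the wrapper name qib_example_dump_port is hypothetical rather than part of the patch.

/* Sketch only (assumes the qib driver context; wrapper name is hypothetical).
 * Mirrors the hardware-error path added below: take sdma_lock, dump the
 * SDMA engine state, release the lock.
 */
static void qib_example_dump_port(struct qib_pportdata *ppd)
{
        unsigned long flags;

        spin_lock_irqsave(&ppd->sdma_lock, flags);
        dump_sdma_7322_state(ppd);   /* logs chip registers, then calls dump_sdma_state() */
        spin_unlock_irqrestore(&ppd->sdma_lock, flags);
}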
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 5453e2b36567..4a9af795b88f 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1348,7 +1348,7 @@ static inline int __qib_sdma_running(struct qib_pportdata *ppd)
         return ppd->sdma_state.current_state == qib_sdma_state_s99_running;
 }
 int qib_sdma_running(struct qib_pportdata *);
-
+void dump_sdma_state(struct qib_pportdata *ppd);
 void __qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
 void qib_sdma_process_event(struct qib_pportdata *, enum qib_sdma_events);
 
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index f7c4b44b1f93..21e8b09d4bf8 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -83,6 +83,7 @@ static void ibsd_wr_allchans(struct qib_pportdata *, int, unsigned, unsigned);
 static void serdes_7322_los_enable(struct qib_pportdata *, int);
 static int serdes_7322_init_old(struct qib_pportdata *);
 static int serdes_7322_init_new(struct qib_pportdata *);
+static void dump_sdma_7322_state(struct qib_pportdata *);
 
 #define BMASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb))
 
@@ -652,6 +653,7 @@ struct qib_chippport_specific {
         u8 ibmalfusesnap;
         struct qib_qsfp_data qsfp_data;
         char epmsgbuf[192]; /* for port error interrupt msg buffer */
+        char sdmamsgbuf[192]; /* for per-port sdma error messages */
 };
 
 static struct {
@@ -1601,6 +1603,15 @@ static void sdma_7322_p_errors(struct qib_pportdata *ppd, u64 errs)
 
         spin_lock_irqsave(&ppd->sdma_lock, flags);
 
+        if (errs != QIB_E_P_SDMAHALT) {
+                /* SDMA errors have QIB_E_P_SDMAHALT and another bit set */
+                qib_dev_porterr(dd, ppd->port,
+                        "SDMA %s 0x%016llx %s\n",
+                        qib_sdma_state_names[ppd->sdma_state.current_state],
+                        errs, ppd->cpspec->sdmamsgbuf);
+                dump_sdma_7322_state(ppd);
+        }
+
         switch (ppd->sdma_state.current_state) {
         case qib_sdma_state_s00_hw_down:
                 break;
@@ -2156,6 +2167,29 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
 
         qib_dev_err(dd, "%s hardware error\n", msg);
 
+        if (hwerrs &
+            (SYM_MASK(HwErrMask, SDmaMemReadErrMask_0) |
+             SYM_MASK(HwErrMask, SDmaMemReadErrMask_1))) {
+                int pidx = 0;
+                int err;
+                unsigned long flags;
+                struct qib_pportdata *ppd = dd->pport;
+                for (; pidx < dd->num_pports; ++pidx, ppd++) {
+                        err = 0;
+                        if (pidx == 0 && (hwerrs &
+                            SYM_MASK(HwErrMask, SDmaMemReadErrMask_0)))
+                                err++;
+                        if (pidx == 1 && (hwerrs &
+                            SYM_MASK(HwErrMask, SDmaMemReadErrMask_1)))
+                                err++;
+                        if (err) {
+                                spin_lock_irqsave(&ppd->sdma_lock, flags);
+                                dump_sdma_7322_state(ppd);
+                                spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+                        }
+                }
+        }
+
         if (isfatal && !dd->diag_client) {
                 qib_dev_err(dd,
                         "Fatal Hardware Error, no longer usable, SN %.16s\n",
@@ -6753,6 +6787,86 @@ static void qib_sdma_set_7322_desc_cnt(struct qib_pportdata *ppd, unsigned cnt)
         qib_write_kreg_port(ppd, krp_senddmadesccnt, cnt);
 }
 
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+static void dump_sdma_7322_state(struct qib_pportdata *ppd)
+{
+        u64 reg, reg1, reg2;
+
+        reg = qib_read_kreg_port(ppd, krp_senddmastatus);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmastatus: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_sendctrl);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA sendctrl: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmabase);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmabase: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmabufmask0);
+        reg1 = qib_read_kreg_port(ppd, krp_senddmabufmask1);
+        reg2 = qib_read_kreg_port(ppd, krp_senddmabufmask2);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmabufmask 0:%llx 1:%llx 2:%llx\n",
+                reg, reg1, reg2);
+
+        /* get bufuse bits, clear them, and print them again if non-zero */
+        reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+        qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg);
+        reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+        qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg1);
+        reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+        qib_write_kreg_port(ppd, krp_senddmabuf_use0, reg2);
+        /* 0 and 1 should always be zero, so print as short form */
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA current senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+                reg, reg1, reg2);
+        reg = qib_read_kreg_port(ppd, krp_senddmabuf_use0);
+        reg1 = qib_read_kreg_port(ppd, krp_senddmabuf_use1);
+        reg2 = qib_read_kreg_port(ppd, krp_senddmabuf_use2);
+        /* 0 and 1 should always be zero, so print as short form */
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA cleared senddmabuf_use 0:%llx 1:%llx 2:%llx\n",
+                reg, reg1, reg2);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmatail);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmatail: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmahead);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmahead: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmaheadaddr);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmaheadaddr: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmalengen);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmalengen: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmadesccnt);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmadesccnt: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmaidlecnt);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmaidlecnt: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmaprioritythld);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmapriorityhld: 0x%016llx\n", reg);
+
+        reg = qib_read_kreg_port(ppd, krp_senddmareloadcnt);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA senddmareloadcnt: 0x%016llx\n", reg);
+
+        dump_sdma_state(ppd);
+}
+
 static struct sdma_set_state_action sdma_7322_action_table[] = {
         [qib_sdma_state_s00_hw_down] = {
                 .go_s99_running_tofalse = 1,
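The generic dump_sdma_state() added in qib_sdma.c below walks the descriptor ring and decodes each 128-bit descriptor into flag characters, a DMA address, a generation count, and dword-derived length/offset fields. The standalone snippet here (not part of the patch) applies the same shifts and masks to a pair of made-up descriptor qwords so the per-descriptor log lines are easier to interpret; only the bit manipulation is taken from the patch, while the sample values and the program around them are illustrative assumptions.

/* Illustration only, not part of the patch: decode one SDMA descriptor
 * (two 64-bit qwords) with the same shifts and masks used by the new
 * dump_sdma_state() below.  The sample qword values are made up.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t desc0 = 0x7f3e1000400ad002ULL;  /* hypothetical qword 0 */
        uint64_t desc1 = 0x0000000000000000ULL;  /* hypothetical qword 1 */
        char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };

        /* one flag character per descriptor control bit (bits 15..11) */
        flags[0] = (desc0 & 1 << 15) ? 'I' : '-';
        flags[1] = (desc0 & 1 << 14) ? 'L' : 'S';
        flags[2] = (desc0 & 1 << 13) ? 'H' : '-';
        flags[3] = (desc0 & 1 << 12) ? 'F' : '-';
        flags[4] = (desc0 & 1 << 11) ? 'L' : '-';

        /* DMA address, generation, and byte length/offset fields */
        uint64_t addr = (desc1 << 32) | ((desc0 >> 32) & 0xfffffffcULL);
        unsigned int gen = (desc0 >> 30) & 3ULL;
        unsigned int dwlen = (desc0 >> 14) & (0x7ffULL << 2);
        unsigned int dwoffset = (desc0 & 0x7ffULL) << 2;

        printf("flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
               flags, (unsigned long long)addr, gen, dwlen, dwoffset);
        return 0;
}

With the sample qwords above this prints flags:IL-F- addr:0x000000007f3e1000 gen:1 len:40 bytes offset:8 bytes, the same shape as the per-descriptor lines dump_sdma_state() emits.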
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 3fc514431212..32162d355370 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -708,6 +708,62 @@ unlock:
         return ret;
 }
 
+/*
+ * sdma_lock should be acquired before calling this routine
+ */
+void dump_sdma_state(struct qib_pportdata *ppd)
+{
+        struct qib_sdma_desc *descq;
+        struct qib_sdma_txreq *txp, *txpnext;
+        __le64 *descqp;
+        u64 desc[2];
+        dma_addr_t addr;
+        u16 gen, dwlen, dwoffset;
+        u16 head, tail, cnt;
+
+        head = ppd->sdma_descq_head;
+        tail = ppd->sdma_descq_tail;
+        cnt = qib_sdma_descq_freecnt(ppd);
+        descq = ppd->sdma_descq;
+
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA ppd->sdma_descq_head: %u\n", head);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA ppd->sdma_descq_tail: %u\n", tail);
+        qib_dev_porterr(ppd->dd, ppd->port,
+                "SDMA sdma_descq_freecnt: %u\n", cnt);
+
+        /* print info for each entry in the descriptor queue */
+        while (head != tail) {
+                char flags[6] = { 'x', 'x', 'x', 'x', 'x', 0 };
+
+                descqp = &descq[head].qw[0];
+                desc[0] = le64_to_cpu(descqp[0]);
+                desc[1] = le64_to_cpu(descqp[1]);
+                flags[0] = (desc[0] & 1<<15) ? 'I' : '-';
+                flags[1] = (desc[0] & 1<<14) ? 'L' : 'S';
+                flags[2] = (desc[0] & 1<<13) ? 'H' : '-';
+                flags[3] = (desc[0] & 1<<12) ? 'F' : '-';
+                flags[4] = (desc[0] & 1<<11) ? 'L' : '-';
+                addr = (desc[1] << 32) | ((desc[0] >> 32) & 0xfffffffcULL);
+                gen = (desc[0] >> 30) & 3ULL;
+                dwlen = (desc[0] >> 14) & (0x7ffULL << 2);
+                dwoffset = (desc[0] & 0x7ffULL) << 2;
+                qib_dev_porterr(ppd->dd, ppd->port,
+                        "SDMA sdmadesc[%u]: flags:%s addr:0x%016llx gen:%u len:%u bytes offset:%u bytes\n",
+                        head, flags, addr, gen, dwlen, dwoffset);
+                if (++head == ppd->sdma_descq_cnt)
+                        head = 0;
+        }
+
+        /* print dma descriptor indices from the TX requests */
+        list_for_each_entry_safe(txp, txpnext, &ppd->sdma_activelist,
+                                 list)
+                qib_dev_porterr(ppd->dd, ppd->port,
+                        "SDMA txp->start_idx: %u txp->next_descq_idx: %u\n",
+                        txp->start_idx, txp->next_descq_idx);
+}
+
 void qib_sdma_process_event(struct qib_pportdata *ppd,
                             enum qib_sdma_events event)
 {