aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDave Olson <dave.olson@qlogic.com>2008-05-07 14:00:15 -0400
committerRoland Dreier <rolandd@cisco.com>2008-05-07 14:00:15 -0400
commite2ab41cae418108f376ad1634d7507f56379f7a2 (patch)
treec14355ab189f36f02b56f1dd293d49f1fc056730
parent2889d1ef1240591fa4c72a6753e0a8d1c6e18140 (diff)
IB/ipath: Need to always request and handle PIO avail interrupts
Now that we always use PIO for vl15 on 7220, we could get stuck forever if we happened to run out of PIO buffers from the verbs code, because the setup code wouldn't run; the interrupt was also ignored if SDMA was supported. We also have to reduce the pio update threshold if we have fewer kernel buffers than the existing threshold. Clean up the initialization a bit to get ordering safer and more sensible, and use the existing ipath_chg_kernavail call to do init, rather than doing it separately. Drop unnecessary clearing of pio buffer on pio parity error. Drop incorrect updating of pioavailshadow when exitting freeze mode (software state may not match chip state if buffer has been allocated and not yet written). If we couldn't get a kernel buffer for a while, make sure we are in sync with hardware, mainly to handle the exitting freeze case. Signed-off-by: Dave Olson <dave.olson@qlogic.com> Signed-off-by: Roland Dreier <rolandd@cisco.com>
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c128
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c72
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba7220.c21
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c95
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c82
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c7
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sdma.c13
8 files changed, 224 insertions, 202 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f81dd4acdc60..2036d38fac47 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1428 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1428 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1429} 1429}
1430 1430
1431/*
1432 * used to force update of pioavailshadow if we can't get a pio buffer.
1433 * Needed primarily due to exitting freeze mode after recovering
1434 * from errors. Done lazily, because it's safer (known to not
1435 * be writing pio buffers).
1436 */
1437static void ipath_reset_availshadow(struct ipath_devdata *dd)
1438{
1439 int i, im;
1440 unsigned long flags;
1441
1442 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1443 for (i = 0; i < dd->ipath_pioavregs; i++) {
1444 u64 val, oldval;
1445 /* deal with 6110 chip bug on high register #s */
1446 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1447 i ^ 1 : i;
1448 val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
1449 /*
1450 * busy out the buffers not in the kernel avail list,
1451 * without changing the generation bits.
1452 */
1453 oldval = dd->ipath_pioavailshadow[i];
1454 dd->ipath_pioavailshadow[i] = val |
1455 ((~dd->ipath_pioavailkernel[i] <<
1456 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
1457 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
1458 if (oldval != dd->ipath_pioavailshadow[i])
1459 ipath_dbg("shadow[%d] was %Lx, now %lx\n",
1460 i, oldval, dd->ipath_pioavailshadow[i]);
1461 }
1462 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1463}
1464
1431/** 1465/**
1432 * ipath_setrcvhdrsize - set the receive header size 1466 * ipath_setrcvhdrsize - set the receive header size
1433 * @dd: the infinipath device 1467 * @dd: the infinipath device
@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
1482 */ 1516 */
1483 ipath_stats.sps_nopiobufs++; 1517 ipath_stats.sps_nopiobufs++;
1484 if (!(++dd->ipath_consec_nopiobuf % 100000)) { 1518 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1485 ipath_dbg("%u pio sends with no bufavail; dmacopy: " 1519 ipath_force_pio_avail_update(dd); /* at start */
1486 "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n", 1520 ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
1521 "%llx %llx %llx %llx\n"
1522 "ipath shadow: %lx %lx %lx %lx\n",
1487 dd->ipath_consec_nopiobuf, 1523 dd->ipath_consec_nopiobuf,
1524 (unsigned long)get_cycles(),
1488 (unsigned long long) le64_to_cpu(dma[0]), 1525 (unsigned long long) le64_to_cpu(dma[0]),
1489 (unsigned long long) le64_to_cpu(dma[1]), 1526 (unsigned long long) le64_to_cpu(dma[1]),
1490 (unsigned long long) le64_to_cpu(dma[2]), 1527 (unsigned long long) le64_to_cpu(dma[2]),
@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
1496 */ 1533 */
1497 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 1534 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1498 (sizeof(shadow[0]) * 4 * 4)) 1535 (sizeof(shadow[0]) * 4 * 4))
1499 ipath_dbg("2nd group: dmacopy: %llx %llx " 1536 ipath_dbg("2nd group: dmacopy: "
1500 "%llx %llx; shadow: %lx %lx %lx %lx\n", 1537 "%llx %llx %llx %llx\n"
1538 "ipath shadow: %lx %lx %lx %lx\n",
1501 (unsigned long long)le64_to_cpu(dma[4]), 1539 (unsigned long long)le64_to_cpu(dma[4]),
1502 (unsigned long long)le64_to_cpu(dma[5]), 1540 (unsigned long long)le64_to_cpu(dma[5]),
1503 (unsigned long long)le64_to_cpu(dma[6]), 1541 (unsigned long long)le64_to_cpu(dma[6]),
1504 (unsigned long long)le64_to_cpu(dma[7]), 1542 (unsigned long long)le64_to_cpu(dma[7]),
1505 shadow[4], shadow[5], shadow[6], 1543 shadow[4], shadow[5], shadow[6], shadow[7]);
1506 shadow[7]); 1544
1545 /* at end, so update likely happened */
1546 ipath_reset_availshadow(dd);
1507 } 1547 }
1508} 1548}
1509 1549
@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1652 unsigned len, int avail) 1692 unsigned len, int avail)
1653{ 1693{
1654 unsigned long flags; 1694 unsigned long flags;
1655 unsigned end; 1695 unsigned end, cnt = 0, next;
1656 1696
1657 /* There are two bits per send buffer (busy and generation) */ 1697 /* There are two bits per send buffer (busy and generation) */
1658 start *= 2; 1698 start *= 2;
1659 len *= 2; 1699 end = start + len * 2;
1660 end = start + len;
1661 1700
1662 /* Set or clear the generation bits. */
1663 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1701 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1702 /* Set or clear the busy bit in the shadow. */
1664 while (start < end) { 1703 while (start < end) {
1665 if (avail) { 1704 if (avail) {
1666 __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, 1705 unsigned long dma;
1667 dd->ipath_pioavailshadow); 1706 int i, im;
1707 /*
1708 * the BUSY bit will never be set, because we disarm
1709 * the user buffers before we hand them back to the
1710 * kernel. We do have to make sure the generation
1711 * bit is set correctly in shadow, since it could
1712 * have changed many times while allocated to user.
1713 * We can't use the bitmap functions on the full
1714 * dma array because it is always little-endian, so
1715 * we have to flip to host-order first.
1716 * BITS_PER_LONG is slightly wrong, since it's
1717 * always 64 bits per register in chip...
1718 * We only work on 64 bit kernels, so that's OK.
1719 */
1720 /* deal with 6110 chip bug on high register #s */
1721 i = start / BITS_PER_LONG;
1722 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1723 i ^ 1 : i;
1724 __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
1725 + start, dd->ipath_pioavailshadow);
1726 dma = (unsigned long) le64_to_cpu(
1727 dd->ipath_pioavailregs_dma[im]);
1728 if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1729 + start) % BITS_PER_LONG, &dma))
1730 __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1731 + start, dd->ipath_pioavailshadow);
1732 else
1733 __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1734 + start, dd->ipath_pioavailshadow);
1668 __set_bit(start, dd->ipath_pioavailkernel); 1735 __set_bit(start, dd->ipath_pioavailkernel);
1669 } else { 1736 } else {
1670 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, 1737 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1673 } 1740 }
1674 start += 2; 1741 start += 2;
1675 } 1742 }
1743
1744 if (dd->ipath_pioupd_thresh) {
1745 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1746 next = find_first_bit(dd->ipath_pioavailkernel, end);
1747 while (next < end) {
1748 cnt++;
1749 next = find_next_bit(dd->ipath_pioavailkernel, end,
1750 next + 1);
1751 }
1752 }
1676 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1753 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1754
1755 /*
1756 * When moving buffers from kernel to user, if number assigned to
1757 * the user is less than the pio update threshold, and threshold
1758 * is supported (cnt was computed > 0), drop the update threshold
1759 * so we update at least once per allocated number of buffers.
1760 * In any case, if the kernel buffers are less than the threshold,
1761 * drop the threshold. We don't bother increasing it, having once
1762 * decreased it, since it would typically just cycle back and forth.
1763 * If we don't decrease below buffers in use, we can wait a long
1764 * time for an update, until some other context uses PIO buffers.
1765 */
1766 if (!avail && len < cnt)
1767 cnt = len;
1768 if (cnt < dd->ipath_pioupd_thresh) {
1769 dd->ipath_pioupd_thresh = cnt;
1770 ipath_dbg("Decreased pio update threshold to %u\n",
1771 dd->ipath_pioupd_thresh);
1772 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1773 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
1774 << INFINIPATH_S_UPDTHRESH_SHIFT);
1775 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
1776 << INFINIPATH_S_UPDTHRESH_SHIFT;
1777 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1778 dd->ipath_sendctrl);
1779 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1780 }
1677} 1781}
1678 1782
1679/** 1783/**
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 8b1752202e78..3295177c937e 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
173 (void *) dd->ipath_statusp - 173 (void *) dd->ipath_statusp -
174 (void *) dd->ipath_pioavailregs_dma; 174 (void *) dd->ipath_pioavailregs_dma;
175 if (!shared) { 175 if (!shared) {
176 kinfo->spi_piocnt = dd->ipath_pbufsport; 176 kinfo->spi_piocnt = pd->port_piocnt;
177 kinfo->spi_piobufbase = (u64) pd->port_piobufs; 177 kinfo->spi_piobufbase = (u64) pd->port_piobufs;
178 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + 178 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
179 dd->ipath_ureg_align * pd->port_port; 179 dd->ipath_ureg_align * pd->port_port;
180 } else if (master) { 180 } else if (master) {
181 kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + 181 kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
182 (dd->ipath_pbufsport % subport_cnt); 182 (pd->port_piocnt % subport_cnt);
183 /* Master's PIO buffers are after all the slave's */ 183 /* Master's PIO buffers are after all the slave's */
184 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 184 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
185 dd->ipath_palign * 185 dd->ipath_palign *
186 (dd->ipath_pbufsport - kinfo->spi_piocnt); 186 (pd->port_piocnt - kinfo->spi_piocnt);
187 } else { 187 } else {
188 unsigned slave = subport_fp(fp) - 1; 188 unsigned slave = subport_fp(fp) - 1;
189 189
190 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; 190 kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
191 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 191 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
192 dd->ipath_palign * kinfo->spi_piocnt * slave; 192 dd->ipath_palign * kinfo->spi_piocnt * slave;
193 } 193 }
194 194
195 /*
196 * Set the PIO avail update threshold to no larger
197 * than the number of buffers per process. Note that
198 * we decrease it here, but won't ever increase it.
199 */
200 if (dd->ipath_pioupd_thresh &&
201 kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
202 unsigned long flags;
203
204 dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
205 ipath_dbg("Decreased pio update threshold to %u\n",
206 dd->ipath_pioupd_thresh);
207 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
208 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
209 << INFINIPATH_S_UPDTHRESH_SHIFT);
210 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
211 << INFINIPATH_S_UPDTHRESH_SHIFT;
212 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
213 dd->ipath_sendctrl);
214 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
215 }
216
217 if (shared) { 195 if (shared) {
218 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + 196 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
219 dd->ipath_ureg_align * pd->port_port; 197 dd->ipath_ureg_align * pd->port_port;
@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1309 ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; 1287 ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
1310 if (!pd->port_subport_cnt) { 1288 if (!pd->port_subport_cnt) {
1311 /* port is not shared */ 1289 /* port is not shared */
1312 piocnt = dd->ipath_pbufsport; 1290 piocnt = pd->port_piocnt;
1313 piobufs = pd->port_piobufs; 1291 piobufs = pd->port_piobufs;
1314 } else if (!subport_fp(fp)) { 1292 } else if (!subport_fp(fp)) {
1315 /* caller is the master */ 1293 /* caller is the master */
1316 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + 1294 piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
1317 (dd->ipath_pbufsport % pd->port_subport_cnt); 1295 (pd->port_piocnt % pd->port_subport_cnt);
1318 piobufs = pd->port_piobufs + 1296 piobufs = pd->port_piobufs +
1319 dd->ipath_palign * (dd->ipath_pbufsport - piocnt); 1297 dd->ipath_palign * (pd->port_piocnt - piocnt);
1320 } else { 1298 } else {
1321 unsigned slave = subport_fp(fp) - 1; 1299 unsigned slave = subport_fp(fp) - 1;
1322 1300
1323 /* caller is a slave */ 1301 /* caller is a slave */
1324 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; 1302 piocnt = pd->port_piocnt / pd->port_subport_cnt;
1325 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; 1303 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1326 } 1304 }
1327 1305
@@ -1633,9 +1611,6 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
1633 port_fp(fp) = pd; 1611 port_fp(fp) = pd;
1634 pd->port_pid = current->pid; 1612 pd->port_pid = current->pid;
1635 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); 1613 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1636 ipath_chg_pioavailkernel(dd,
1637 dd->ipath_pbufsport * (pd->port_port - 1),
1638 dd->ipath_pbufsport, 0);
1639 ipath_stats.sps_ports++; 1614 ipath_stats.sps_ports++;
1640 ret = 0; 1615 ret = 0;
1641 } else 1616 } else
@@ -1938,11 +1913,25 @@ static int ipath_do_user_init(struct file *fp,
1938 1913
1939 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ 1914 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
1940 1915
1916 /* some ports may get extra buffers, calculate that here */
1917 if (pd->port_port <= dd->ipath_ports_extrabuf)
1918 pd->port_piocnt = dd->ipath_pbufsport + 1;
1919 else
1920 pd->port_piocnt = dd->ipath_pbufsport;
1921
1941 /* for right now, kernel piobufs are at end, so port 1 is at 0 */ 1922 /* for right now, kernel piobufs are at end, so port 1 is at 0 */
1923 if (pd->port_port <= dd->ipath_ports_extrabuf)
1924 pd->port_pio_base = (dd->ipath_pbufsport + 1)
1925 * (pd->port_port - 1);
1926 else
1927 pd->port_pio_base = dd->ipath_ports_extrabuf +
1928 dd->ipath_pbufsport * (pd->port_port - 1);
1942 pd->port_piobufs = dd->ipath_piobufbase + 1929 pd->port_piobufs = dd->ipath_piobufbase +
1943 dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; 1930 pd->port_pio_base * dd->ipath_palign;
1944 ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", 1931 ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
1945 pd->port_port, pd->port_piobufs); 1932 " first pio %u\n", pd->port_port, pd->port_piobufs,
1933 pd->port_piocnt, pd->port_pio_base);
1934 ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
1946 1935
1947 /* 1936 /*
1948 * Now allocate the rcvhdr Q and eager TIDs; skip the TID 1937 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
@@ -2107,7 +2096,6 @@ static int ipath_close(struct inode *in, struct file *fp)
2107 } 2096 }
2108 2097
2109 if (dd->ipath_kregbase) { 2098 if (dd->ipath_kregbase) {
2110 int i;
2111 /* atomically clear receive enable port and intr avail. */ 2099 /* atomically clear receive enable port and intr avail. */
2112 clear_bit(dd->ipath_r_portenable_shift + port, 2100 clear_bit(dd->ipath_r_portenable_shift + port,
2113 &dd->ipath_rcvctrl); 2101 &dd->ipath_rcvctrl);
@@ -2136,9 +2124,9 @@ static int ipath_close(struct inode *in, struct file *fp)
2136 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, 2124 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
2137 pd->port_port, dd->ipath_dummy_hdrq_phys); 2125 pd->port_port, dd->ipath_dummy_hdrq_phys);
2138 2126
2139 i = dd->ipath_pbufsport * (port - 1); 2127 ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
2140 ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); 2128 ipath_chg_pioavailkernel(dd, pd->port_pio_base,
2141 ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1); 2129 pd->port_piocnt, 1);
2142 2130
2143 dd->ipath_f_clear_tids(dd, pd->port_port); 2131 dd->ipath_f_clear_tids(dd, pd->port_port);
2144 2132
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index 5f693de66542..8eee7830f042 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
595 595
596 dev_info(&dd->pcidev->dev, 596 dev_info(&dd->pcidev->dev,
597 "Recovering from TXE PIO parity error\n"); 597 "Recovering from TXE PIO parity error\n");
598 ipath_disarm_senderrbufs(dd, 1); 598 ipath_disarm_senderrbufs(dd);
599} 599}
600 600
601 601
@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
675 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 675 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
676 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { 676 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
677 /* 677 /*
678 * Parity errors in send memory are recoverable, 678 * Parity errors in send memory are recoverable by h/w
679 * just cancel the send (if indicated in * sendbuffererror), 679 * just do housekeeping, exit freeze mode and continue.
680 * count the occurrence, unfreeze (if no other handled
681 * hardware error bits are set), and continue.
682 */ 680 */
683 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 681 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
684 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 682 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
687 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 685 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
688 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 686 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
689 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); 687 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
690 if (!hwerrs) {
691 /* else leave in freeze mode */
692 ipath_write_kreg(dd,
693 dd->ipath_kregs->kr_control,
694 dd->ipath_control);
695 goto bail;
696 }
697 } 688 }
698 if (hwerrs) { 689 if (hwerrs) {
699 /* 690 /*
@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
723 *dd->ipath_statusp |= IPATH_STATUS_HWERROR; 714 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
724 dd->ipath_flags &= ~IPATH_INITTED; 715 dd->ipath_flags &= ~IPATH_INITTED;
725 } else { 716 } else {
726 ipath_dbg("Clearing freezemode on ignored hardware " 717 ipath_dbg("Clearing freezemode on ignored or "
727 "error\n"); 718 "recovered hardware error\n");
728 ipath_clear_freeze(dd); 719 ipath_clear_freeze(dd);
729 } 720 }
730 } 721 }
@@ -1967,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
1967 dd->ipath_rcvctrl); 1958 dd->ipath_rcvctrl);
1968 dd->ipath_p0_rcvegrcnt = 2048; /* always */ 1959 dd->ipath_p0_rcvegrcnt = 2048; /* always */
1969 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 1960 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1970 dd->ipath_pioreserved = 1; /* reserve a buffer */ 1961 dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
1971} 1962}
1972 1963
1973 1964
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 27dd89476660..3e5baa43fc82 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -41,7 +41,7 @@
41/* 41/*
42 * min buffers we want to have per port, after driver 42 * min buffers we want to have per port, after driver
43 */ 43 */
44#define IPATH_MIN_USER_PORT_BUFCNT 8 44#define IPATH_MIN_USER_PORT_BUFCNT 7
45 45
46/* 46/*
47 * Number of ports we are configured to use (to allow for more pio 47 * Number of ports we are configured to use (to allow for more pio
@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
54 54
55/* 55/*
56 * Number of buffers reserved for driver (verbs and layered drivers.) 56 * Number of buffers reserved for driver (verbs and layered drivers.)
57 * Reserved at end of buffer list. Initialized based on 57 * Initialized based on number of PIO buffers if not set via module interface.
58 * number of PIO buffers if not set via module interface.
59 * The problem with this is that it's global, but we'll use different 58 * The problem with this is that it's global, but we'll use different
60 * numbers for different chip types. So the default value is not 59 * numbers for different chip types.
61 * very useful. I've redefined it for the 1.3 release so that it's
62 * zero unless set by the user to something else, in which case we
63 * try to respect it.
64 */ 60 */
65static ushort ipath_kpiobufs; 61static ushort ipath_kpiobufs;
66 62
@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
546 pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; 542 pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
547 else 543 else
548 pioavail = dd->ipath_pioavailregs_dma[i]; 544 pioavail = dd->ipath_pioavailregs_dma[i];
549 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) | 545 /*
550 (~dd->ipath_pioavailkernel[i] << 546 * don't need to worry about ipath_pioavailkernel here
551 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); 547 * because we will call ipath_chg_pioavailkernel() later
548 * in initialization, to busy out buffers as needed
549 */
550 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
552 } 551 }
553 /* can get counters, stats, etc. */ 552 /* can get counters, stats, etc. */
554 dd->ipath_flags |= IPATH_PRESENT; 553 dd->ipath_flags |= IPATH_PRESENT;
@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
708int ipath_init_chip(struct ipath_devdata *dd, int reinit) 707int ipath_init_chip(struct ipath_devdata *dd, int reinit)
709{ 708{
710 int ret = 0; 709 int ret = 0;
711 u32 val32, kpiobufs; 710 u32 kpiobufs, defkbufs;
712 u32 piobufs, uports; 711 u32 piobufs, uports;
713 u64 val; 712 u64 val;
714 struct ipath_portdata *pd; 713 struct ipath_portdata *pd;
715 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 714 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
716 unsigned long flags;
717 715
718 ret = init_housekeeping(dd, reinit); 716 ret = init_housekeeping(dd, reinit);
719 if (ret) 717 if (ret)
@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
753 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) 751 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
754 / (sizeof(u64) * BITS_PER_BYTE / 2); 752 / (sizeof(u64) * BITS_PER_BYTE / 2);
755 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; 753 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
756 if (ipath_kpiobufs == 0) { 754 if (piobufs > 144)
757 /* not set by user (this is default) */ 755 defkbufs = 32 + dd->ipath_pioreserved;
758 if (piobufs > 144)
759 kpiobufs = 32;
760 else
761 kpiobufs = 16;
762 }
763 else 756 else
764 kpiobufs = ipath_kpiobufs; 757 defkbufs = 16 + dd->ipath_pioreserved;
765 758
766 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { 759 if (ipath_kpiobufs && (ipath_kpiobufs +
760 (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
767 int i = (int) piobufs - 761 int i = (int) piobufs -
768 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); 762 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
769 if (i < 1) 763 if (i < 1)
770 i = 1; 764 i = 1;
771 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " 765 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
772 "%d for kernel leaves too few for %d user ports " 766 "%d for kernel leaves too few for %d user ports "
773 "(%d each); using %u\n", kpiobufs, 767 "(%d each); using %u\n", ipath_kpiobufs,
774 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); 768 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
775 /* 769 /*
776 * shouldn't change ipath_kpiobufs, because could be 770 * shouldn't change ipath_kpiobufs, because could be
777 * different for different devices... 771 * different for different devices...
778 */ 772 */
779 kpiobufs = i; 773 kpiobufs = i;
780 } 774 } else if (ipath_kpiobufs)
775 kpiobufs = ipath_kpiobufs;
776 else
777 kpiobufs = defkbufs;
781 dd->ipath_lastport_piobuf = piobufs - kpiobufs; 778 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
782 dd->ipath_pbufsport = 779 dd->ipath_pbufsport =
783 uports ? dd->ipath_lastport_piobuf / uports : 0; 780 uports ? dd->ipath_lastport_piobuf / uports : 0;
784 val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); 781 /* if not an even divisor, some user ports get extra buffers */
785 if (val32 > 0) { 782 dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
786 ipath_dbg("allocating %u pbufs/port leaves %u unused, " 783 (dd->ipath_pbufsport * uports);
787 "add to kernel\n", dd->ipath_pbufsport, val32); 784 if (dd->ipath_ports_extrabuf)
788 dd->ipath_lastport_piobuf -= val32; 785 ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
789 kpiobufs += val32; 786 "ports <= %u\n", dd->ipath_pbufsport,
790 ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n", 787 dd->ipath_ports_extrabuf);
791 dd->ipath_pbufsport, val32);
792 }
793 dd->ipath_lastpioindex = 0; 788 dd->ipath_lastpioindex = 0;
794 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; 789 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
795 ipath_chg_pioavailkernel(dd, 0, piobufs, 1); 790 /* ipath_pioavailshadow initialized earlier */
796 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 791 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
797 "each for %u user ports\n", kpiobufs, 792 "each for %u user ports\n", kpiobufs,
798 piobufs, dd->ipath_pbufsport, uports); 793 piobufs, dd->ipath_pbufsport, uports);
799 if (dd->ipath_pioupd_thresh) {
800 if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
801 dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
802 if (kpiobufs < dd->ipath_pioupd_thresh)
803 dd->ipath_pioupd_thresh = kpiobufs;
804 }
805
806 ret = dd->ipath_f_early_init(dd); 794 ret = dd->ipath_f_early_init(dd);
807 if (ret) { 795 if (ret) {
808 ipath_dev_err(dd, "Early initialization failure\n"); 796 ipath_dev_err(dd, "Early initialization failure\n");
@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
810 } 798 }
811 799
812 /* 800 /*
813 * Cancel any possible active sends from early driver load.
814 * Follows early_init because some chips have to initialize
815 * PIO buffers in early_init to avoid false parity errors.
816 */
817 ipath_cancel_sends(dd, 0);
818
819 /*
820 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be 801 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
821 * done after early_init. 802 * done after early_init.
822 */ 803 */
@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
836 817
837 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, 818 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
838 dd->ipath_pioavailregs_phys); 819 dd->ipath_pioavailregs_phys);
820
839 /* 821 /*
840 * this is to detect s/w errors, which the h/w works around by 822 * this is to detect s/w errors, which the h/w works around by
841 * ignoring the low 6 bits of address, if it wasn't aligned. 823 * ignoring the low 6 bits of address, if it wasn't aligned.
@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
862 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); 844 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
863 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); 845 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
864 846
865 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
866 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
867 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
868 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
869 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
870
871 /* 847 /*
872 * before error clears, since we expect serdes pll errors during 848 * before error clears, since we expect serdes pll errors during
873 * this, the first time after reset 849 * this, the first time after reset
@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
940 else 916 else
941 enable_chip(dd, reinit); 917 enable_chip(dd, reinit);
942 918
919 /* after enable_chip, so pioavailshadow setup */
920 ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
921
922 /*
923 * Cancel any possible active sends from early driver load.
924 * Follows early_init because some chips have to initialize
925 * PIO buffers in early_init to avoid false parity errors.
926 * After enable and ipath_chg_pioavailkernel so we can safely
927 * enable pioavail updates and PIOENABLE; packets are now
928 * ready to go out.
929 */
930 ipath_cancel_sends(dd, 1);
931
943 if (!reinit) { 932 if (!reinit) {
944 /* 933 /*
945 * Used when we close a port, for DMA already in flight 934 * Used when we close a port, for DMA already in flight
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 45c4c068ab1e..26900b3b7a4e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,42 +38,12 @@
38#include "ipath_verbs.h" 38#include "ipath_verbs.h"
39#include "ipath_common.h" 39#include "ipath_common.h"
40 40
41/*
42 * clear (write) a pio buffer, to clear a parity error. This routine
43 * should only be called when in freeze mode, and the buffer should be
44 * canceled afterwards.
45 */
46static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
47{
48 u32 __iomem *pbuf;
49 u32 dwcnt; /* dword count to write */
50 if (pnum < dd->ipath_piobcnt2k) {
51 pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
52 dd->ipath_palign);
53 dwcnt = dd->ipath_piosize2k >> 2;
54 }
55 else {
56 pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
57 (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
58 dwcnt = dd->ipath_piosize4k >> 2;
59 }
60 dev_info(&dd->pcidev->dev,
61 "Rewrite PIO buffer %u, to recover from parity error\n",
62 pnum);
63
64 /* no flush required, since already in freeze */
65 writel(dwcnt + 1, pbuf);
66 while (--dwcnt)
67 writel(0, pbuf++);
68}
69 41
70/* 42/*
71 * Called when we might have an error that is specific to a particular 43 * Called when we might have an error that is specific to a particular
72 * PIO buffer, and may need to cancel that buffer, so it can be re-used. 44 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
73 * If rewrite is true, and bits are set in the sendbufferror registers,
74 * we'll write to the buffer, for error recovery on parity errors.
75 */ 45 */
76void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) 46void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
77{ 47{
78 u32 piobcnt; 48 u32 piobcnt;
79 unsigned long sbuf[4]; 49 unsigned long sbuf[4];
@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
109 } 79 }
110 80
111 for (i = 0; i < piobcnt; i++) 81 for (i = 0; i < piobcnt; i++)
112 if (test_bit(i, sbuf)) { 82 if (test_bit(i, sbuf))
113 if (rewrite)
114 ipath_clrpiobuf(dd, i);
115 ipath_disarm_piobufs(dd, i, 1); 83 ipath_disarm_piobufs(dd, i, 1);
116 }
117 /* ignore armlaunch errs for a bit */ 84 /* ignore armlaunch errs for a bit */
118 dd->ipath_lastcancel = jiffies+3; 85 dd->ipath_lastcancel = jiffies+3;
119 } 86 }
@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
164{ 131{
165 u64 ignore_this_time = 0; 132 u64 ignore_this_time = 0;
166 133
167 ipath_disarm_senderrbufs(dd, 0); 134 ipath_disarm_senderrbufs(dd);
168 if ((errs & E_SUM_LINK_PKTERRS) && 135 if ((errs & E_SUM_LINK_PKTERRS) &&
169 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 136 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
170 /* 137 /*
@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
909 * processes (causing armlaunch), send errors due to going into freeze mode, 876 * processes (causing armlaunch), send errors due to going into freeze mode,
910 * etc., and try to avoid causing extra interrupts while doing so. 877 * etc., and try to avoid causing extra interrupts while doing so.
911 * Forcibly update the in-memory pioavail register copies after cleanup 878 * Forcibly update the in-memory pioavail register copies after cleanup
912 * because the chip won't do it for anything changing while in freeze mode 879 * because the chip won't do it while in freeze mode (the register values
913 * (we don't want to wait for the next pio buffer state change). 880 * themselves are kept correct).
914 * Make sure that we don't lose any important interrupts by using the chip 881 * Make sure that we don't lose any important interrupts by using the chip
915 * feature that says that writing 0 to a bit in *clear that is set in 882 * feature that says that writing 0 to a bit in *clear that is set in
916 * *status will cause an interrupt to be generated again (if allowed by 883 * *status will cause an interrupt to be generated again (if allowed by
@@ -918,48 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
918 */ 885 */
919void ipath_clear_freeze(struct ipath_devdata *dd) 886void ipath_clear_freeze(struct ipath_devdata *dd)
920{ 887{
921 int i, im;
922 u64 val;
923
924 /* disable error interrupts, to avoid confusion */ 888 /* disable error interrupts, to avoid confusion */
925 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); 889 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
926 890
927 /* also disable interrupts; errormask is sometimes overwriten */ 891 /* also disable interrupts; errormask is sometimes overwriten */
928 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 892 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
929 893
930 /*
931 * clear all sends, because they have may been
932 * completed by usercode while in freeze mode, and
933 * therefore would not be sent, and eventually
934 * might cause the process to run out of bufs
935 */
936 ipath_cancel_sends(dd, 1); 894 ipath_cancel_sends(dd, 1);
895
896 /* clear the freeze, and be sure chip saw it */
937 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 897 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
938 dd->ipath_control); 898 dd->ipath_control);
899 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
939 900
940 /* 901 /* force in-memory update now we are out of freeze */
941 * ensure pio avail updates continue (because the update
942 * won't have happened from cancel_sends because we were
943 * still in freeze
944 */
945 ipath_force_pio_avail_update(dd); 902 ipath_force_pio_avail_update(dd);
946 903
947 /* 904 /*
948 * We just enabled pioavailupdate, so dma copy is almost certainly
949 * not yet right, so read the registers directly. Similar to init
950 */
951 for (i = 0; i < dd->ipath_pioavregs; i++) {
952 /* deal with 6110 chip bug */
953 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
954 i ^ 1 : i;
955 val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
956 dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
957 dd->ipath_pioavailshadow[i] = val |
958 (~dd->ipath_pioavailkernel[i] <<
959 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
960 }
961
962 /*
963 * force new interrupt if any hwerr, error or interrupt bits are 905 * force new interrupt if any hwerr, error or interrupt bits are
964 * still set, and clear "safe" send packet errors related to freeze 906 * still set, and clear "safe" send packet errors related to freeze
965 * and cancelling sends. Re-enable error interrupts before possible 907 * and cancelling sends. Re-enable error interrupts before possible
@@ -1316,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
1316 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1258 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1317 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1259 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1318 1260
1319 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) 1261 /* always process; sdma verbs uses PIO for acks and VL15 */
1320 handle_layer_pioavail(dd); 1262 handle_layer_pioavail(dd);
1321 else
1322 ipath_dbg("unexpected BUFAVAIL intr\n");
1323 } 1263 }
1324 1264
1325 ret = IRQ_HANDLED; 1265 ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 202337ae90dc..02b24a340599 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -117,6 +117,10 @@ struct ipath_portdata {
117 u16 port_subport_cnt; 117 u16 port_subport_cnt;
118 /* non-zero if port is being shared. */ 118 /* non-zero if port is being shared. */
119 u16 port_subport_id; 119 u16 port_subport_id;
120 /* number of pio bufs for this port (all procs, if shared) */
121 u32 port_piocnt;
122 /* first pio buffer for this port */
123 u32 port_pio_base;
120 /* chip offset of PIO buffers for this port */ 124 /* chip offset of PIO buffers for this port */
121 u32 port_piobufs; 125 u32 port_piobufs;
122 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ 126 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -384,6 +388,8 @@ struct ipath_devdata {
384 u32 ipath_lastrpkts; 388 u32 ipath_lastrpkts;
385 /* pio bufs allocated per port */ 389 /* pio bufs allocated per port */
386 u32 ipath_pbufsport; 390 u32 ipath_pbufsport;
391 /* if remainder on bufs/port, ports < extrabuf get 1 extra */
392 u32 ipath_ports_extrabuf;
387 u32 ipath_pioupd_thresh; /* update threshold, some chips */ 393 u32 ipath_pioupd_thresh; /* update threshold, some chips */
388 /* 394 /*
389 * number of ports configured as max; zero is set to number chip 395 * number of ports configured as max; zero is set to number chip
@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
1011int ipath_update_eeprom_log(struct ipath_devdata *dd); 1017int ipath_update_eeprom_log(struct ipath_devdata *dd);
1012void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); 1018void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
1013u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 1019u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
1014void ipath_disarm_senderrbufs(struct ipath_devdata *, int); 1020void ipath_disarm_senderrbufs(struct ipath_devdata *);
1015void ipath_force_pio_avail_update(struct ipath_devdata *); 1021void ipath_force_pio_avail_update(struct ipath_devdata *);
1016void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); 1022void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
1017 1023
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 8ac5c1d82ccd..9e3fe61cbd08 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -481,9 +481,10 @@ done:
481 wake_up(&qp->wait); 481 wake_up(&qp->wait);
482} 482}
483 483
484static void want_buffer(struct ipath_devdata *dd) 484static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
485{ 485{
486 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { 486 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
487 qp->ibqp.qp_type == IB_QPT_SMI) {
487 unsigned long flags; 488 unsigned long flags;
488 489
489 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 490 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -519,7 +520,7 @@ static void ipath_no_bufs_available(struct ipath_qp *qp,
519 spin_lock_irqsave(&dev->pending_lock, flags); 520 spin_lock_irqsave(&dev->pending_lock, flags);
520 list_add_tail(&qp->piowait, &dev->piowait); 521 list_add_tail(&qp->piowait, &dev->piowait);
521 spin_unlock_irqrestore(&dev->pending_lock, flags); 522 spin_unlock_irqrestore(&dev->pending_lock, flags);
522 want_buffer(dev->dd); 523 want_buffer(dev->dd, qp);
523 dev->n_piowait++; 524 dev->n_piowait++;
524} 525}
525 526
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 1974df7a9f78..0d07682c7310 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -449,16 +449,19 @@ int setup_sdma(struct ipath_devdata *dd)
449 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 449 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
450 dd->ipath_sdma_head_phys); 450 dd->ipath_sdma_head_phys);
451 451
452 /* Reserve all the former "kernel" piobufs */ 452 /*
453 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved; 453 * Reserve all the former "kernel" piobufs, using high number range
454 for (i = dd->ipath_lastport_piobuf; i < n; ++i) { 454 * so we get as many 4K buffers as possible
455 */
456 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
457 i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
458 ipath_chg_pioavailkernel(dd, i, n - i , 0);
459 for (; i < n; ++i) {
455 unsigned word = i / 64; 460 unsigned word = i / 64;
456 unsigned bit = i & 63; 461 unsigned bit = i & 63;
457 BUG_ON(word >= 3); 462 BUG_ON(word >= 3);
458 senddmabufmask[word] |= 1ULL << bit; 463 senddmabufmask[word] |= 1ULL << bit;
459 } 464 }
460 ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
461 n - dd->ipath_lastport_piobuf, 0);
462 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 465 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
463 senddmabufmask[0]); 466 senddmabufmask[0]);
464 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 467 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,