aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw/ipath
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/ipath')
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c158
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c91
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba7220.c26
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c95
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c80
-rw-r--r--drivers/infiniband/hw/ipath/ipath_kernel.h18
-rw-r--r--drivers/infiniband/hw/ipath/ipath_qp.c237
-rw-r--r--drivers/infiniband/hw/ipath/ipath_rc.c291
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ruc.c332
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sdma.c44
-rw-r--r--drivers/infiniband/hw/ipath/ipath_uc.c57
-rw-r--r--drivers/infiniband/hw/ipath/ipath_ud.c66
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_sdma.h2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c178
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.h64
15 files changed, 980 insertions, 759 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index acf30c06a0c0..daad09a45910 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd)
1197 } 1197 }
1198 1198
1199reloop: 1199reloop:
1200 for (last = 0, i = 1; !last; i++) { 1200 for (last = 0, i = 1; !last; i += !last) {
1201 hdr = dd->ipath_f_get_msgheader(dd, rhf_addr); 1201 hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
1202 eflags = ipath_hdrget_err_flags(rhf_addr); 1202 eflags = ipath_hdrget_err_flags(rhf_addr);
1203 etype = ipath_hdrget_rcv_type(rhf_addr); 1203 etype = ipath_hdrget_rcv_type(rhf_addr);
@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1428 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1428 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1429} 1429}
1430 1430
1431/*
1432 * used to force update of pioavailshadow if we can't get a pio buffer.
1433 * Needed primarily due to exiting freeze mode after recovering
1434 * from errors. Done lazily, because it's safer (known to not
1435 * be writing pio buffers).
1436 */
1437static void ipath_reset_availshadow(struct ipath_devdata *dd)
1438{
1439 int i, im;
1440 unsigned long flags;
1441
1442 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1443 for (i = 0; i < dd->ipath_pioavregs; i++) {
1444 u64 val, oldval;
1445 /* deal with 6110 chip bug on high register #s */
1446 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1447 i ^ 1 : i;
1448 val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
1449 /*
1450 * busy out the buffers not in the kernel avail list,
1451 * without changing the generation bits.
1452 */
1453 oldval = dd->ipath_pioavailshadow[i];
1454 dd->ipath_pioavailshadow[i] = val |
1455 ((~dd->ipath_pioavailkernel[i] <<
1456 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
1457 0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
1458 if (oldval != dd->ipath_pioavailshadow[i])
1459 ipath_dbg("shadow[%d] was %Lx, now %lx\n",
1460 i, oldval, dd->ipath_pioavailshadow[i]);
1461 }
1462 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1463}
1464
1431/** 1465/**
1432 * ipath_setrcvhdrsize - set the receive header size 1466 * ipath_setrcvhdrsize - set the receive header size
1433 * @dd: the infinipath device 1467 * @dd: the infinipath device
@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
1482 */ 1516 */
1483 ipath_stats.sps_nopiobufs++; 1517 ipath_stats.sps_nopiobufs++;
1484 if (!(++dd->ipath_consec_nopiobuf % 100000)) { 1518 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1485 ipath_dbg("%u pio sends with no bufavail; dmacopy: " 1519 ipath_force_pio_avail_update(dd); /* at start */
1486 "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n", 1520 ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
1521 "%llx %llx %llx %llx\n"
1522 "ipath shadow: %lx %lx %lx %lx\n",
1487 dd->ipath_consec_nopiobuf, 1523 dd->ipath_consec_nopiobuf,
1524 (unsigned long)get_cycles(),
1488 (unsigned long long) le64_to_cpu(dma[0]), 1525 (unsigned long long) le64_to_cpu(dma[0]),
1489 (unsigned long long) le64_to_cpu(dma[1]), 1526 (unsigned long long) le64_to_cpu(dma[1]),
1490 (unsigned long long) le64_to_cpu(dma[2]), 1527 (unsigned long long) le64_to_cpu(dma[2]),
@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
1496 */ 1533 */
1497 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 1534 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1498 (sizeof(shadow[0]) * 4 * 4)) 1535 (sizeof(shadow[0]) * 4 * 4))
1499 ipath_dbg("2nd group: dmacopy: %llx %llx " 1536 ipath_dbg("2nd group: dmacopy: "
1500 "%llx %llx; shadow: %lx %lx %lx %lx\n", 1537 "%llx %llx %llx %llx\n"
1538 "ipath shadow: %lx %lx %lx %lx\n",
1501 (unsigned long long)le64_to_cpu(dma[4]), 1539 (unsigned long long)le64_to_cpu(dma[4]),
1502 (unsigned long long)le64_to_cpu(dma[5]), 1540 (unsigned long long)le64_to_cpu(dma[5]),
1503 (unsigned long long)le64_to_cpu(dma[6]), 1541 (unsigned long long)le64_to_cpu(dma[6]),
1504 (unsigned long long)le64_to_cpu(dma[7]), 1542 (unsigned long long)le64_to_cpu(dma[7]),
1505 shadow[4], shadow[5], shadow[6], 1543 shadow[4], shadow[5], shadow[6], shadow[7]);
1506 shadow[7]); 1544
1545 /* at end, so update likely happened */
1546 ipath_reset_availshadow(dd);
1507 } 1547 }
1508} 1548}
1509 1549
@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1652 unsigned len, int avail) 1692 unsigned len, int avail)
1653{ 1693{
1654 unsigned long flags; 1694 unsigned long flags;
1655 unsigned end; 1695 unsigned end, cnt = 0, next;
1656 1696
1657 /* There are two bits per send buffer (busy and generation) */ 1697 /* There are two bits per send buffer (busy and generation) */
1658 start *= 2; 1698 start *= 2;
1659 len *= 2; 1699 end = start + len * 2;
1660 end = start + len;
1661 1700
1662 /* Set or clear the generation bits. */
1663 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1701 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1702 /* Set or clear the busy bit in the shadow. */
1664 while (start < end) { 1703 while (start < end) {
1665 if (avail) { 1704 if (avail) {
1666 __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, 1705 unsigned long dma;
1667 dd->ipath_pioavailshadow); 1706 int i, im;
1707 /*
1708 * the BUSY bit will never be set, because we disarm
1709 * the user buffers before we hand them back to the
1710 * kernel. We do have to make sure the generation
1711 * bit is set correctly in shadow, since it could
1712 * have changed many times while allocated to user.
1713 * We can't use the bitmap functions on the full
1714 * dma array because it is always little-endian, so
1715 * we have to flip to host-order first.
1716 * BITS_PER_LONG is slightly wrong, since it's
1717 * always 64 bits per register in chip...
1718 * We only work on 64 bit kernels, so that's OK.
1719 */
1720 /* deal with 6110 chip bug on high register #s */
1721 i = start / BITS_PER_LONG;
1722 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
1723 i ^ 1 : i;
1724 __clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
1725 + start, dd->ipath_pioavailshadow);
1726 dma = (unsigned long) le64_to_cpu(
1727 dd->ipath_pioavailregs_dma[im]);
1728 if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1729 + start) % BITS_PER_LONG, &dma))
1730 __set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1731 + start, dd->ipath_pioavailshadow);
1732 else
1733 __clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
1734 + start, dd->ipath_pioavailshadow);
1668 __set_bit(start, dd->ipath_pioavailkernel); 1735 __set_bit(start, dd->ipath_pioavailkernel);
1669 } else { 1736 } else {
1670 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT, 1737 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1673 } 1740 }
1674 start += 2; 1741 start += 2;
1675 } 1742 }
1743
1744 if (dd->ipath_pioupd_thresh) {
1745 end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1746 next = find_first_bit(dd->ipath_pioavailkernel, end);
1747 while (next < end) {
1748 cnt++;
1749 next = find_next_bit(dd->ipath_pioavailkernel, end,
1750 next + 1);
1751 }
1752 }
1676 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1753 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1754
1755 /*
1756 * When moving buffers from kernel to user, if number assigned to
1757 * the user is less than the pio update threshold, and threshold
1758 * is supported (cnt was computed > 0), drop the update threshold
1759 * so we update at least once per allocated number of buffers.
1760 * In any case, if the kernel buffers are less than the threshold,
1761 * drop the threshold. We don't bother increasing it, having once
1762 * decreased it, since it would typically just cycle back and forth.
1763 * If we don't decrease below buffers in use, we can wait a long
1764 * time for an update, until some other context uses PIO buffers.
1765 */
1766 if (!avail && len < cnt)
1767 cnt = len;
1768 if (cnt < dd->ipath_pioupd_thresh) {
1769 dd->ipath_pioupd_thresh = cnt;
1770 ipath_dbg("Decreased pio update threshold to %u\n",
1771 dd->ipath_pioupd_thresh);
1772 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1773 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
1774 << INFINIPATH_S_UPDTHRESH_SHIFT);
1775 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
1776 << INFINIPATH_S_UPDTHRESH_SHIFT;
1777 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1778 dd->ipath_sendctrl);
1779 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1780 }
1677} 1781}
1678 1782
1679/** 1783/**
@@ -1790,12 +1894,12 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1790 */ 1894 */
1791 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) { 1895 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
1792 int skip_cancel; 1896 int skip_cancel;
1793 u64 *statp = &dd->ipath_sdma_status; 1897 unsigned long *statp = &dd->ipath_sdma_status;
1794 1898
1795 spin_lock_irqsave(&dd->ipath_sdma_lock, flags); 1899 spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
1796 skip_cancel = 1900 skip_cancel =
1797 !test_bit(IPATH_SDMA_DISABLED, statp) && 1901 test_and_set_bit(IPATH_SDMA_ABORTING, statp)
1798 test_and_set_bit(IPATH_SDMA_ABORTING, statp); 1902 && !test_bit(IPATH_SDMA_DISABLED, statp);
1799 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 1903 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
1800 if (skip_cancel) 1904 if (skip_cancel)
1801 goto bail; 1905 goto bail;
@@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1826 ipath_disarm_piobufs(dd, 0, 1930 ipath_disarm_piobufs(dd, 0,
1827 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k); 1931 dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
1828 1932
1933 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1934 set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
1935
1829 if (restore_sendctrl) { 1936 if (restore_sendctrl) {
1830 /* else done by caller later if needed */ 1937 /* else done by caller later if needed */
1831 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 1938 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1845 /* only wait so long for intr */ 1952 /* only wait so long for intr */
1846 dd->ipath_sdma_abort_intr_timeout = jiffies + HZ; 1953 dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
1847 dd->ipath_sdma_reset_wait = 200; 1954 dd->ipath_sdma_reset_wait = 200;
1848 __set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
1849 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status)) 1955 if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
1850 tasklet_hi_schedule(&dd->ipath_sdma_abort_task); 1956 tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
1851 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 1957 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
@@ -2510,7 +2616,7 @@ int ipath_reset_device(int unit)
2510 ipath_dbg("unit %u port %d is in use " 2616 ipath_dbg("unit %u port %d is in use "
2511 "(PID %u cmd %s), can't reset\n", 2617 "(PID %u cmd %s), can't reset\n",
2512 unit, i, 2618 unit, i,
2513 dd->ipath_pd[i]->port_pid, 2619 pid_nr(dd->ipath_pd[i]->port_pid),
2514 dd->ipath_pd[i]->port_comm); 2620 dd->ipath_pd[i]->port_comm);
2515 ret = -EBUSY; 2621 ret = -EBUSY;
2516 goto bail; 2622 goto bail;
@@ -2548,19 +2654,21 @@ bail:
2548static int ipath_signal_procs(struct ipath_devdata *dd, int sig) 2654static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
2549{ 2655{
2550 int i, sub, any = 0; 2656 int i, sub, any = 0;
2551 pid_t pid; 2657 struct pid *pid;
2552 2658
2553 if (!dd->ipath_pd) 2659 if (!dd->ipath_pd)
2554 return 0; 2660 return 0;
2555 for (i = 1; i < dd->ipath_cfgports; i++) { 2661 for (i = 1; i < dd->ipath_cfgports; i++) {
2556 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt || 2662 if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt)
2557 !dd->ipath_pd[i]->port_pid)
2558 continue; 2663 continue;
2559 pid = dd->ipath_pd[i]->port_pid; 2664 pid = dd->ipath_pd[i]->port_pid;
2665 if (!pid)
2666 continue;
2667
2560 dev_info(&dd->pcidev->dev, "context %d in use " 2668 dev_info(&dd->pcidev->dev, "context %d in use "
2561 "(PID %u), sending signal %d\n", 2669 "(PID %u), sending signal %d\n",
2562 i, pid, sig); 2670 i, pid_nr(pid), sig);
2563 kill_proc(pid, sig, 1); 2671 kill_pid(pid, sig, 1);
2564 any++; 2672 any++;
2565 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) { 2673 for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
2566 pid = dd->ipath_pd[i]->port_subpid[sub]; 2674 pid = dd->ipath_pd[i]->port_subpid[sub];
@@ -2568,8 +2676,8 @@ static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
2568 continue; 2676 continue;
2569 dev_info(&dd->pcidev->dev, "sub-context " 2677 dev_info(&dd->pcidev->dev, "sub-context "
2570 "%d:%d in use (PID %u), sending " 2678 "%d:%d in use (PID %u), sending "
2571 "signal %d\n", i, sub, pid, sig); 2679 "signal %d\n", i, sub, pid_nr(pid), sig);
2572 kill_proc(pid, sig, 1); 2680 kill_pid(pid, sig, 1);
2573 any++; 2681 any++;
2574 } 2682 }
2575 } 2683 }
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 8b1752202e78..b472b15637f0 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
173 (void *) dd->ipath_statusp - 173 (void *) dd->ipath_statusp -
174 (void *) dd->ipath_pioavailregs_dma; 174 (void *) dd->ipath_pioavailregs_dma;
175 if (!shared) { 175 if (!shared) {
176 kinfo->spi_piocnt = dd->ipath_pbufsport; 176 kinfo->spi_piocnt = pd->port_piocnt;
177 kinfo->spi_piobufbase = (u64) pd->port_piobufs; 177 kinfo->spi_piobufbase = (u64) pd->port_piobufs;
178 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase + 178 kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
179 dd->ipath_ureg_align * pd->port_port; 179 dd->ipath_ureg_align * pd->port_port;
180 } else if (master) { 180 } else if (master) {
181 kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) + 181 kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
182 (dd->ipath_pbufsport % subport_cnt); 182 (pd->port_piocnt % subport_cnt);
183 /* Master's PIO buffers are after all the slave's */ 183 /* Master's PIO buffers are after all the slave's */
184 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 184 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
185 dd->ipath_palign * 185 dd->ipath_palign *
186 (dd->ipath_pbufsport - kinfo->spi_piocnt); 186 (pd->port_piocnt - kinfo->spi_piocnt);
187 } else { 187 } else {
188 unsigned slave = subport_fp(fp) - 1; 188 unsigned slave = subport_fp(fp) - 1;
189 189
190 kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt; 190 kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
191 kinfo->spi_piobufbase = (u64) pd->port_piobufs + 191 kinfo->spi_piobufbase = (u64) pd->port_piobufs +
192 dd->ipath_palign * kinfo->spi_piocnt * slave; 192 dd->ipath_palign * kinfo->spi_piocnt * slave;
193 } 193 }
194 194
195 /*
196 * Set the PIO avail update threshold to no larger
197 * than the number of buffers per process. Note that
198 * we decrease it here, but won't ever increase it.
199 */
200 if (dd->ipath_pioupd_thresh &&
201 kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
202 unsigned long flags;
203
204 dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
205 ipath_dbg("Decreased pio update threshold to %u\n",
206 dd->ipath_pioupd_thresh);
207 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
208 dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
209 << INFINIPATH_S_UPDTHRESH_SHIFT);
210 dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
211 << INFINIPATH_S_UPDTHRESH_SHIFT;
212 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
213 dd->ipath_sendctrl);
214 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
215 }
216
217 if (shared) { 195 if (shared) {
218 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase + 196 kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
219 dd->ipath_ureg_align * pd->port_port; 197 dd->ipath_ureg_align * pd->port_port;
@@ -577,7 +555,7 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
577 p = dd->ipath_pageshadow[porttid + tid]; 555 p = dd->ipath_pageshadow[porttid + tid];
578 dd->ipath_pageshadow[porttid + tid] = NULL; 556 dd->ipath_pageshadow[porttid + tid] = NULL;
579 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n", 557 ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
580 pd->port_pid, tid); 558 pid_nr(pd->port_pid), tid);
581 dd->ipath_f_put_tid(dd, &tidbase[tid], 559 dd->ipath_f_put_tid(dd, &tidbase[tid],
582 RCVHQ_RCV_TYPE_EXPECTED, 560 RCVHQ_RCV_TYPE_EXPECTED,
583 dd->ipath_tidinvalid); 561 dd->ipath_tidinvalid);
@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1309 ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port; 1287 ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
1310 if (!pd->port_subport_cnt) { 1288 if (!pd->port_subport_cnt) {
1311 /* port is not shared */ 1289 /* port is not shared */
1312 piocnt = dd->ipath_pbufsport; 1290 piocnt = pd->port_piocnt;
1313 piobufs = pd->port_piobufs; 1291 piobufs = pd->port_piobufs;
1314 } else if (!subport_fp(fp)) { 1292 } else if (!subport_fp(fp)) {
1315 /* caller is the master */ 1293 /* caller is the master */
1316 piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) + 1294 piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
1317 (dd->ipath_pbufsport % pd->port_subport_cnt); 1295 (pd->port_piocnt % pd->port_subport_cnt);
1318 piobufs = pd->port_piobufs + 1296 piobufs = pd->port_piobufs +
1319 dd->ipath_palign * (dd->ipath_pbufsport - piocnt); 1297 dd->ipath_palign * (pd->port_piocnt - piocnt);
1320 } else { 1298 } else {
1321 unsigned slave = subport_fp(fp) - 1; 1299 unsigned slave = subport_fp(fp) - 1;
1322 1300
1323 /* caller is a slave */ 1301 /* caller is a slave */
1324 piocnt = dd->ipath_pbufsport / pd->port_subport_cnt; 1302 piocnt = pd->port_piocnt / pd->port_subport_cnt;
1325 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave; 1303 piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
1326 } 1304 }
1327 1305
@@ -1631,11 +1609,8 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
1631 port); 1609 port);
1632 pd->port_cnt = 1; 1610 pd->port_cnt = 1;
1633 port_fp(fp) = pd; 1611 port_fp(fp) = pd;
1634 pd->port_pid = current->pid; 1612 pd->port_pid = get_pid(task_pid(current));
1635 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); 1613 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1636 ipath_chg_pioavailkernel(dd,
1637 dd->ipath_pbufsport * (pd->port_port - 1),
1638 dd->ipath_pbufsport, 0);
1639 ipath_stats.sps_ports++; 1614 ipath_stats.sps_ports++;
1640 ret = 0; 1615 ret = 0;
1641 } else 1616 } else
@@ -1818,14 +1793,15 @@ static int find_shared_port(struct file *fp,
1818 } 1793 }
1819 port_fp(fp) = pd; 1794 port_fp(fp) = pd;
1820 subport_fp(fp) = pd->port_cnt++; 1795 subport_fp(fp) = pd->port_cnt++;
1821 pd->port_subpid[subport_fp(fp)] = current->pid; 1796 pd->port_subpid[subport_fp(fp)] =
1797 get_pid(task_pid(current));
1822 tidcursor_fp(fp) = 0; 1798 tidcursor_fp(fp) = 0;
1823 pd->active_slaves |= 1 << subport_fp(fp); 1799 pd->active_slaves |= 1 << subport_fp(fp);
1824 ipath_cdbg(PROC, 1800 ipath_cdbg(PROC,
1825 "%s[%u] %u sharing %s[%u] unit:port %u:%u\n", 1801 "%s[%u] %u sharing %s[%u] unit:port %u:%u\n",
1826 current->comm, current->pid, 1802 current->comm, current->pid,
1827 subport_fp(fp), 1803 subport_fp(fp),
1828 pd->port_comm, pd->port_pid, 1804 pd->port_comm, pid_nr(pd->port_pid),
1829 dd->ipath_unit, pd->port_port); 1805 dd->ipath_unit, pd->port_port);
1830 ret = 1; 1806 ret = 1;
1831 goto done; 1807 goto done;
@@ -1938,11 +1914,25 @@ static int ipath_do_user_init(struct file *fp,
1938 1914
1939 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ 1915 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
1940 1916
1917 /* some ports may get extra buffers, calculate that here */
1918 if (pd->port_port <= dd->ipath_ports_extrabuf)
1919 pd->port_piocnt = dd->ipath_pbufsport + 1;
1920 else
1921 pd->port_piocnt = dd->ipath_pbufsport;
1922
1941 /* for right now, kernel piobufs are at end, so port 1 is at 0 */ 1923 /* for right now, kernel piobufs are at end, so port 1 is at 0 */
1924 if (pd->port_port <= dd->ipath_ports_extrabuf)
1925 pd->port_pio_base = (dd->ipath_pbufsport + 1)
1926 * (pd->port_port - 1);
1927 else
1928 pd->port_pio_base = dd->ipath_ports_extrabuf +
1929 dd->ipath_pbufsport * (pd->port_port - 1);
1942 pd->port_piobufs = dd->ipath_piobufbase + 1930 pd->port_piobufs = dd->ipath_piobufbase +
1943 dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign; 1931 pd->port_pio_base * dd->ipath_palign;
1944 ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n", 1932 ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
1945 pd->port_port, pd->port_piobufs); 1933 " first pio %u\n", pd->port_port, pd->port_piobufs,
1934 pd->port_piocnt, pd->port_pio_base);
1935 ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
1946 1936
1947 /* 1937 /*
1948 * Now allocate the rcvhdr Q and eager TIDs; skip the TID 1938 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
@@ -2077,7 +2067,8 @@ static int ipath_close(struct inode *in, struct file *fp)
2077 * the slave(s) don't wait for receive data forever. 2067 * the slave(s) don't wait for receive data forever.
2078 */ 2068 */
2079 pd->active_slaves &= ~(1 << fd->subport); 2069 pd->active_slaves &= ~(1 << fd->subport);
2080 pd->port_subpid[fd->subport] = 0; 2070 put_pid(pd->port_subpid[fd->subport]);
2071 pd->port_subpid[fd->subport] = NULL;
2081 mutex_unlock(&ipath_mutex); 2072 mutex_unlock(&ipath_mutex);
2082 goto bail; 2073 goto bail;
2083 } 2074 }
@@ -2085,7 +2076,7 @@ static int ipath_close(struct inode *in, struct file *fp)
2085 2076
2086 if (pd->port_hdrqfull) { 2077 if (pd->port_hdrqfull) {
2087 ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors " 2078 ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
2088 "during run\n", pd->port_comm, pd->port_pid, 2079 "during run\n", pd->port_comm, pid_nr(pd->port_pid),
2089 pd->port_hdrqfull); 2080 pd->port_hdrqfull);
2090 pd->port_hdrqfull = 0; 2081 pd->port_hdrqfull = 0;
2091 } 2082 }
@@ -2107,7 +2098,6 @@ static int ipath_close(struct inode *in, struct file *fp)
2107 } 2098 }
2108 2099
2109 if (dd->ipath_kregbase) { 2100 if (dd->ipath_kregbase) {
2110 int i;
2111 /* atomically clear receive enable port and intr avail. */ 2101 /* atomically clear receive enable port and intr avail. */
2112 clear_bit(dd->ipath_r_portenable_shift + port, 2102 clear_bit(dd->ipath_r_portenable_shift + port,
2113 &dd->ipath_rcvctrl); 2103 &dd->ipath_rcvctrl);
@@ -2136,9 +2126,9 @@ static int ipath_close(struct inode *in, struct file *fp)
2136 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, 2126 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
2137 pd->port_port, dd->ipath_dummy_hdrq_phys); 2127 pd->port_port, dd->ipath_dummy_hdrq_phys);
2138 2128
2139 i = dd->ipath_pbufsport * (port - 1); 2129 ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
2140 ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); 2130 ipath_chg_pioavailkernel(dd, pd->port_pio_base,
2141 ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1); 2131 pd->port_piocnt, 1);
2142 2132
2143 dd->ipath_f_clear_tids(dd, pd->port_port); 2133 dd->ipath_f_clear_tids(dd, pd->port_port);
2144 2134
@@ -2146,11 +2136,12 @@ static int ipath_close(struct inode *in, struct file *fp)
2146 unlock_expected_tids(pd); 2136 unlock_expected_tids(pd);
2147 ipath_stats.sps_ports--; 2137 ipath_stats.sps_ports--;
2148 ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n", 2138 ipath_cdbg(PROC, "%s[%u] closed port %u:%u\n",
2149 pd->port_comm, pd->port_pid, 2139 pd->port_comm, pid_nr(pd->port_pid),
2150 dd->ipath_unit, port); 2140 dd->ipath_unit, port);
2151 } 2141 }
2152 2142
2153 pd->port_pid = 0; 2143 put_pid(pd->port_pid);
2144 pd->port_pid = NULL;
2154 dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */ 2145 dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
2155 mutex_unlock(&ipath_mutex); 2146 mutex_unlock(&ipath_mutex);
2156 ipath_free_pddata(dd, pd); /* after releasing the mutex */ 2147 ipath_free_pddata(dd, pd); /* after releasing the mutex */
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index e3ec0d1bdf50..8eee7830f042 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
595 595
596 dev_info(&dd->pcidev->dev, 596 dev_info(&dd->pcidev->dev,
597 "Recovering from TXE PIO parity error\n"); 597 "Recovering from TXE PIO parity error\n");
598 ipath_disarm_senderrbufs(dd, 1); 598 ipath_disarm_senderrbufs(dd);
599} 599}
600 600
601 601
@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
675 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control); 675 ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
676 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) { 676 if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
677 /* 677 /*
678 * Parity errors in send memory are recoverable, 678 * Parity errors in send memory are recoverable by h/w
679 * just cancel the send (if indicated in * sendbuffererror), 679 * just do housekeeping, exit freeze mode and continue.
680 * count the occurrence, unfreeze (if no other handled
681 * hardware error bits are set), and continue.
682 */ 680 */
683 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 681 if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
684 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 682 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
687 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF | 685 hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
688 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC) 686 INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
689 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT); 687 << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
690 if (!hwerrs) {
691 /* else leave in freeze mode */
692 ipath_write_kreg(dd,
693 dd->ipath_kregs->kr_control,
694 dd->ipath_control);
695 goto bail;
696 }
697 } 688 }
698 if (hwerrs) { 689 if (hwerrs) {
699 /* 690 /*
@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
723 *dd->ipath_statusp |= IPATH_STATUS_HWERROR; 714 *dd->ipath_statusp |= IPATH_STATUS_HWERROR;
724 dd->ipath_flags &= ~IPATH_INITTED; 715 dd->ipath_flags &= ~IPATH_INITTED;
725 } else { 716 } else {
726 ipath_dbg("Clearing freezemode on ignored hardware " 717 ipath_dbg("Clearing freezemode on ignored or "
727 "error\n"); 718 "recovered hardware error\n");
728 ipath_clear_freeze(dd); 719 ipath_clear_freeze(dd);
729 } 720 }
730 } 721 }
@@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
870 "revision %u.%u!\n", 861 "revision %u.%u!\n",
871 dd->ipath_majrev, dd->ipath_minrev); 862 dd->ipath_majrev, dd->ipath_minrev);
872 ret = 1; 863 ret = 1;
873 } else if (dd->ipath_minrev == 1) { 864 } else if (dd->ipath_minrev == 1 &&
874 /* Rev1 chips are prototype. Complain, but allow use */ 865 !(dd->ipath_flags & IPATH_INITTED)) {
866 /* Rev1 chips are prototype. Complain at init, but allow use */
875 ipath_dev_err(dd, "Unsupported hardware " 867 ipath_dev_err(dd, "Unsupported hardware "
876 "revision %u.%u, Contact support@qlogic.com\n", 868 "revision %u.%u, Contact support@qlogic.com\n",
877 dd->ipath_majrev, dd->ipath_minrev); 869 dd->ipath_majrev, dd->ipath_minrev);
@@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
1966 dd->ipath_rcvctrl); 1958 dd->ipath_rcvctrl);
1967 dd->ipath_p0_rcvegrcnt = 2048; /* always */ 1959 dd->ipath_p0_rcvegrcnt = 2048; /* always */
1968 if (dd->ipath_flags & IPATH_HAS_SEND_DMA) 1960 if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1969 dd->ipath_pioreserved = 1; /* reserve a buffer */ 1961 dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
1970} 1962}
1971 1963
1972 1964
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 27dd89476660..3e5baa43fc82 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -41,7 +41,7 @@
41/* 41/*
42 * min buffers we want to have per port, after driver 42 * min buffers we want to have per port, after driver
43 */ 43 */
44#define IPATH_MIN_USER_PORT_BUFCNT 8 44#define IPATH_MIN_USER_PORT_BUFCNT 7
45 45
46/* 46/*
47 * Number of ports we are configured to use (to allow for more pio 47 * Number of ports we are configured to use (to allow for more pio
@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
54 54
55/* 55/*
56 * Number of buffers reserved for driver (verbs and layered drivers.) 56 * Number of buffers reserved for driver (verbs and layered drivers.)
57 * Reserved at end of buffer list. Initialized based on 57 * Initialized based on number of PIO buffers if not set via module interface.
58 * number of PIO buffers if not set via module interface.
59 * The problem with this is that it's global, but we'll use different 58 * The problem with this is that it's global, but we'll use different
60 * numbers for different chip types. So the default value is not 59 * numbers for different chip types.
61 * very useful. I've redefined it for the 1.3 release so that it's
62 * zero unless set by the user to something else, in which case we
63 * try to respect it.
64 */ 60 */
65static ushort ipath_kpiobufs; 61static ushort ipath_kpiobufs;
66 62
@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
546 pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; 542 pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
547 else 543 else
548 pioavail = dd->ipath_pioavailregs_dma[i]; 544 pioavail = dd->ipath_pioavailregs_dma[i];
549 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) | 545 /*
550 (~dd->ipath_pioavailkernel[i] << 546 * don't need to worry about ipath_pioavailkernel here
551 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT); 547 * because we will call ipath_chg_pioavailkernel() later
548 * in initialization, to busy out buffers as needed
549 */
550 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
552 } 551 }
553 /* can get counters, stats, etc. */ 552 /* can get counters, stats, etc. */
554 dd->ipath_flags |= IPATH_PRESENT; 553 dd->ipath_flags |= IPATH_PRESENT;
@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
708int ipath_init_chip(struct ipath_devdata *dd, int reinit) 707int ipath_init_chip(struct ipath_devdata *dd, int reinit)
709{ 708{
710 int ret = 0; 709 int ret = 0;
711 u32 val32, kpiobufs; 710 u32 kpiobufs, defkbufs;
712 u32 piobufs, uports; 711 u32 piobufs, uports;
713 u64 val; 712 u64 val;
714 struct ipath_portdata *pd; 713 struct ipath_portdata *pd;
715 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 714 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
716 unsigned long flags;
717 715
718 ret = init_housekeeping(dd, reinit); 716 ret = init_housekeeping(dd, reinit);
719 if (ret) 717 if (ret)
@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
753 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2) 751 dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
754 / (sizeof(u64) * BITS_PER_BYTE / 2); 752 / (sizeof(u64) * BITS_PER_BYTE / 2);
755 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0; 753 uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
756 if (ipath_kpiobufs == 0) { 754 if (piobufs > 144)
757 /* not set by user (this is default) */ 755 defkbufs = 32 + dd->ipath_pioreserved;
758 if (piobufs > 144)
759 kpiobufs = 32;
760 else
761 kpiobufs = 16;
762 }
763 else 756 else
764 kpiobufs = ipath_kpiobufs; 757 defkbufs = 16 + dd->ipath_pioreserved;
765 758
766 if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) { 759 if (ipath_kpiobufs && (ipath_kpiobufs +
760 (uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
767 int i = (int) piobufs - 761 int i = (int) piobufs -
768 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT); 762 (int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
769 if (i < 1) 763 if (i < 1)
770 i = 1; 764 i = 1;
771 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of " 765 dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
772 "%d for kernel leaves too few for %d user ports " 766 "%d for kernel leaves too few for %d user ports "
773 "(%d each); using %u\n", kpiobufs, 767 "(%d each); using %u\n", ipath_kpiobufs,
774 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i); 768 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
775 /* 769 /*
776 * shouldn't change ipath_kpiobufs, because could be 770 * shouldn't change ipath_kpiobufs, because could be
777 * different for different devices... 771 * different for different devices...
778 */ 772 */
779 kpiobufs = i; 773 kpiobufs = i;
780 } 774 } else if (ipath_kpiobufs)
775 kpiobufs = ipath_kpiobufs;
776 else
777 kpiobufs = defkbufs;
781 dd->ipath_lastport_piobuf = piobufs - kpiobufs; 778 dd->ipath_lastport_piobuf = piobufs - kpiobufs;
782 dd->ipath_pbufsport = 779 dd->ipath_pbufsport =
783 uports ? dd->ipath_lastport_piobuf / uports : 0; 780 uports ? dd->ipath_lastport_piobuf / uports : 0;
784 val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports); 781 /* if not an even divisor, some user ports get extra buffers */
785 if (val32 > 0) { 782 dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
786 ipath_dbg("allocating %u pbufs/port leaves %u unused, " 783 (dd->ipath_pbufsport * uports);
787 "add to kernel\n", dd->ipath_pbufsport, val32); 784 if (dd->ipath_ports_extrabuf)
788 dd->ipath_lastport_piobuf -= val32; 785 ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
789 kpiobufs += val32; 786 "ports <= %u\n", dd->ipath_pbufsport,
790 ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n", 787 dd->ipath_ports_extrabuf);
791 dd->ipath_pbufsport, val32);
792 }
793 dd->ipath_lastpioindex = 0; 788 dd->ipath_lastpioindex = 0;
794 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k; 789 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
795 ipath_chg_pioavailkernel(dd, 0, piobufs, 1); 790 /* ipath_pioavailshadow initialized earlier */
796 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 791 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
797 "each for %u user ports\n", kpiobufs, 792 "each for %u user ports\n", kpiobufs,
798 piobufs, dd->ipath_pbufsport, uports); 793 piobufs, dd->ipath_pbufsport, uports);
799 if (dd->ipath_pioupd_thresh) {
800 if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
801 dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
802 if (kpiobufs < dd->ipath_pioupd_thresh)
803 dd->ipath_pioupd_thresh = kpiobufs;
804 }
805
806 ret = dd->ipath_f_early_init(dd); 794 ret = dd->ipath_f_early_init(dd);
807 if (ret) { 795 if (ret) {
808 ipath_dev_err(dd, "Early initialization failure\n"); 796 ipath_dev_err(dd, "Early initialization failure\n");
@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
810 } 798 }
811 799
812 /* 800 /*
813 * Cancel any possible active sends from early driver load.
814 * Follows early_init because some chips have to initialize
815 * PIO buffers in early_init to avoid false parity errors.
816 */
817 ipath_cancel_sends(dd, 0);
818
819 /*
820 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be 801 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
821 * done after early_init. 802 * done after early_init.
822 */ 803 */
@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
836 817
837 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr, 818 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
838 dd->ipath_pioavailregs_phys); 819 dd->ipath_pioavailregs_phys);
820
839 /* 821 /*
840 * this is to detect s/w errors, which the h/w works around by 822 * this is to detect s/w errors, which the h/w works around by
841 * ignoring the low 6 bits of address, if it wasn't aligned. 823 * ignoring the low 6 bits of address, if it wasn't aligned.
@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
862 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED); 844 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
863 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL); 845 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
864 846
865 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
866 dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
867 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
868 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
869 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
870
871 /* 847 /*
872 * before error clears, since we expect serdes pll errors during 848 * before error clears, since we expect serdes pll errors during
873 * this, the first time after reset 849 * this, the first time after reset
@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
940 else 916 else
941 enable_chip(dd, reinit); 917 enable_chip(dd, reinit);
942 918
919 /* after enable_chip, so pioavailshadow setup */
920 ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
921
922 /*
923 * Cancel any possible active sends from early driver load.
924 * Follows early_init because some chips have to initialize
925 * PIO buffers in early_init to avoid false parity errors.
926 * After enable and ipath_chg_pioavailkernel so we can safely
927 * enable pioavail updates and PIOENABLE; packets are now
928 * ready to go out.
929 */
930 ipath_cancel_sends(dd, 1);
931
943 if (!reinit) { 932 if (!reinit) {
944 /* 933 /*
945 * Used when we close a port, for DMA already in flight 934 * Used when we close a port, for DMA already in flight
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 1b58f4737c71..26900b3b7a4e 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,42 +38,12 @@
38#include "ipath_verbs.h" 38#include "ipath_verbs.h"
39#include "ipath_common.h" 39#include "ipath_common.h"
40 40
41/*
42 * clear (write) a pio buffer, to clear a parity error. This routine
43 * should only be called when in freeze mode, and the buffer should be
44 * canceled afterwards.
45 */
46static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
47{
48 u32 __iomem *pbuf;
49 u32 dwcnt; /* dword count to write */
50 if (pnum < dd->ipath_piobcnt2k) {
51 pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
52 dd->ipath_palign);
53 dwcnt = dd->ipath_piosize2k >> 2;
54 }
55 else {
56 pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
57 (pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
58 dwcnt = dd->ipath_piosize4k >> 2;
59 }
60 dev_info(&dd->pcidev->dev,
61 "Rewrite PIO buffer %u, to recover from parity error\n",
62 pnum);
63
64 /* no flush required, since already in freeze */
65 writel(dwcnt + 1, pbuf);
66 while (--dwcnt)
67 writel(0, pbuf++);
68}
69 41
70/* 42/*
71 * Called when we might have an error that is specific to a particular 43 * Called when we might have an error that is specific to a particular
72 * PIO buffer, and may need to cancel that buffer, so it can be re-used. 44 * PIO buffer, and may need to cancel that buffer, so it can be re-used.
73 * If rewrite is true, and bits are set in the sendbufferror registers,
74 * we'll write to the buffer, for error recovery on parity errors.
75 */ 45 */
76void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite) 46void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
77{ 47{
78 u32 piobcnt; 48 u32 piobcnt;
79 unsigned long sbuf[4]; 49 unsigned long sbuf[4];
@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
109 } 79 }
110 80
111 for (i = 0; i < piobcnt; i++) 81 for (i = 0; i < piobcnt; i++)
112 if (test_bit(i, sbuf)) { 82 if (test_bit(i, sbuf))
113 if (rewrite)
114 ipath_clrpiobuf(dd, i);
115 ipath_disarm_piobufs(dd, i, 1); 83 ipath_disarm_piobufs(dd, i, 1);
116 }
117 /* ignore armlaunch errs for a bit */ 84 /* ignore armlaunch errs for a bit */
118 dd->ipath_lastcancel = jiffies+3; 85 dd->ipath_lastcancel = jiffies+3;
119 } 86 }
@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
164{ 131{
165 u64 ignore_this_time = 0; 132 u64 ignore_this_time = 0;
166 133
167 ipath_disarm_senderrbufs(dd, 0); 134 ipath_disarm_senderrbufs(dd);
168 if ((errs & E_SUM_LINK_PKTERRS) && 135 if ((errs & E_SUM_LINK_PKTERRS) &&
169 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 136 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
170 /* 137 /*
@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
909 * processes (causing armlaunch), send errors due to going into freeze mode, 876 * processes (causing armlaunch), send errors due to going into freeze mode,
910 * etc., and try to avoid causing extra interrupts while doing so. 877 * etc., and try to avoid causing extra interrupts while doing so.
911 * Forcibly update the in-memory pioavail register copies after cleanup 878 * Forcibly update the in-memory pioavail register copies after cleanup
912 * because the chip won't do it for anything changing while in freeze mode 879 * because the chip won't do it while in freeze mode (the register values
913 * (we don't want to wait for the next pio buffer state change). 880 * themselves are kept correct).
914 * Make sure that we don't lose any important interrupts by using the chip 881 * Make sure that we don't lose any important interrupts by using the chip
915 * feature that says that writing 0 to a bit in *clear that is set in 882 * feature that says that writing 0 to a bit in *clear that is set in
916 * *status will cause an interrupt to be generated again (if allowed by 883 * *status will cause an interrupt to be generated again (if allowed by
@@ -918,44 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
918 */ 885 */
919void ipath_clear_freeze(struct ipath_devdata *dd) 886void ipath_clear_freeze(struct ipath_devdata *dd)
920{ 887{
921 int i, im;
922 u64 val;
923
924 /* disable error interrupts, to avoid confusion */ 888 /* disable error interrupts, to avoid confusion */
925 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); 889 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
926 890
927 /* also disable interrupts; errormask is sometimes overwriten */ 891 /* also disable interrupts; errormask is sometimes overwriten */
928 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL); 892 ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
929 893
930 /* 894 ipath_cancel_sends(dd, 1);
931 * clear all sends, because they have may been 895
932 * completed by usercode while in freeze mode, and 896 /* clear the freeze, and be sure chip saw it */
933 * therefore would not be sent, and eventually
934 * might cause the process to run out of bufs
935 */
936 ipath_cancel_sends(dd, 0);
937 ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 897 ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
938 dd->ipath_control); 898 dd->ipath_control);
899 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
939 900
940 /* ensure pio avail updates continue */ 901 /* force in-memory update now we are out of freeze */
941 ipath_force_pio_avail_update(dd); 902 ipath_force_pio_avail_update(dd);
942 903
943 /* 904 /*
944 * We just enabled pioavailupdate, so dma copy is almost certainly
945 * not yet right, so read the registers directly. Similar to init
946 */
947 for (i = 0; i < dd->ipath_pioavregs; i++) {
948 /* deal with 6110 chip bug */
949 im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
950 i ^ 1 : i;
951 val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
952 dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
953 dd->ipath_pioavailshadow[i] = val |
954 (~dd->ipath_pioavailkernel[i] <<
955 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
956 }
957
958 /*
959 * force new interrupt if any hwerr, error or interrupt bits are 905 * force new interrupt if any hwerr, error or interrupt bits are
960 * still set, and clear "safe" send packet errors related to freeze 906 * still set, and clear "safe" send packet errors related to freeze
961 * and cancelling sends. Re-enable error interrupts before possible 907 * and cancelling sends. Re-enable error interrupts before possible
@@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
1312 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1258 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1313 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 1259 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1314 1260
1315 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) 1261 /* always process; sdma verbs uses PIO for acks and VL15 */
1316 handle_layer_pioavail(dd); 1262 handle_layer_pioavail(dd);
1317 else
1318 ipath_dbg("unexpected BUFAVAIL intr\n");
1319 } 1263 }
1320 1264
1321 ret = IRQ_HANDLED; 1265 ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 202337ae90dc..59a8b254b97f 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -117,6 +117,10 @@ struct ipath_portdata {
117 u16 port_subport_cnt; 117 u16 port_subport_cnt;
118 /* non-zero if port is being shared. */ 118 /* non-zero if port is being shared. */
119 u16 port_subport_id; 119 u16 port_subport_id;
120 /* number of pio bufs for this port (all procs, if shared) */
121 u32 port_piocnt;
122 /* first pio buffer for this port */
123 u32 port_pio_base;
120 /* chip offset of PIO buffers for this port */ 124 /* chip offset of PIO buffers for this port */
121 u32 port_piobufs; 125 u32 port_piobufs;
122 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */ 126 /* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -155,8 +159,8 @@ struct ipath_portdata {
155 /* saved total number of polled urgent packets for poll edge trigger */ 159 /* saved total number of polled urgent packets for poll edge trigger */
156 u32 port_urgent_poll; 160 u32 port_urgent_poll;
157 /* pid of process using this port */ 161 /* pid of process using this port */
158 pid_t port_pid; 162 struct pid *port_pid;
159 pid_t port_subpid[INFINIPATH_MAX_SUBPORT]; 163 struct pid *port_subpid[INFINIPATH_MAX_SUBPORT];
160 /* same size as task_struct .comm[] */ 164 /* same size as task_struct .comm[] */
161 char port_comm[16]; 165 char port_comm[16];
162 /* pkeys set by this use of this port */ 166 /* pkeys set by this use of this port */
@@ -384,6 +388,8 @@ struct ipath_devdata {
384 u32 ipath_lastrpkts; 388 u32 ipath_lastrpkts;
385 /* pio bufs allocated per port */ 389 /* pio bufs allocated per port */
386 u32 ipath_pbufsport; 390 u32 ipath_pbufsport;
391 /* if remainder on bufs/port, ports < extrabuf get 1 extra */
392 u32 ipath_ports_extrabuf;
387 u32 ipath_pioupd_thresh; /* update threshold, some chips */ 393 u32 ipath_pioupd_thresh; /* update threshold, some chips */
388 /* 394 /*
389 * number of ports configured as max; zero is set to number chip 395 * number of ports configured as max; zero is set to number chip
@@ -477,7 +483,7 @@ struct ipath_devdata {
477 483
478 /* SendDMA related entries */ 484 /* SendDMA related entries */
479 spinlock_t ipath_sdma_lock; 485 spinlock_t ipath_sdma_lock;
480 u64 ipath_sdma_status; 486 unsigned long ipath_sdma_status;
481 unsigned long ipath_sdma_abort_jiffies; 487 unsigned long ipath_sdma_abort_jiffies;
482 unsigned long ipath_sdma_abort_intr_timeout; 488 unsigned long ipath_sdma_abort_intr_timeout;
483 unsigned long ipath_sdma_buf_jiffies; 489 unsigned long ipath_sdma_buf_jiffies;
@@ -816,8 +822,8 @@ struct ipath_devdata {
816#define IPATH_SDMA_DISARMED 1 822#define IPATH_SDMA_DISARMED 1
817#define IPATH_SDMA_DISABLED 2 823#define IPATH_SDMA_DISABLED 2
818#define IPATH_SDMA_LAYERBUF 3 824#define IPATH_SDMA_LAYERBUF 3
819#define IPATH_SDMA_RUNNING 62 825#define IPATH_SDMA_RUNNING 30
820#define IPATH_SDMA_SHUTDOWN 63 826#define IPATH_SDMA_SHUTDOWN 31
821 827
822/* bit combinations that correspond to abort states */ 828/* bit combinations that correspond to abort states */
823#define IPATH_SDMA_ABORT_NONE 0 829#define IPATH_SDMA_ABORT_NONE 0
@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
1011int ipath_update_eeprom_log(struct ipath_devdata *dd); 1017int ipath_update_eeprom_log(struct ipath_devdata *dd);
1012void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); 1018void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
1013u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 1019u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
1014void ipath_disarm_senderrbufs(struct ipath_devdata *, int); 1020void ipath_disarm_senderrbufs(struct ipath_devdata *);
1015void ipath_force_pio_avail_update(struct ipath_devdata *); 1021void ipath_force_pio_avail_update(struct ipath_devdata *);
1016void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); 1022void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
1017 1023
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index dd5b6e9d57c2..4715911101e4 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -242,7 +242,6 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
242{ 242{
243 struct ipath_qp *q, **qpp; 243 struct ipath_qp *q, **qpp;
244 unsigned long flags; 244 unsigned long flags;
245 int fnd = 0;
246 245
247 spin_lock_irqsave(&qpt->lock, flags); 246 spin_lock_irqsave(&qpt->lock, flags);
248 247
@@ -253,51 +252,40 @@ static void ipath_free_qp(struct ipath_qp_table *qpt, struct ipath_qp *qp)
253 *qpp = qp->next; 252 *qpp = qp->next;
254 qp->next = NULL; 253 qp->next = NULL;
255 atomic_dec(&qp->refcount); 254 atomic_dec(&qp->refcount);
256 fnd = 1;
257 break; 255 break;
258 } 256 }
259 } 257 }
260 258
261 spin_unlock_irqrestore(&qpt->lock, flags); 259 spin_unlock_irqrestore(&qpt->lock, flags);
262
263 if (!fnd)
264 return;
265
266 free_qpn(qpt, qp->ibqp.qp_num);
267
268 wait_event(qp->wait, !atomic_read(&qp->refcount));
269} 260}
270 261
271/** 262/**
272 * ipath_free_all_qps - remove all QPs from the table 263 * ipath_free_all_qps - check for QPs still in use
273 * @qpt: the QP table to empty 264 * @qpt: the QP table to empty
265 *
266 * There should not be any QPs still in use.
267 * Free memory for table.
274 */ 268 */
275void ipath_free_all_qps(struct ipath_qp_table *qpt) 269unsigned ipath_free_all_qps(struct ipath_qp_table *qpt)
276{ 270{
277 unsigned long flags; 271 unsigned long flags;
278 struct ipath_qp *qp, *nqp; 272 struct ipath_qp *qp;
279 u32 n; 273 u32 n, qp_inuse = 0;
280 274
275 spin_lock_irqsave(&qpt->lock, flags);
281 for (n = 0; n < qpt->max; n++) { 276 for (n = 0; n < qpt->max; n++) {
282 spin_lock_irqsave(&qpt->lock, flags);
283 qp = qpt->table[n]; 277 qp = qpt->table[n];
284 qpt->table[n] = NULL; 278 qpt->table[n] = NULL;
285 spin_unlock_irqrestore(&qpt->lock, flags); 279
286 280 for (; qp; qp = qp->next)
287 while (qp) { 281 qp_inuse++;
288 nqp = qp->next;
289 free_qpn(qpt, qp->ibqp.qp_num);
290 if (!atomic_dec_and_test(&qp->refcount) ||
291 !ipath_destroy_qp(&qp->ibqp))
292 ipath_dbg("QP memory leak!\n");
293 qp = nqp;
294 }
295 } 282 }
283 spin_unlock_irqrestore(&qpt->lock, flags);
296 284
297 for (n = 0; n < ARRAY_SIZE(qpt->map); n++) { 285 for (n = 0; n < ARRAY_SIZE(qpt->map); n++)
298 if (qpt->map[n].page) 286 if (qpt->map[n].page)
299 free_page((unsigned long)qpt->map[n].page); 287 free_page((unsigned long) qpt->map[n].page);
300 } 288 return qp_inuse;
301} 289}
302 290
303/** 291/**
@@ -336,11 +324,12 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
336 qp->remote_qpn = 0; 324 qp->remote_qpn = 0;
337 qp->qkey = 0; 325 qp->qkey = 0;
338 qp->qp_access_flags = 0; 326 qp->qp_access_flags = 0;
339 qp->s_busy = 0; 327 atomic_set(&qp->s_dma_busy, 0);
340 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR; 328 qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
341 qp->s_hdrwords = 0; 329 qp->s_hdrwords = 0;
342 qp->s_wqe = NULL; 330 qp->s_wqe = NULL;
343 qp->s_pkt_delay = 0; 331 qp->s_pkt_delay = 0;
332 qp->s_draining = 0;
344 qp->s_psn = 0; 333 qp->s_psn = 0;
345 qp->r_psn = 0; 334 qp->r_psn = 0;
346 qp->r_msn = 0; 335 qp->r_msn = 0;
@@ -353,7 +342,8 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
353 } 342 }
354 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE; 343 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
355 qp->r_nak_state = 0; 344 qp->r_nak_state = 0;
356 qp->r_wrid_valid = 0; 345 qp->r_aflags = 0;
346 qp->r_flags = 0;
357 qp->s_rnr_timeout = 0; 347 qp->s_rnr_timeout = 0;
358 qp->s_head = 0; 348 qp->s_head = 0;
359 qp->s_tail = 0; 349 qp->s_tail = 0;
@@ -361,7 +351,6 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
361 qp->s_last = 0; 351 qp->s_last = 0;
362 qp->s_ssn = 1; 352 qp->s_ssn = 1;
363 qp->s_lsn = 0; 353 qp->s_lsn = 0;
364 qp->s_wait_credit = 0;
365 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue)); 354 memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
366 qp->r_head_ack_queue = 0; 355 qp->r_head_ack_queue = 0;
367 qp->s_tail_ack_queue = 0; 356 qp->s_tail_ack_queue = 0;
@@ -370,17 +359,17 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
370 qp->r_rq.wq->head = 0; 359 qp->r_rq.wq->head = 0;
371 qp->r_rq.wq->tail = 0; 360 qp->r_rq.wq->tail = 0;
372 } 361 }
373 qp->r_reuse_sge = 0;
374} 362}
375 363
376/** 364/**
377 * ipath_error_qp - put a QP into an error state 365 * ipath_error_qp - put a QP into the error state
378 * @qp: the QP to put into an error state 366 * @qp: the QP to put into the error state
379 * @err: the receive completion error to signal if a RWQE is active 367 * @err: the receive completion error to signal if a RWQE is active
380 * 368 *
381 * Flushes both send and receive work queues. 369 * Flushes both send and receive work queues.
382 * Returns true if last WQE event should be generated. 370 * Returns true if last WQE event should be generated.
383 * The QP s_lock should be held and interrupts disabled. 371 * The QP s_lock should be held and interrupts disabled.
372 * If we are already in error state, just return.
384 */ 373 */
385 374
386int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err) 375int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
@@ -389,8 +378,10 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
389 struct ib_wc wc; 378 struct ib_wc wc;
390 int ret = 0; 379 int ret = 0;
391 380
392 ipath_dbg("QP%d/%d in error state (%d)\n", 381 if (qp->state == IB_QPS_ERR)
393 qp->ibqp.qp_num, qp->remote_qpn, err); 382 goto bail;
383
384 qp->state = IB_QPS_ERR;
394 385
395 spin_lock(&dev->pending_lock); 386 spin_lock(&dev->pending_lock);
396 if (!list_empty(&qp->timerwait)) 387 if (!list_empty(&qp->timerwait))
@@ -399,39 +390,21 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
399 list_del_init(&qp->piowait); 390 list_del_init(&qp->piowait);
400 spin_unlock(&dev->pending_lock); 391 spin_unlock(&dev->pending_lock);
401 392
402 wc.vendor_err = 0; 393 /* Schedule the sending tasklet to drain the send work queue. */
403 wc.byte_len = 0; 394 if (qp->s_last != qp->s_head)
404 wc.imm_data = 0; 395 ipath_schedule_send(qp);
396
397 memset(&wc, 0, sizeof(wc));
405 wc.qp = &qp->ibqp; 398 wc.qp = &qp->ibqp;
406 wc.src_qp = 0; 399 wc.opcode = IB_WC_RECV;
407 wc.wc_flags = 0; 400
408 wc.pkey_index = 0; 401 if (test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags)) {
409 wc.slid = 0;
410 wc.sl = 0;
411 wc.dlid_path_bits = 0;
412 wc.port_num = 0;
413 if (qp->r_wrid_valid) {
414 qp->r_wrid_valid = 0;
415 wc.wr_id = qp->r_wr_id; 402 wc.wr_id = qp->r_wr_id;
416 wc.opcode = IB_WC_RECV;
417 wc.status = err; 403 wc.status = err;
418 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); 404 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
419 } 405 }
420 wc.status = IB_WC_WR_FLUSH_ERR; 406 wc.status = IB_WC_WR_FLUSH_ERR;
421 407
422 while (qp->s_last != qp->s_head) {
423 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
424
425 wc.wr_id = wqe->wr.wr_id;
426 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
427 if (++qp->s_last >= qp->s_size)
428 qp->s_last = 0;
429 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
430 }
431 qp->s_cur = qp->s_tail = qp->s_head;
432 qp->s_hdrwords = 0;
433 qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
434
435 if (qp->r_rq.wq) { 408 if (qp->r_rq.wq) {
436 struct ipath_rwq *wq; 409 struct ipath_rwq *wq;
437 u32 head; 410 u32 head;
@@ -447,7 +420,6 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
447 tail = wq->tail; 420 tail = wq->tail;
448 if (tail >= qp->r_rq.size) 421 if (tail >= qp->r_rq.size)
449 tail = 0; 422 tail = 0;
450 wc.opcode = IB_WC_RECV;
451 while (tail != head) { 423 while (tail != head) {
452 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id; 424 wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
453 if (++tail >= qp->r_rq.size) 425 if (++tail >= qp->r_rq.size)
@@ -460,6 +432,7 @@ int ipath_error_qp(struct ipath_qp *qp, enum ib_wc_status err)
460 } else if (qp->ibqp.event_handler) 432 } else if (qp->ibqp.event_handler)
461 ret = 1; 433 ret = 1;
462 434
435bail:
463 return ret; 436 return ret;
464} 437}
465 438
@@ -478,11 +451,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
478 struct ipath_ibdev *dev = to_idev(ibqp->device); 451 struct ipath_ibdev *dev = to_idev(ibqp->device);
479 struct ipath_qp *qp = to_iqp(ibqp); 452 struct ipath_qp *qp = to_iqp(ibqp);
480 enum ib_qp_state cur_state, new_state; 453 enum ib_qp_state cur_state, new_state;
481 unsigned long flags;
482 int lastwqe = 0; 454 int lastwqe = 0;
483 int ret; 455 int ret;
484 456
485 spin_lock_irqsave(&qp->s_lock, flags); 457 spin_lock_irq(&qp->s_lock);
486 458
487 cur_state = attr_mask & IB_QP_CUR_STATE ? 459 cur_state = attr_mask & IB_QP_CUR_STATE ?
488 attr->cur_qp_state : qp->state; 460 attr->cur_qp_state : qp->state;
@@ -535,16 +507,42 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
535 507
536 switch (new_state) { 508 switch (new_state) {
537 case IB_QPS_RESET: 509 case IB_QPS_RESET:
510 if (qp->state != IB_QPS_RESET) {
511 qp->state = IB_QPS_RESET;
512 spin_lock(&dev->pending_lock);
513 if (!list_empty(&qp->timerwait))
514 list_del_init(&qp->timerwait);
515 if (!list_empty(&qp->piowait))
516 list_del_init(&qp->piowait);
517 spin_unlock(&dev->pending_lock);
518 qp->s_flags &= ~IPATH_S_ANY_WAIT;
519 spin_unlock_irq(&qp->s_lock);
520 /* Stop the sending tasklet */
521 tasklet_kill(&qp->s_task);
522 wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
523 spin_lock_irq(&qp->s_lock);
524 }
538 ipath_reset_qp(qp, ibqp->qp_type); 525 ipath_reset_qp(qp, ibqp->qp_type);
539 break; 526 break;
540 527
528 case IB_QPS_SQD:
529 qp->s_draining = qp->s_last != qp->s_cur;
530 qp->state = new_state;
531 break;
532
533 case IB_QPS_SQE:
534 if (qp->ibqp.qp_type == IB_QPT_RC)
535 goto inval;
536 qp->state = new_state;
537 break;
538
541 case IB_QPS_ERR: 539 case IB_QPS_ERR:
542 lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR); 540 lastwqe = ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
543 break; 541 break;
544 542
545 default: 543 default:
544 qp->state = new_state;
546 break; 545 break;
547
548 } 546 }
549 547
550 if (attr_mask & IB_QP_PKEY_INDEX) 548 if (attr_mask & IB_QP_PKEY_INDEX)
@@ -597,8 +595,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
597 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) 595 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
598 qp->s_max_rd_atomic = attr->max_rd_atomic; 596 qp->s_max_rd_atomic = attr->max_rd_atomic;
599 597
600 qp->state = new_state; 598 spin_unlock_irq(&qp->s_lock);
601 spin_unlock_irqrestore(&qp->s_lock, flags);
602 599
603 if (lastwqe) { 600 if (lastwqe) {
604 struct ib_event ev; 601 struct ib_event ev;
@@ -612,7 +609,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
612 goto bail; 609 goto bail;
613 610
614inval: 611inval:
615 spin_unlock_irqrestore(&qp->s_lock, flags); 612 spin_unlock_irq(&qp->s_lock);
616 ret = -EINVAL; 613 ret = -EINVAL;
617 614
618bail: 615bail:
@@ -643,7 +640,7 @@ int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
643 attr->pkey_index = qp->s_pkey_index; 640 attr->pkey_index = qp->s_pkey_index;
644 attr->alt_pkey_index = 0; 641 attr->alt_pkey_index = 0;
645 attr->en_sqd_async_notify = 0; 642 attr->en_sqd_async_notify = 0;
646 attr->sq_draining = 0; 643 attr->sq_draining = qp->s_draining;
647 attr->max_rd_atomic = qp->s_max_rd_atomic; 644 attr->max_rd_atomic = qp->s_max_rd_atomic;
648 attr->max_dest_rd_atomic = qp->r_max_rd_atomic; 645 attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
649 attr->min_rnr_timer = qp->r_min_rnr_timer; 646 attr->min_rnr_timer = qp->r_min_rnr_timer;
@@ -833,6 +830,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
833 spin_lock_init(&qp->r_rq.lock); 830 spin_lock_init(&qp->r_rq.lock);
834 atomic_set(&qp->refcount, 0); 831 atomic_set(&qp->refcount, 0);
835 init_waitqueue_head(&qp->wait); 832 init_waitqueue_head(&qp->wait);
833 init_waitqueue_head(&qp->wait_dma);
836 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp); 834 tasklet_init(&qp->s_task, ipath_do_send, (unsigned long)qp);
837 INIT_LIST_HEAD(&qp->piowait); 835 INIT_LIST_HEAD(&qp->piowait);
838 INIT_LIST_HEAD(&qp->timerwait); 836 INIT_LIST_HEAD(&qp->timerwait);
@@ -926,6 +924,7 @@ bail_ip:
926 else 924 else
927 vfree(qp->r_rq.wq); 925 vfree(qp->r_rq.wq);
928 ipath_free_qp(&dev->qp_table, qp); 926 ipath_free_qp(&dev->qp_table, qp);
927 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
929bail_qp: 928bail_qp:
930 kfree(qp); 929 kfree(qp);
931bail_swq: 930bail_swq:
@@ -947,41 +946,44 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
947{ 946{
948 struct ipath_qp *qp = to_iqp(ibqp); 947 struct ipath_qp *qp = to_iqp(ibqp);
949 struct ipath_ibdev *dev = to_idev(ibqp->device); 948 struct ipath_ibdev *dev = to_idev(ibqp->device);
950 unsigned long flags;
951 949
952 spin_lock_irqsave(&qp->s_lock, flags); 950 /* Make sure HW and driver activity is stopped. */
953 qp->state = IB_QPS_ERR; 951 spin_lock_irq(&qp->s_lock);
954 spin_unlock_irqrestore(&qp->s_lock, flags); 952 if (qp->state != IB_QPS_RESET) {
955 spin_lock(&dev->n_qps_lock); 953 qp->state = IB_QPS_RESET;
956 dev->n_qps_allocated--; 954 spin_lock(&dev->pending_lock);
957 spin_unlock(&dev->n_qps_lock); 955 if (!list_empty(&qp->timerwait))
956 list_del_init(&qp->timerwait);
957 if (!list_empty(&qp->piowait))
958 list_del_init(&qp->piowait);
959 spin_unlock(&dev->pending_lock);
960 qp->s_flags &= ~IPATH_S_ANY_WAIT;
961 spin_unlock_irq(&qp->s_lock);
962 /* Stop the sending tasklet */
963 tasklet_kill(&qp->s_task);
964 wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
965 } else
966 spin_unlock_irq(&qp->s_lock);
958 967
959 /* Stop the sending tasklet. */ 968 ipath_free_qp(&dev->qp_table, qp);
960 tasklet_kill(&qp->s_task);
961 969
962 if (qp->s_tx) { 970 if (qp->s_tx) {
963 atomic_dec(&qp->refcount); 971 atomic_dec(&qp->refcount);
964 if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) 972 if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
965 kfree(qp->s_tx->txreq.map_addr); 973 kfree(qp->s_tx->txreq.map_addr);
974 spin_lock_irq(&dev->pending_lock);
975 list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
976 spin_unlock_irq(&dev->pending_lock);
977 qp->s_tx = NULL;
966 } 978 }
967 979
968 /* Make sure the QP isn't on the timeout list. */ 980 wait_event(qp->wait, !atomic_read(&qp->refcount));
969 spin_lock_irqsave(&dev->pending_lock, flags);
970 if (!list_empty(&qp->timerwait))
971 list_del_init(&qp->timerwait);
972 if (!list_empty(&qp->piowait))
973 list_del_init(&qp->piowait);
974 if (qp->s_tx)
975 list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
976 spin_unlock_irqrestore(&dev->pending_lock, flags);
977 981
978 /* 982 /* all user's cleaned up, mark it available */
979 * Make sure that the QP is not in the QPN table so receive 983 free_qpn(&dev->qp_table, qp->ibqp.qp_num);
980 * interrupts will discard packets for this QP. XXX Also remove QP 984 spin_lock(&dev->n_qps_lock);
981 * from multicast table. 985 dev->n_qps_allocated--;
982 */ 986 spin_unlock(&dev->n_qps_lock);
983 if (atomic_read(&qp->refcount) != 0)
984 ipath_free_qp(&dev->qp_table, qp);
985 987
986 if (qp->ip) 988 if (qp->ip)
987 kref_put(&qp->ip->ref, ipath_release_mmap_info); 989 kref_put(&qp->ip->ref, ipath_release_mmap_info);
@@ -1026,48 +1028,6 @@ bail:
1026} 1028}
1027 1029
1028/** 1030/**
1029 * ipath_sqerror_qp - put a QP's send queue into an error state
1030 * @qp: QP who's send queue will be put into an error state
1031 * @wc: the WC responsible for putting the QP in this state
1032 *
1033 * Flushes the send work queue.
1034 * The QP s_lock should be held and interrupts disabled.
1035 */
1036
1037void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc)
1038{
1039 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1040 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
1041
1042 ipath_dbg("Send queue error on QP%d/%d: err: %d\n",
1043 qp->ibqp.qp_num, qp->remote_qpn, wc->status);
1044
1045 spin_lock(&dev->pending_lock);
1046 if (!list_empty(&qp->timerwait))
1047 list_del_init(&qp->timerwait);
1048 if (!list_empty(&qp->piowait))
1049 list_del_init(&qp->piowait);
1050 spin_unlock(&dev->pending_lock);
1051
1052 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
1053 if (++qp->s_last >= qp->s_size)
1054 qp->s_last = 0;
1055
1056 wc->status = IB_WC_WR_FLUSH_ERR;
1057
1058 while (qp->s_last != qp->s_head) {
1059 wqe = get_swqe_ptr(qp, qp->s_last);
1060 wc->wr_id = wqe->wr.wr_id;
1061 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
1062 ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 1);
1063 if (++qp->s_last >= qp->s_size)
1064 qp->s_last = 0;
1065 }
1066 qp->s_cur = qp->s_tail = qp->s_head;
1067 qp->state = IB_QPS_SQE;
1068}
1069
1070/**
1071 * ipath_get_credit - flush the send work queue of a QP 1031 * ipath_get_credit - flush the send work queue of a QP
1072 * @qp: the qp who's send work queue to flush 1032 * @qp: the qp who's send work queue to flush
1073 * @aeth: the Acknowledge Extended Transport Header 1033 * @aeth: the Acknowledge Extended Transport Header
@@ -1093,9 +1053,10 @@ void ipath_get_credit(struct ipath_qp *qp, u32 aeth)
1093 } 1053 }
1094 1054
1095 /* Restart sending if it was blocked due to lack of credits. */ 1055 /* Restart sending if it was blocked due to lack of credits. */
1096 if (qp->s_cur != qp->s_head && 1056 if ((qp->s_flags & IPATH_S_WAIT_SSN_CREDIT) &&
1057 qp->s_cur != qp->s_head &&
1097 (qp->s_lsn == (u32) -1 || 1058 (qp->s_lsn == (u32) -1 ||
1098 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn, 1059 ipath_cmp24(get_swqe_ptr(qp, qp->s_cur)->ssn,
1099 qp->s_lsn + 1) <= 0)) 1060 qp->s_lsn + 1) <= 0))
1100 tasklet_hi_schedule(&qp->s_task); 1061 ipath_schedule_send(qp);
1101} 1062}
diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
index c405dfba5531..108df667d2ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_rc.c
+++ b/drivers/infiniband/hw/ipath/ipath_rc.c
@@ -92,6 +92,10 @@ static int ipath_make_rc_ack(struct ipath_ibdev *dev, struct ipath_qp *qp,
92 u32 bth0; 92 u32 bth0;
93 u32 bth2; 93 u32 bth2;
94 94
95 /* Don't send an ACK if we aren't supposed to. */
96 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
97 goto bail;
98
95 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 99 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
96 hwords = 5; 100 hwords = 5;
97 101
@@ -238,14 +242,25 @@ int ipath_make_rc_req(struct ipath_qp *qp)
238 ipath_make_rc_ack(dev, qp, ohdr, pmtu)) 242 ipath_make_rc_ack(dev, qp, ohdr, pmtu))
239 goto done; 243 goto done;
240 244
241 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) || 245 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
242 qp->s_rnr_timeout || qp->s_wait_credit) 246 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
243 goto bail; 247 goto bail;
248 /* We are in the error state, flush the work request. */
249 if (qp->s_last == qp->s_head)
250 goto bail;
251 /* If DMAs are in progress, we can't flush immediately. */
252 if (atomic_read(&qp->s_dma_busy)) {
253 qp->s_flags |= IPATH_S_WAIT_DMA;
254 goto bail;
255 }
256 wqe = get_swqe_ptr(qp, qp->s_last);
257 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
258 goto done;
259 }
244 260
245 /* Limit the number of packets sent without an ACK. */ 261 /* Leave BUSY set until RNR timeout. */
246 if (ipath_cmp24(qp->s_psn, qp->s_last_psn + IPATH_PSN_CREDIT) > 0) { 262 if (qp->s_rnr_timeout) {
247 qp->s_wait_credit = 1; 263 qp->s_flags |= IPATH_S_WAITING;
248 dev->n_rc_stalls++;
249 goto bail; 264 goto bail;
250 } 265 }
251 266
@@ -257,6 +272,9 @@ int ipath_make_rc_req(struct ipath_qp *qp)
257 wqe = get_swqe_ptr(qp, qp->s_cur); 272 wqe = get_swqe_ptr(qp, qp->s_cur);
258 switch (qp->s_state) { 273 switch (qp->s_state) {
259 default: 274 default:
275 if (!(ib_ipath_state_ops[qp->state] &
276 IPATH_PROCESS_NEXT_SEND_OK))
277 goto bail;
260 /* 278 /*
261 * Resend an old request or start a new one. 279 * Resend an old request or start a new one.
262 * 280 *
@@ -294,8 +312,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
294 case IB_WR_SEND_WITH_IMM: 312 case IB_WR_SEND_WITH_IMM:
295 /* If no credit, return. */ 313 /* If no credit, return. */
296 if (qp->s_lsn != (u32) -1 && 314 if (qp->s_lsn != (u32) -1 &&
297 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 315 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
316 qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
298 goto bail; 317 goto bail;
318 }
299 wqe->lpsn = wqe->psn; 319 wqe->lpsn = wqe->psn;
300 if (len > pmtu) { 320 if (len > pmtu) {
301 wqe->lpsn += (len - 1) / pmtu; 321 wqe->lpsn += (len - 1) / pmtu;
@@ -325,8 +345,10 @@ int ipath_make_rc_req(struct ipath_qp *qp)
325 case IB_WR_RDMA_WRITE_WITH_IMM: 345 case IB_WR_RDMA_WRITE_WITH_IMM:
326 /* If no credit, return. */ 346 /* If no credit, return. */
327 if (qp->s_lsn != (u32) -1 && 347 if (qp->s_lsn != (u32) -1 &&
328 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) 348 ipath_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
349 qp->s_flags |= IPATH_S_WAIT_SSN_CREDIT;
329 goto bail; 350 goto bail;
351 }
330 ohdr->u.rc.reth.vaddr = 352 ohdr->u.rc.reth.vaddr =
331 cpu_to_be64(wqe->wr.wr.rdma.remote_addr); 353 cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
332 ohdr->u.rc.reth.rkey = 354 ohdr->u.rc.reth.rkey =
@@ -570,7 +592,11 @@ int ipath_make_rc_req(struct ipath_qp *qp)
570 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2); 592 ipath_make_ruc_header(dev, qp, ohdr, bth0 | (qp->s_state << 24), bth2);
571done: 593done:
572 ret = 1; 594 ret = 1;
595 goto unlock;
596
573bail: 597bail:
598 qp->s_flags &= ~IPATH_S_BUSY;
599unlock:
574 spin_unlock_irqrestore(&qp->s_lock, flags); 600 spin_unlock_irqrestore(&qp->s_lock, flags);
575 return ret; 601 return ret;
576} 602}
@@ -606,7 +632,11 @@ static void send_rc_ack(struct ipath_qp *qp)
606 632
607 spin_unlock_irqrestore(&qp->s_lock, flags); 633 spin_unlock_irqrestore(&qp->s_lock, flags);
608 634
635 /* Don't try to send ACKs if the link isn't ACTIVE */
609 dd = dev->dd; 636 dd = dev->dd;
637 if (!(dd->ipath_flags & IPATH_LINKACTIVE))
638 goto done;
639
610 piobuf = ipath_getpiobuf(dd, 0, NULL); 640 piobuf = ipath_getpiobuf(dd, 0, NULL);
611 if (!piobuf) { 641 if (!piobuf) {
612 /* 642 /*
@@ -668,15 +698,16 @@ static void send_rc_ack(struct ipath_qp *qp)
668 goto done; 698 goto done;
669 699
670queue_ack: 700queue_ack:
671 dev->n_rc_qacks++; 701 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK) {
672 qp->s_flags |= IPATH_S_ACK_PENDING; 702 dev->n_rc_qacks++;
673 qp->s_nak_state = qp->r_nak_state; 703 qp->s_flags |= IPATH_S_ACK_PENDING;
674 qp->s_ack_psn = qp->r_ack_psn; 704 qp->s_nak_state = qp->r_nak_state;
705 qp->s_ack_psn = qp->r_ack_psn;
706
707 /* Schedule the send tasklet. */
708 ipath_schedule_send(qp);
709 }
675 spin_unlock_irqrestore(&qp->s_lock, flags); 710 spin_unlock_irqrestore(&qp->s_lock, flags);
676
677 /* Call ipath_do_rc_send() in another thread. */
678 tasklet_hi_schedule(&qp->s_task);
679
680done: 711done:
681 return; 712 return;
682} 713}
@@ -735,7 +766,7 @@ static void reset_psn(struct ipath_qp *qp, u32 psn)
735 /* 766 /*
736 * Set the state to restart in the middle of a request. 767 * Set the state to restart in the middle of a request.
737 * Don't change the s_sge, s_cur_sge, or s_cur_size. 768 * Don't change the s_sge, s_cur_sge, or s_cur_size.
738 * See ipath_do_rc_send(). 769 * See ipath_make_rc_req().
739 */ 770 */
740 switch (opcode) { 771 switch (opcode) {
741 case IB_WR_SEND: 772 case IB_WR_SEND:
@@ -771,27 +802,14 @@ done:
771 * 802 *
772 * The QP s_lock should be held and interrupts disabled. 803 * The QP s_lock should be held and interrupts disabled.
773 */ 804 */
774void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc) 805void ipath_restart_rc(struct ipath_qp *qp, u32 psn)
775{ 806{
776 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last); 807 struct ipath_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
777 struct ipath_ibdev *dev; 808 struct ipath_ibdev *dev;
778 809
779 if (qp->s_retry == 0) { 810 if (qp->s_retry == 0) {
780 wc->wr_id = wqe->wr.wr_id; 811 ipath_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
781 wc->status = IB_WC_RETRY_EXC_ERR; 812 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
782 wc->opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
783 wc->vendor_err = 0;
784 wc->byte_len = 0;
785 wc->qp = &qp->ibqp;
786 wc->imm_data = 0;
787 wc->src_qp = qp->remote_qpn;
788 wc->wc_flags = 0;
789 wc->pkey_index = 0;
790 wc->slid = qp->remote_ah_attr.dlid;
791 wc->sl = qp->remote_ah_attr.sl;
792 wc->dlid_path_bits = 0;
793 wc->port_num = 0;
794 ipath_sqerror_qp(qp, wc);
795 goto bail; 813 goto bail;
796 } 814 }
797 qp->s_retry--; 815 qp->s_retry--;
@@ -804,6 +822,8 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
804 spin_lock(&dev->pending_lock); 822 spin_lock(&dev->pending_lock);
805 if (!list_empty(&qp->timerwait)) 823 if (!list_empty(&qp->timerwait))
806 list_del_init(&qp->timerwait); 824 list_del_init(&qp->timerwait);
825 if (!list_empty(&qp->piowait))
826 list_del_init(&qp->piowait);
807 spin_unlock(&dev->pending_lock); 827 spin_unlock(&dev->pending_lock);
808 828
809 if (wqe->wr.opcode == IB_WR_RDMA_READ) 829 if (wqe->wr.opcode == IB_WR_RDMA_READ)
@@ -812,7 +832,7 @@ void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc)
812 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK; 832 dev->n_rc_resends += (qp->s_psn - psn) & IPATH_PSN_MASK;
813 833
814 reset_psn(qp, psn); 834 reset_psn(qp, psn);
815 tasklet_hi_schedule(&qp->s_task); 835 ipath_schedule_send(qp);
816 836
817bail: 837bail:
818 return; 838 return;
@@ -820,13 +840,7 @@ bail:
820 840
821static inline void update_last_psn(struct ipath_qp *qp, u32 psn) 841static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
822{ 842{
823 if (qp->s_last_psn != psn) { 843 qp->s_last_psn = psn;
824 qp->s_last_psn = psn;
825 if (qp->s_wait_credit) {
826 qp->s_wait_credit = 0;
827 tasklet_hi_schedule(&qp->s_task);
828 }
829 }
830} 844}
831 845
832/** 846/**
@@ -845,6 +859,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
845{ 859{
846 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 860 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
847 struct ib_wc wc; 861 struct ib_wc wc;
862 enum ib_wc_status status;
848 struct ipath_swqe *wqe; 863 struct ipath_swqe *wqe;
849 int ret = 0; 864 int ret = 0;
850 u32 ack_psn; 865 u32 ack_psn;
@@ -909,7 +924,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
909 */ 924 */
910 update_last_psn(qp, wqe->psn - 1); 925 update_last_psn(qp, wqe->psn - 1);
911 /* Retry this request. */ 926 /* Retry this request. */
912 ipath_restart_rc(qp, wqe->psn, &wc); 927 ipath_restart_rc(qp, wqe->psn);
913 /* 928 /*
914 * No need to process the ACK/NAK since we are 929 * No need to process the ACK/NAK since we are
915 * restarting an earlier request. 930 * restarting an earlier request.
@@ -925,32 +940,23 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
925 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) { 940 wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
926 qp->s_num_rd_atomic--; 941 qp->s_num_rd_atomic--;
927 /* Restart sending task if fence is complete */ 942 /* Restart sending task if fence is complete */
928 if ((qp->s_flags & IPATH_S_FENCE_PENDING) && 943 if (((qp->s_flags & IPATH_S_FENCE_PENDING) &&
929 !qp->s_num_rd_atomic) { 944 !qp->s_num_rd_atomic) ||
930 qp->s_flags &= ~IPATH_S_FENCE_PENDING; 945 qp->s_flags & IPATH_S_RDMAR_PENDING)
931 tasklet_hi_schedule(&qp->s_task); 946 ipath_schedule_send(qp);
932 } else if (qp->s_flags & IPATH_S_RDMAR_PENDING) {
933 qp->s_flags &= ~IPATH_S_RDMAR_PENDING;
934 tasklet_hi_schedule(&qp->s_task);
935 }
936 } 947 }
937 /* Post a send completion queue entry if requested. */ 948 /* Post a send completion queue entry if requested. */
938 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || 949 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
939 (wqe->wr.send_flags & IB_SEND_SIGNALED)) { 950 (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
951 memset(&wc, 0, sizeof wc);
940 wc.wr_id = wqe->wr.wr_id; 952 wc.wr_id = wqe->wr.wr_id;
941 wc.status = IB_WC_SUCCESS; 953 wc.status = IB_WC_SUCCESS;
942 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 954 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
943 wc.vendor_err = 0;
944 wc.byte_len = wqe->length; 955 wc.byte_len = wqe->length;
945 wc.imm_data = 0;
946 wc.qp = &qp->ibqp; 956 wc.qp = &qp->ibqp;
947 wc.src_qp = qp->remote_qpn; 957 wc.src_qp = qp->remote_qpn;
948 wc.wc_flags = 0;
949 wc.pkey_index = 0;
950 wc.slid = qp->remote_ah_attr.dlid; 958 wc.slid = qp->remote_ah_attr.dlid;
951 wc.sl = qp->remote_ah_attr.sl; 959 wc.sl = qp->remote_ah_attr.sl;
952 wc.dlid_path_bits = 0;
953 wc.port_num = 0;
954 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0); 960 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
955 } 961 }
956 qp->s_retry = qp->s_retry_cnt; 962 qp->s_retry = qp->s_retry_cnt;
@@ -971,6 +977,8 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
971 } else { 977 } else {
972 if (++qp->s_last >= qp->s_size) 978 if (++qp->s_last >= qp->s_size)
973 qp->s_last = 0; 979 qp->s_last = 0;
980 if (qp->state == IB_QPS_SQD && qp->s_last == qp->s_cur)
981 qp->s_draining = 0;
974 if (qp->s_last == qp->s_tail) 982 if (qp->s_last == qp->s_tail)
975 break; 983 break;
976 wqe = get_swqe_ptr(qp, qp->s_last); 984 wqe = get_swqe_ptr(qp, qp->s_last);
@@ -994,7 +1002,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
994 */ 1002 */
995 if (ipath_cmp24(qp->s_psn, psn) <= 0) { 1003 if (ipath_cmp24(qp->s_psn, psn) <= 0) {
996 reset_psn(qp, psn + 1); 1004 reset_psn(qp, psn + 1);
997 tasklet_hi_schedule(&qp->s_task); 1005 ipath_schedule_send(qp);
998 } 1006 }
999 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) { 1007 } else if (ipath_cmp24(qp->s_psn, psn) <= 0) {
1000 qp->s_state = OP(SEND_LAST); 1008 qp->s_state = OP(SEND_LAST);
@@ -1012,7 +1020,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
1012 if (qp->s_last == qp->s_tail) 1020 if (qp->s_last == qp->s_tail)
1013 goto bail; 1021 goto bail;
1014 if (qp->s_rnr_retry == 0) { 1022 if (qp->s_rnr_retry == 0) {
1015 wc.status = IB_WC_RNR_RETRY_EXC_ERR; 1023 status = IB_WC_RNR_RETRY_EXC_ERR;
1016 goto class_b; 1024 goto class_b;
1017 } 1025 }
1018 if (qp->s_rnr_retry_cnt < 7) 1026 if (qp->s_rnr_retry_cnt < 7)
@@ -1033,6 +1041,7 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
1033 ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) & 1041 ib_ipath_rnr_table[(aeth >> IPATH_AETH_CREDIT_SHIFT) &
1034 IPATH_AETH_CREDIT_MASK]; 1042 IPATH_AETH_CREDIT_MASK];
1035 ipath_insert_rnr_queue(qp); 1043 ipath_insert_rnr_queue(qp);
1044 ipath_schedule_send(qp);
1036 goto bail; 1045 goto bail;
1037 1046
1038 case 3: /* NAK */ 1047 case 3: /* NAK */
@@ -1050,37 +1059,25 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
1050 * RDMA READ response which terminates the RDMA 1059 * RDMA READ response which terminates the RDMA
1051 * READ. 1060 * READ.
1052 */ 1061 */
1053 ipath_restart_rc(qp, psn, &wc); 1062 ipath_restart_rc(qp, psn);
1054 break; 1063 break;
1055 1064
1056 case 1: /* Invalid Request */ 1065 case 1: /* Invalid Request */
1057 wc.status = IB_WC_REM_INV_REQ_ERR; 1066 status = IB_WC_REM_INV_REQ_ERR;
1058 dev->n_other_naks++; 1067 dev->n_other_naks++;
1059 goto class_b; 1068 goto class_b;
1060 1069
1061 case 2: /* Remote Access Error */ 1070 case 2: /* Remote Access Error */
1062 wc.status = IB_WC_REM_ACCESS_ERR; 1071 status = IB_WC_REM_ACCESS_ERR;
1063 dev->n_other_naks++; 1072 dev->n_other_naks++;
1064 goto class_b; 1073 goto class_b;
1065 1074
1066 case 3: /* Remote Operation Error */ 1075 case 3: /* Remote Operation Error */
1067 wc.status = IB_WC_REM_OP_ERR; 1076 status = IB_WC_REM_OP_ERR;
1068 dev->n_other_naks++; 1077 dev->n_other_naks++;
1069 class_b: 1078 class_b:
1070 wc.wr_id = wqe->wr.wr_id; 1079 ipath_send_complete(qp, wqe, status);
1071 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 1080 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1072 wc.vendor_err = 0;
1073 wc.byte_len = 0;
1074 wc.qp = &qp->ibqp;
1075 wc.imm_data = 0;
1076 wc.src_qp = qp->remote_qpn;
1077 wc.wc_flags = 0;
1078 wc.pkey_index = 0;
1079 wc.slid = qp->remote_ah_attr.dlid;
1080 wc.sl = qp->remote_ah_attr.sl;
1081 wc.dlid_path_bits = 0;
1082 wc.port_num = 0;
1083 ipath_sqerror_qp(qp, &wc);
1084 break; 1081 break;
1085 1082
1086 default: 1083 default:
@@ -1126,8 +1123,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1126 int header_in_data) 1123 int header_in_data)
1127{ 1124{
1128 struct ipath_swqe *wqe; 1125 struct ipath_swqe *wqe;
1126 enum ib_wc_status status;
1129 unsigned long flags; 1127 unsigned long flags;
1130 struct ib_wc wc;
1131 int diff; 1128 int diff;
1132 u32 pad; 1129 u32 pad;
1133 u32 aeth; 1130 u32 aeth;
@@ -1135,6 +1132,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1135 1132
1136 spin_lock_irqsave(&qp->s_lock, flags); 1133 spin_lock_irqsave(&qp->s_lock, flags);
1137 1134
1135 /* Double check we can process this now that we hold the s_lock. */
1136 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1137 goto ack_done;
1138
1138 /* Ignore invalid responses. */ 1139 /* Ignore invalid responses. */
1139 if (ipath_cmp24(psn, qp->s_next_psn) >= 0) 1140 if (ipath_cmp24(psn, qp->s_next_psn) >= 0)
1140 goto ack_done; 1141 goto ack_done;
@@ -1159,6 +1160,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1159 if (unlikely(qp->s_last == qp->s_tail)) 1160 if (unlikely(qp->s_last == qp->s_tail))
1160 goto ack_done; 1161 goto ack_done;
1161 wqe = get_swqe_ptr(qp, qp->s_last); 1162 wqe = get_swqe_ptr(qp, qp->s_last);
1163 status = IB_WC_SUCCESS;
1162 1164
1163 switch (opcode) { 1165 switch (opcode) {
1164 case OP(ACKNOWLEDGE): 1166 case OP(ACKNOWLEDGE):
@@ -1187,6 +1189,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1187 wqe = get_swqe_ptr(qp, qp->s_last); 1189 wqe = get_swqe_ptr(qp, qp->s_last);
1188 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1190 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
1189 goto ack_op_err; 1191 goto ack_op_err;
1192 qp->r_flags &= ~IPATH_R_RDMAR_SEQ;
1190 /* 1193 /*
1191 * If this is a response to a resent RDMA read, we 1194 * If this is a response to a resent RDMA read, we
1192 * have to be careful to copy the data to the right 1195 * have to be careful to copy the data to the right
@@ -1200,7 +1203,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1200 /* no AETH, no ACK */ 1203 /* no AETH, no ACK */
1201 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1204 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1202 dev->n_rdma_seq++; 1205 dev->n_rdma_seq++;
1203 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); 1206 if (qp->r_flags & IPATH_R_RDMAR_SEQ)
1207 goto ack_done;
1208 qp->r_flags |= IPATH_R_RDMAR_SEQ;
1209 ipath_restart_rc(qp, qp->s_last_psn + 1);
1204 goto ack_done; 1210 goto ack_done;
1205 } 1211 }
1206 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1212 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1261,7 +1267,10 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1261 /* ACKs READ req. */ 1267 /* ACKs READ req. */
1262 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) { 1268 if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
1263 dev->n_rdma_seq++; 1269 dev->n_rdma_seq++;
1264 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); 1270 if (qp->r_flags & IPATH_R_RDMAR_SEQ)
1271 goto ack_done;
1272 qp->r_flags |= IPATH_R_RDMAR_SEQ;
1273 ipath_restart_rc(qp, qp->s_last_psn + 1);
1265 goto ack_done; 1274 goto ack_done;
1266 } 1275 }
1267 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ)) 1276 if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
@@ -1291,31 +1300,16 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
1291 goto ack_done; 1300 goto ack_done;
1292 } 1301 }
1293 1302
1294ack_done:
1295 spin_unlock_irqrestore(&qp->s_lock, flags);
1296 goto bail;
1297
1298ack_op_err: 1303ack_op_err:
1299 wc.status = IB_WC_LOC_QP_OP_ERR; 1304 status = IB_WC_LOC_QP_OP_ERR;
1300 goto ack_err; 1305 goto ack_err;
1301 1306
1302ack_len_err: 1307ack_len_err:
1303 wc.status = IB_WC_LOC_LEN_ERR; 1308 status = IB_WC_LOC_LEN_ERR;
1304ack_err: 1309ack_err:
1305 wc.wr_id = wqe->wr.wr_id; 1310 ipath_send_complete(qp, wqe, status);
1306 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 1311 ipath_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1307 wc.vendor_err = 0; 1312ack_done:
1308 wc.byte_len = 0;
1309 wc.imm_data = 0;
1310 wc.qp = &qp->ibqp;
1311 wc.src_qp = qp->remote_qpn;
1312 wc.wc_flags = 0;
1313 wc.pkey_index = 0;
1314 wc.slid = qp->remote_ah_attr.dlid;
1315 wc.sl = qp->remote_ah_attr.sl;
1316 wc.dlid_path_bits = 0;
1317 wc.port_num = 0;
1318 ipath_sqerror_qp(qp, &wc);
1319 spin_unlock_irqrestore(&qp->s_lock, flags); 1313 spin_unlock_irqrestore(&qp->s_lock, flags);
1320bail: 1314bail:
1321 return; 1315 return;
@@ -1384,7 +1378,12 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1384 psn &= IPATH_PSN_MASK; 1378 psn &= IPATH_PSN_MASK;
1385 e = NULL; 1379 e = NULL;
1386 old_req = 1; 1380 old_req = 1;
1381
1387 spin_lock_irqsave(&qp->s_lock, flags); 1382 spin_lock_irqsave(&qp->s_lock, flags);
1383 /* Double check we can process this now that we hold the s_lock. */
1384 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1385 goto unlock_done;
1386
1388 for (i = qp->r_head_ack_queue; ; i = prev) { 1387 for (i = qp->r_head_ack_queue; ; i = prev) {
1389 if (i == qp->s_tail_ack_queue) 1388 if (i == qp->s_tail_ack_queue)
1390 old_req = 0; 1389 old_req = 0;
@@ -1512,7 +1511,7 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
1512 break; 1511 break;
1513 } 1512 }
1514 qp->r_nak_state = 0; 1513 qp->r_nak_state = 0;
1515 tasklet_hi_schedule(&qp->s_task); 1514 ipath_schedule_send(qp);
1516 1515
1517unlock_done: 1516unlock_done:
1518 spin_unlock_irqrestore(&qp->s_lock, flags); 1517 spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1523,13 +1522,12 @@ send_ack:
1523 return 0; 1522 return 0;
1524} 1523}
1525 1524
1526static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err) 1525void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1527{ 1526{
1528 unsigned long flags; 1527 unsigned long flags;
1529 int lastwqe; 1528 int lastwqe;
1530 1529
1531 spin_lock_irqsave(&qp->s_lock, flags); 1530 spin_lock_irqsave(&qp->s_lock, flags);
1532 qp->state = IB_QPS_ERR;
1533 lastwqe = ipath_error_qp(qp, err); 1531 lastwqe = ipath_error_qp(qp, err);
1534 spin_unlock_irqrestore(&qp->s_lock, flags); 1532 spin_unlock_irqrestore(&qp->s_lock, flags);
1535 1533
@@ -1545,18 +1543,15 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
1545 1543
1546static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n) 1544static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
1547{ 1545{
1548 unsigned long flags;
1549 unsigned next; 1546 unsigned next;
1550 1547
1551 next = n + 1; 1548 next = n + 1;
1552 if (next > IPATH_MAX_RDMA_ATOMIC) 1549 if (next > IPATH_MAX_RDMA_ATOMIC)
1553 next = 0; 1550 next = 0;
1554 spin_lock_irqsave(&qp->s_lock, flags);
1555 if (n == qp->s_tail_ack_queue) { 1551 if (n == qp->s_tail_ack_queue) {
1556 qp->s_tail_ack_queue = next; 1552 qp->s_tail_ack_queue = next;
1557 qp->s_ack_state = OP(ACKNOWLEDGE); 1553 qp->s_ack_state = OP(ACKNOWLEDGE);
1558 } 1554 }
1559 spin_unlock_irqrestore(&qp->s_lock, flags);
1560} 1555}
1561 1556
1562/** 1557/**
@@ -1585,6 +1580,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1585 int diff; 1580 int diff;
1586 struct ib_reth *reth; 1581 struct ib_reth *reth;
1587 int header_in_data; 1582 int header_in_data;
1583 unsigned long flags;
1588 1584
1589 /* Validate the SLID. See Ch. 9.6.1.5 */ 1585 /* Validate the SLID. See Ch. 9.6.1.5 */
1590 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid)) 1586 if (unlikely(be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid))
@@ -1643,11 +1639,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1643 opcode == OP(SEND_LAST) || 1639 opcode == OP(SEND_LAST) ||
1644 opcode == OP(SEND_LAST_WITH_IMMEDIATE)) 1640 opcode == OP(SEND_LAST_WITH_IMMEDIATE))
1645 break; 1641 break;
1646 nack_inv: 1642 goto nack_inv;
1647 ipath_rc_error(qp, IB_WC_REM_INV_REQ_ERR);
1648 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1649 qp->r_ack_psn = qp->r_psn;
1650 goto send_ack;
1651 1643
1652 case OP(RDMA_WRITE_FIRST): 1644 case OP(RDMA_WRITE_FIRST):
1653 case OP(RDMA_WRITE_MIDDLE): 1645 case OP(RDMA_WRITE_MIDDLE):
@@ -1673,18 +1665,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1673 break; 1665 break;
1674 } 1666 }
1675 1667
1676 wc.imm_data = 0; 1668 memset(&wc, 0, sizeof wc);
1677 wc.wc_flags = 0;
1678 1669
1679 /* OK, process the packet. */ 1670 /* OK, process the packet. */
1680 switch (opcode) { 1671 switch (opcode) {
1681 case OP(SEND_FIRST): 1672 case OP(SEND_FIRST):
1682 if (!ipath_get_rwqe(qp, 0)) { 1673 if (!ipath_get_rwqe(qp, 0))
1683 rnr_nak: 1674 goto rnr_nak;
1684 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1685 qp->r_ack_psn = qp->r_psn;
1686 goto send_ack;
1687 }
1688 qp->r_rcv_len = 0; 1675 qp->r_rcv_len = 0;
1689 /* FALLTHROUGH */ 1676 /* FALLTHROUGH */
1690 case OP(SEND_MIDDLE): 1677 case OP(SEND_MIDDLE):
@@ -1741,20 +1728,19 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1741 goto nack_inv; 1728 goto nack_inv;
1742 ipath_copy_sge(&qp->r_sge, data, tlen); 1729 ipath_copy_sge(&qp->r_sge, data, tlen);
1743 qp->r_msn++; 1730 qp->r_msn++;
1744 if (!qp->r_wrid_valid) 1731 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
1745 break; 1732 break;
1746 qp->r_wrid_valid = 0;
1747 wc.wr_id = qp->r_wr_id; 1733 wc.wr_id = qp->r_wr_id;
1748 wc.status = IB_WC_SUCCESS; 1734 wc.status = IB_WC_SUCCESS;
1749 wc.opcode = IB_WC_RECV; 1735 if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
1750 wc.vendor_err = 0; 1736 opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
1737 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
1738 else
1739 wc.opcode = IB_WC_RECV;
1751 wc.qp = &qp->ibqp; 1740 wc.qp = &qp->ibqp;
1752 wc.src_qp = qp->remote_qpn; 1741 wc.src_qp = qp->remote_qpn;
1753 wc.pkey_index = 0;
1754 wc.slid = qp->remote_ah_attr.dlid; 1742 wc.slid = qp->remote_ah_attr.dlid;
1755 wc.sl = qp->remote_ah_attr.sl; 1743 wc.sl = qp->remote_ah_attr.sl;
1756 wc.dlid_path_bits = 0;
1757 wc.port_num = 0;
1758 /* Signal completion event if the solicited bit is set. */ 1744 /* Signal completion event if the solicited bit is set. */
1759 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1745 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
1760 (ohdr->bth[0] & 1746 (ohdr->bth[0] &
@@ -1815,9 +1801,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1815 next = qp->r_head_ack_queue + 1; 1801 next = qp->r_head_ack_queue + 1;
1816 if (next > IPATH_MAX_RDMA_ATOMIC) 1802 if (next > IPATH_MAX_RDMA_ATOMIC)
1817 next = 0; 1803 next = 0;
1804 spin_lock_irqsave(&qp->s_lock, flags);
1805 /* Double check we can process this while holding the s_lock. */
1806 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1807 goto unlock;
1818 if (unlikely(next == qp->s_tail_ack_queue)) { 1808 if (unlikely(next == qp->s_tail_ack_queue)) {
1819 if (!qp->s_ack_queue[next].sent) 1809 if (!qp->s_ack_queue[next].sent)
1820 goto nack_inv; 1810 goto nack_inv_unlck;
1821 ipath_update_ack_queue(qp, next); 1811 ipath_update_ack_queue(qp, next);
1822 } 1812 }
1823 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 1813 e = &qp->s_ack_queue[qp->r_head_ack_queue];
@@ -1838,7 +1828,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1838 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr, 1828 ok = ipath_rkey_ok(qp, &e->rdma_sge, len, vaddr,
1839 rkey, IB_ACCESS_REMOTE_READ); 1829 rkey, IB_ACCESS_REMOTE_READ);
1840 if (unlikely(!ok)) 1830 if (unlikely(!ok))
1841 goto nack_acc; 1831 goto nack_acc_unlck;
1842 /* 1832 /*
1843 * Update the next expected PSN. We add 1 later 1833 * Update the next expected PSN. We add 1 later
1844 * below, so only add the remainder here. 1834 * below, so only add the remainder here.
@@ -1865,13 +1855,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1865 qp->r_psn++; 1855 qp->r_psn++;
1866 qp->r_state = opcode; 1856 qp->r_state = opcode;
1867 qp->r_nak_state = 0; 1857 qp->r_nak_state = 0;
1868 barrier();
1869 qp->r_head_ack_queue = next; 1858 qp->r_head_ack_queue = next;
1870 1859
1871 /* Call ipath_do_rc_send() in another thread. */ 1860 /* Schedule the send tasklet. */
1872 tasklet_hi_schedule(&qp->s_task); 1861 ipath_schedule_send(qp);
1873 1862
1874 goto done; 1863 goto unlock;
1875 } 1864 }
1876 1865
1877 case OP(COMPARE_SWAP): 1866 case OP(COMPARE_SWAP):
@@ -1890,9 +1879,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1890 next = qp->r_head_ack_queue + 1; 1879 next = qp->r_head_ack_queue + 1;
1891 if (next > IPATH_MAX_RDMA_ATOMIC) 1880 if (next > IPATH_MAX_RDMA_ATOMIC)
1892 next = 0; 1881 next = 0;
1882 spin_lock_irqsave(&qp->s_lock, flags);
1883 /* Double check we can process this while holding the s_lock. */
1884 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK))
1885 goto unlock;
1893 if (unlikely(next == qp->s_tail_ack_queue)) { 1886 if (unlikely(next == qp->s_tail_ack_queue)) {
1894 if (!qp->s_ack_queue[next].sent) 1887 if (!qp->s_ack_queue[next].sent)
1895 goto nack_inv; 1888 goto nack_inv_unlck;
1896 ipath_update_ack_queue(qp, next); 1889 ipath_update_ack_queue(qp, next);
1897 } 1890 }
1898 if (!header_in_data) 1891 if (!header_in_data)
@@ -1902,13 +1895,13 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1902 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | 1895 vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
1903 be32_to_cpu(ateth->vaddr[1]); 1896 be32_to_cpu(ateth->vaddr[1]);
1904 if (unlikely(vaddr & (sizeof(u64) - 1))) 1897 if (unlikely(vaddr & (sizeof(u64) - 1)))
1905 goto nack_inv; 1898 goto nack_inv_unlck;
1906 rkey = be32_to_cpu(ateth->rkey); 1899 rkey = be32_to_cpu(ateth->rkey);
1907 /* Check rkey & NAK */ 1900 /* Check rkey & NAK */
1908 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, 1901 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge,
1909 sizeof(u64), vaddr, rkey, 1902 sizeof(u64), vaddr, rkey,
1910 IB_ACCESS_REMOTE_ATOMIC))) 1903 IB_ACCESS_REMOTE_ATOMIC)))
1911 goto nack_acc; 1904 goto nack_acc_unlck;
1912 /* Perform atomic OP and save result. */ 1905 /* Perform atomic OP and save result. */
1913 maddr = (atomic64_t *) qp->r_sge.sge.vaddr; 1906 maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
1914 sdata = be64_to_cpu(ateth->swap_data); 1907 sdata = be64_to_cpu(ateth->swap_data);
@@ -1925,13 +1918,12 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1925 qp->r_psn++; 1918 qp->r_psn++;
1926 qp->r_state = opcode; 1919 qp->r_state = opcode;
1927 qp->r_nak_state = 0; 1920 qp->r_nak_state = 0;
1928 barrier();
1929 qp->r_head_ack_queue = next; 1921 qp->r_head_ack_queue = next;
1930 1922
1931 /* Call ipath_do_rc_send() in another thread. */ 1923 /* Schedule the send tasklet. */
1932 tasklet_hi_schedule(&qp->s_task); 1924 ipath_schedule_send(qp);
1933 1925
1934 goto done; 1926 goto unlock;
1935 } 1927 }
1936 1928
1937 default: 1929 default:
@@ -1947,14 +1939,31 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
1947 goto send_ack; 1939 goto send_ack;
1948 goto done; 1940 goto done;
1949 1941
1942rnr_nak:
1943 qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
1944 qp->r_ack_psn = qp->r_psn;
1945 goto send_ack;
1946
1947nack_inv_unlck:
1948 spin_unlock_irqrestore(&qp->s_lock, flags);
1949nack_inv:
1950 ipath_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
1951 qp->r_nak_state = IB_NAK_INVALID_REQUEST;
1952 qp->r_ack_psn = qp->r_psn;
1953 goto send_ack;
1954
1955nack_acc_unlck:
1956 spin_unlock_irqrestore(&qp->s_lock, flags);
1950nack_acc: 1957nack_acc:
1951 ipath_rc_error(qp, IB_WC_REM_ACCESS_ERR); 1958 ipath_rc_error(qp, IB_WC_LOC_PROT_ERR);
1952 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR; 1959 qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
1953 qp->r_ack_psn = qp->r_psn; 1960 qp->r_ack_psn = qp->r_psn;
1954
1955send_ack: 1961send_ack:
1956 send_rc_ack(qp); 1962 send_rc_ack(qp);
1963 goto done;
1957 1964
1965unlock:
1966 spin_unlock_irqrestore(&qp->s_lock, flags);
1958done: 1967done:
1959 return; 1968 return;
1960} 1969}
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 8ac5c1d82ccd..a4b5521567fe 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -78,6 +78,7 @@ const u32 ib_ipath_rnr_table[32] = {
78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device 78 * ipath_insert_rnr_queue - put QP on the RNR timeout list for the device
79 * @qp: the QP 79 * @qp: the QP
80 * 80 *
81 * Called with the QP s_lock held and interrupts disabled.
81 * XXX Use a simple list for now. We might need a priority 82 * XXX Use a simple list for now. We might need a priority
82 * queue if we have lots of QPs waiting for RNR timeouts 83 * queue if we have lots of QPs waiting for RNR timeouts
83 * but that should be rare. 84 * but that should be rare.
@@ -85,9 +86,9 @@ const u32 ib_ipath_rnr_table[32] = {
85void ipath_insert_rnr_queue(struct ipath_qp *qp) 86void ipath_insert_rnr_queue(struct ipath_qp *qp)
86{ 87{
87 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 88 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
88 unsigned long flags;
89 89
90 spin_lock_irqsave(&dev->pending_lock, flags); 90 /* We already did a spin_lock_irqsave(), so just use spin_lock */
91 spin_lock(&dev->pending_lock);
91 if (list_empty(&dev->rnrwait)) 92 if (list_empty(&dev->rnrwait))
92 list_add(&qp->timerwait, &dev->rnrwait); 93 list_add(&qp->timerwait, &dev->rnrwait);
93 else { 94 else {
@@ -109,7 +110,7 @@ void ipath_insert_rnr_queue(struct ipath_qp *qp)
109 nqp->s_rnr_timeout -= qp->s_rnr_timeout; 110 nqp->s_rnr_timeout -= qp->s_rnr_timeout;
110 list_add(&qp->timerwait, l); 111 list_add(&qp->timerwait, l);
111 } 112 }
112 spin_unlock_irqrestore(&dev->pending_lock, flags); 113 spin_unlock(&dev->pending_lock);
113} 114}
114 115
115/** 116/**
@@ -140,20 +141,11 @@ int ipath_init_sge(struct ipath_qp *qp, struct ipath_rwqe *wqe,
140 goto bail; 141 goto bail;
141 142
142bad_lkey: 143bad_lkey:
144 memset(&wc, 0, sizeof(wc));
143 wc.wr_id = wqe->wr_id; 145 wc.wr_id = wqe->wr_id;
144 wc.status = IB_WC_LOC_PROT_ERR; 146 wc.status = IB_WC_LOC_PROT_ERR;
145 wc.opcode = IB_WC_RECV; 147 wc.opcode = IB_WC_RECV;
146 wc.vendor_err = 0;
147 wc.byte_len = 0;
148 wc.imm_data = 0;
149 wc.qp = &qp->ibqp; 148 wc.qp = &qp->ibqp;
150 wc.src_qp = 0;
151 wc.wc_flags = 0;
152 wc.pkey_index = 0;
153 wc.slid = 0;
154 wc.sl = 0;
155 wc.dlid_path_bits = 0;
156 wc.port_num = 0;
157 /* Signal solicited completion event. */ 149 /* Signal solicited completion event. */
158 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1); 150 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
159 ret = 0; 151 ret = 0;
@@ -194,6 +186,11 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
194 } 186 }
195 187
196 spin_lock_irqsave(&rq->lock, flags); 188 spin_lock_irqsave(&rq->lock, flags);
189 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
190 ret = 0;
191 goto unlock;
192 }
193
197 wq = rq->wq; 194 wq = rq->wq;
198 tail = wq->tail; 195 tail = wq->tail;
199 /* Validate tail before using it since it is user writable. */ 196 /* Validate tail before using it since it is user writable. */
@@ -201,9 +198,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
201 tail = 0; 198 tail = 0;
202 do { 199 do {
203 if (unlikely(tail == wq->head)) { 200 if (unlikely(tail == wq->head)) {
204 spin_unlock_irqrestore(&rq->lock, flags);
205 ret = 0; 201 ret = 0;
206 goto bail; 202 goto unlock;
207 } 203 }
208 /* Make sure entry is read after head index is read. */ 204 /* Make sure entry is read after head index is read. */
209 smp_rmb(); 205 smp_rmb();
@@ -216,7 +212,7 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
216 wq->tail = tail; 212 wq->tail = tail;
217 213
218 ret = 1; 214 ret = 1;
219 qp->r_wrid_valid = 1; 215 set_bit(IPATH_R_WRID_VALID, &qp->r_aflags);
220 if (handler) { 216 if (handler) {
221 u32 n; 217 u32 n;
222 218
@@ -243,8 +239,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
243 goto bail; 239 goto bail;
244 } 240 }
245 } 241 }
242unlock:
246 spin_unlock_irqrestore(&rq->lock, flags); 243 spin_unlock_irqrestore(&rq->lock, flags);
247
248bail: 244bail:
249 return ret; 245 return ret;
250} 246}
@@ -270,38 +266,63 @@ static void ipath_ruc_loopback(struct ipath_qp *sqp)
270 struct ib_wc wc; 266 struct ib_wc wc;
271 u64 sdata; 267 u64 sdata;
272 atomic64_t *maddr; 268 atomic64_t *maddr;
269 enum ib_wc_status send_status;
273 270
271 /*
272 * Note that we check the responder QP state after
273 * checking the requester's state.
274 */
274 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn); 275 qp = ipath_lookup_qpn(&dev->qp_table, sqp->remote_qpn);
275 if (!qp) {
276 dev->n_pkt_drops++;
277 return;
278 }
279 276
280again:
281 spin_lock_irqsave(&sqp->s_lock, flags); 277 spin_lock_irqsave(&sqp->s_lock, flags);
282 278
283 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) || 279 /* Return if we are already busy processing a work request. */
284 sqp->s_rnr_timeout) { 280 if ((sqp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
285 spin_unlock_irqrestore(&sqp->s_lock, flags); 281 !(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
286 goto done; 282 goto unlock;
287 }
288 283
289 /* Get the next send request. */ 284 sqp->s_flags |= IPATH_S_BUSY;
290 if (sqp->s_last == sqp->s_head) { 285
291 /* Send work queue is empty. */ 286again:
292 spin_unlock_irqrestore(&sqp->s_lock, flags); 287 if (sqp->s_last == sqp->s_head)
293 goto done; 288 goto clr_busy;
289 wqe = get_swqe_ptr(sqp, sqp->s_last);
290
291 /* Return if it is not OK to start a new work request. */
292 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
293 if (!(ib_ipath_state_ops[sqp->state] & IPATH_FLUSH_SEND))
294 goto clr_busy;
295 /* We are in the error state, flush the work request. */
296 send_status = IB_WC_WR_FLUSH_ERR;
297 goto flush_send;
294 } 298 }
295 299
296 /* 300 /*
297 * We can rely on the entry not changing without the s_lock 301 * We can rely on the entry not changing without the s_lock
298 * being held until we update s_last. 302 * being held until we update s_last.
303 * We increment s_cur to indicate s_last is in progress.
299 */ 304 */
300 wqe = get_swqe_ptr(sqp, sqp->s_last); 305 if (sqp->s_last == sqp->s_cur) {
306 if (++sqp->s_cur >= sqp->s_size)
307 sqp->s_cur = 0;
308 }
301 spin_unlock_irqrestore(&sqp->s_lock, flags); 309 spin_unlock_irqrestore(&sqp->s_lock, flags);
302 310
303 wc.wc_flags = 0; 311 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
304 wc.imm_data = 0; 312 dev->n_pkt_drops++;
313 /*
314 * For RC, the requester would timeout and retry so
315 * shortcut the timeouts and just signal too many retries.
316 */
317 if (sqp->ibqp.qp_type == IB_QPT_RC)
318 send_status = IB_WC_RETRY_EXC_ERR;
319 else
320 send_status = IB_WC_SUCCESS;
321 goto serr;
322 }
323
324 memset(&wc, 0, sizeof wc);
325 send_status = IB_WC_SUCCESS;
305 326
306 sqp->s_sge.sge = wqe->sg_list[0]; 327 sqp->s_sge.sge = wqe->sg_list[0];
307 sqp->s_sge.sg_list = wqe->sg_list + 1; 328 sqp->s_sge.sg_list = wqe->sg_list + 1;
@@ -313,75 +334,33 @@ again:
313 wc.imm_data = wqe->wr.ex.imm_data; 334 wc.imm_data = wqe->wr.ex.imm_data;
314 /* FALLTHROUGH */ 335 /* FALLTHROUGH */
315 case IB_WR_SEND: 336 case IB_WR_SEND:
316 if (!ipath_get_rwqe(qp, 0)) { 337 if (!ipath_get_rwqe(qp, 0))
317 rnr_nak: 338 goto rnr_nak;
318 /* Handle RNR NAK */
319 if (qp->ibqp.qp_type == IB_QPT_UC)
320 goto send_comp;
321 if (sqp->s_rnr_retry == 0) {
322 wc.status = IB_WC_RNR_RETRY_EXC_ERR;
323 goto err;
324 }
325 if (sqp->s_rnr_retry_cnt < 7)
326 sqp->s_rnr_retry--;
327 dev->n_rnr_naks++;
328 sqp->s_rnr_timeout =
329 ib_ipath_rnr_table[qp->r_min_rnr_timer];
330 ipath_insert_rnr_queue(sqp);
331 goto done;
332 }
333 break; 339 break;
334 340
335 case IB_WR_RDMA_WRITE_WITH_IMM: 341 case IB_WR_RDMA_WRITE_WITH_IMM:
336 if (unlikely(!(qp->qp_access_flags & 342 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
337 IB_ACCESS_REMOTE_WRITE))) { 343 goto inv_err;
338 wc.status = IB_WC_REM_INV_REQ_ERR;
339 goto err;
340 }
341 wc.wc_flags = IB_WC_WITH_IMM; 344 wc.wc_flags = IB_WC_WITH_IMM;
342 wc.imm_data = wqe->wr.ex.imm_data; 345 wc.imm_data = wqe->wr.ex.imm_data;
343 if (!ipath_get_rwqe(qp, 1)) 346 if (!ipath_get_rwqe(qp, 1))
344 goto rnr_nak; 347 goto rnr_nak;
345 /* FALLTHROUGH */ 348 /* FALLTHROUGH */
346 case IB_WR_RDMA_WRITE: 349 case IB_WR_RDMA_WRITE:
347 if (unlikely(!(qp->qp_access_flags & 350 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
348 IB_ACCESS_REMOTE_WRITE))) { 351 goto inv_err;
349 wc.status = IB_WC_REM_INV_REQ_ERR;
350 goto err;
351 }
352 if (wqe->length == 0) 352 if (wqe->length == 0)
353 break; 353 break;
354 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length, 354 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
355 wqe->wr.wr.rdma.remote_addr, 355 wqe->wr.wr.rdma.remote_addr,
356 wqe->wr.wr.rdma.rkey, 356 wqe->wr.wr.rdma.rkey,
357 IB_ACCESS_REMOTE_WRITE))) { 357 IB_ACCESS_REMOTE_WRITE)))
358 acc_err: 358 goto acc_err;
359 wc.status = IB_WC_REM_ACCESS_ERR;
360 err:
361 wc.wr_id = wqe->wr.wr_id;
362 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
363 wc.vendor_err = 0;
364 wc.byte_len = 0;
365 wc.qp = &sqp->ibqp;
366 wc.src_qp = sqp->remote_qpn;
367 wc.pkey_index = 0;
368 wc.slid = sqp->remote_ah_attr.dlid;
369 wc.sl = sqp->remote_ah_attr.sl;
370 wc.dlid_path_bits = 0;
371 wc.port_num = 0;
372 spin_lock_irqsave(&sqp->s_lock, flags);
373 ipath_sqerror_qp(sqp, &wc);
374 spin_unlock_irqrestore(&sqp->s_lock, flags);
375 goto done;
376 }
377 break; 359 break;
378 360
379 case IB_WR_RDMA_READ: 361 case IB_WR_RDMA_READ:
380 if (unlikely(!(qp->qp_access_flags & 362 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
381 IB_ACCESS_REMOTE_READ))) { 363 goto inv_err;
382 wc.status = IB_WC_REM_INV_REQ_ERR;
383 goto err;
384 }
385 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length, 364 if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
386 wqe->wr.wr.rdma.remote_addr, 365 wqe->wr.wr.rdma.remote_addr,
387 wqe->wr.wr.rdma.rkey, 366 wqe->wr.wr.rdma.rkey,
@@ -394,11 +373,8 @@ again:
394 373
395 case IB_WR_ATOMIC_CMP_AND_SWP: 374 case IB_WR_ATOMIC_CMP_AND_SWP:
396 case IB_WR_ATOMIC_FETCH_AND_ADD: 375 case IB_WR_ATOMIC_FETCH_AND_ADD:
397 if (unlikely(!(qp->qp_access_flags & 376 if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
398 IB_ACCESS_REMOTE_ATOMIC))) { 377 goto inv_err;
399 wc.status = IB_WC_REM_INV_REQ_ERR;
400 goto err;
401 }
402 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64), 378 if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
403 wqe->wr.wr.atomic.remote_addr, 379 wqe->wr.wr.atomic.remote_addr,
404 wqe->wr.wr.atomic.rkey, 380 wqe->wr.wr.atomic.rkey,
@@ -415,7 +391,8 @@ again:
415 goto send_comp; 391 goto send_comp;
416 392
417 default: 393 default:
418 goto done; 394 send_status = IB_WC_LOC_QP_OP_ERR;
395 goto serr;
419 } 396 }
420 397
421 sge = &sqp->s_sge.sge; 398 sge = &sqp->s_sge.sge;
@@ -448,8 +425,7 @@ again:
448 sqp->s_len -= len; 425 sqp->s_len -= len;
449 } 426 }
450 427
451 if (wqe->wr.opcode == IB_WR_RDMA_WRITE || 428 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
452 wqe->wr.opcode == IB_WR_RDMA_READ)
453 goto send_comp; 429 goto send_comp;
454 430
455 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM) 431 if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -458,32 +434,89 @@ again:
458 wc.opcode = IB_WC_RECV; 434 wc.opcode = IB_WC_RECV;
459 wc.wr_id = qp->r_wr_id; 435 wc.wr_id = qp->r_wr_id;
460 wc.status = IB_WC_SUCCESS; 436 wc.status = IB_WC_SUCCESS;
461 wc.vendor_err = 0;
462 wc.byte_len = wqe->length; 437 wc.byte_len = wqe->length;
463 wc.qp = &qp->ibqp; 438 wc.qp = &qp->ibqp;
464 wc.src_qp = qp->remote_qpn; 439 wc.src_qp = qp->remote_qpn;
465 wc.pkey_index = 0;
466 wc.slid = qp->remote_ah_attr.dlid; 440 wc.slid = qp->remote_ah_attr.dlid;
467 wc.sl = qp->remote_ah_attr.sl; 441 wc.sl = qp->remote_ah_attr.sl;
468 wc.dlid_path_bits = 0;
469 wc.port_num = 1; 442 wc.port_num = 1;
470 /* Signal completion event if the solicited bit is set. */ 443 /* Signal completion event if the solicited bit is set. */
471 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 444 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
472 wqe->wr.send_flags & IB_SEND_SOLICITED); 445 wqe->wr.send_flags & IB_SEND_SOLICITED);
473 446
474send_comp: 447send_comp:
448 spin_lock_irqsave(&sqp->s_lock, flags);
449flush_send:
475 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt; 450 sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
476 ipath_send_complete(sqp, wqe, IB_WC_SUCCESS); 451 ipath_send_complete(sqp, wqe, send_status);
477 goto again; 452 goto again;
478 453
454rnr_nak:
455 /* Handle RNR NAK */
456 if (qp->ibqp.qp_type == IB_QPT_UC)
457 goto send_comp;
458 /*
459 * Note: we don't need the s_lock held since the BUSY flag
460 * makes this single threaded.
461 */
462 if (sqp->s_rnr_retry == 0) {
463 send_status = IB_WC_RNR_RETRY_EXC_ERR;
464 goto serr;
465 }
466 if (sqp->s_rnr_retry_cnt < 7)
467 sqp->s_rnr_retry--;
468 spin_lock_irqsave(&sqp->s_lock, flags);
469 if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_RECV_OK))
470 goto clr_busy;
471 sqp->s_flags |= IPATH_S_WAITING;
472 dev->n_rnr_naks++;
473 sqp->s_rnr_timeout = ib_ipath_rnr_table[qp->r_min_rnr_timer];
474 ipath_insert_rnr_queue(sqp);
475 goto clr_busy;
476
477inv_err:
478 send_status = IB_WC_REM_INV_REQ_ERR;
479 wc.status = IB_WC_LOC_QP_OP_ERR;
480 goto err;
481
482acc_err:
483 send_status = IB_WC_REM_ACCESS_ERR;
484 wc.status = IB_WC_LOC_PROT_ERR;
485err:
486 /* responder goes to error state */
487 ipath_rc_error(qp, wc.status);
488
489serr:
490 spin_lock_irqsave(&sqp->s_lock, flags);
491 ipath_send_complete(sqp, wqe, send_status);
492 if (sqp->ibqp.qp_type == IB_QPT_RC) {
493 int lastwqe = ipath_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
494
495 sqp->s_flags &= ~IPATH_S_BUSY;
496 spin_unlock_irqrestore(&sqp->s_lock, flags);
497 if (lastwqe) {
498 struct ib_event ev;
499
500 ev.device = sqp->ibqp.device;
501 ev.element.qp = &sqp->ibqp;
502 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
503 sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
504 }
505 goto done;
506 }
507clr_busy:
508 sqp->s_flags &= ~IPATH_S_BUSY;
509unlock:
510 spin_unlock_irqrestore(&sqp->s_lock, flags);
479done: 511done:
480 if (atomic_dec_and_test(&qp->refcount)) 512 if (qp && atomic_dec_and_test(&qp->refcount))
481 wake_up(&qp->wait); 513 wake_up(&qp->wait);
482} 514}
483 515
484static void want_buffer(struct ipath_devdata *dd) 516static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
485{ 517{
486 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) { 518 if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
519 qp->ibqp.qp_type == IB_QPT_SMI) {
487 unsigned long flags; 520 unsigned long flags;
488 521
489 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 522 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -501,26 +534,36 @@ static void want_buffer(struct ipath_devdata *dd)
501 * @dev: the device we ran out of buffers on 534 * @dev: the device we ran out of buffers on
502 * 535 *
503 * Called when we run out of PIO buffers. 536 * Called when we run out of PIO buffers.
537 * If we are now in the error state, return zero to flush the
538 * send work request.
504 */ 539 */
505static void ipath_no_bufs_available(struct ipath_qp *qp, 540static int ipath_no_bufs_available(struct ipath_qp *qp,
506 struct ipath_ibdev *dev) 541 struct ipath_ibdev *dev)
507{ 542{
508 unsigned long flags; 543 unsigned long flags;
544 int ret = 1;
509 545
510 /* 546 /*
511 * Note that as soon as want_buffer() is called and 547 * Note that as soon as want_buffer() is called and
512 * possibly before it returns, ipath_ib_piobufavail() 548 * possibly before it returns, ipath_ib_piobufavail()
513 * could be called. If we are still in the tasklet function, 549 * could be called. Therefore, put QP on the piowait list before
514 * tasklet_hi_schedule() will not call us until the next time 550 * enabling the PIO avail interrupt.
515 * tasklet_hi_schedule() is called.
516 * We leave the busy flag set so that another post send doesn't
517 * try to put the same QP on the piowait list again.
518 */ 551 */
519 spin_lock_irqsave(&dev->pending_lock, flags); 552 spin_lock_irqsave(&qp->s_lock, flags);
520 list_add_tail(&qp->piowait, &dev->piowait); 553 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
521 spin_unlock_irqrestore(&dev->pending_lock, flags); 554 dev->n_piowait++;
522 want_buffer(dev->dd); 555 qp->s_flags |= IPATH_S_WAITING;
523 dev->n_piowait++; 556 qp->s_flags &= ~IPATH_S_BUSY;
557 spin_lock(&dev->pending_lock);
558 if (list_empty(&qp->piowait))
559 list_add_tail(&qp->piowait, &dev->piowait);
560 spin_unlock(&dev->pending_lock);
561 } else
562 ret = 0;
563 spin_unlock_irqrestore(&qp->s_lock, flags);
564 if (ret)
565 want_buffer(dev->dd, qp);
566 return ret;
524} 567}
525 568
526/** 569/**
@@ -596,15 +639,13 @@ void ipath_do_send(unsigned long data)
596 struct ipath_qp *qp = (struct ipath_qp *)data; 639 struct ipath_qp *qp = (struct ipath_qp *)data;
597 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 640 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
598 int (*make_req)(struct ipath_qp *qp); 641 int (*make_req)(struct ipath_qp *qp);
599 642 unsigned long flags;
600 if (test_and_set_bit(IPATH_S_BUSY, &qp->s_busy))
601 goto bail;
602 643
603 if ((qp->ibqp.qp_type == IB_QPT_RC || 644 if ((qp->ibqp.qp_type == IB_QPT_RC ||
604 qp->ibqp.qp_type == IB_QPT_UC) && 645 qp->ibqp.qp_type == IB_QPT_UC) &&
605 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) { 646 qp->remote_ah_attr.dlid == dev->dd->ipath_lid) {
606 ipath_ruc_loopback(qp); 647 ipath_ruc_loopback(qp);
607 goto clear; 648 goto bail;
608 } 649 }
609 650
610 if (qp->ibqp.qp_type == IB_QPT_RC) 651 if (qp->ibqp.qp_type == IB_QPT_RC)
@@ -614,6 +655,19 @@ void ipath_do_send(unsigned long data)
614 else 655 else
615 make_req = ipath_make_ud_req; 656 make_req = ipath_make_ud_req;
616 657
658 spin_lock_irqsave(&qp->s_lock, flags);
659
660 /* Return if we are already busy processing a work request. */
661 if ((qp->s_flags & (IPATH_S_BUSY | IPATH_S_ANY_WAIT)) ||
662 !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND)) {
663 spin_unlock_irqrestore(&qp->s_lock, flags);
664 goto bail;
665 }
666
667 qp->s_flags |= IPATH_S_BUSY;
668
669 spin_unlock_irqrestore(&qp->s_lock, flags);
670
617again: 671again:
618 /* Check for a constructed packet to be sent. */ 672 /* Check for a constructed packet to be sent. */
619 if (qp->s_hdrwords != 0) { 673 if (qp->s_hdrwords != 0) {
@@ -623,8 +677,8 @@ again:
623 */ 677 */
624 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords, 678 if (ipath_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
625 qp->s_cur_sge, qp->s_cur_size)) { 679 qp->s_cur_sge, qp->s_cur_size)) {
626 ipath_no_bufs_available(qp, dev); 680 if (ipath_no_bufs_available(qp, dev))
627 goto bail; 681 goto bail;
628 } 682 }
629 dev->n_unicast_xmit++; 683 dev->n_unicast_xmit++;
630 /* Record that we sent the packet and s_hdr is empty. */ 684 /* Record that we sent the packet and s_hdr is empty. */
@@ -633,16 +687,20 @@ again:
633 687
634 if (make_req(qp)) 688 if (make_req(qp))
635 goto again; 689 goto again;
636clear: 690
637 clear_bit(IPATH_S_BUSY, &qp->s_busy);
638bail:; 691bail:;
639} 692}
640 693
694/*
695 * This should be called with s_lock held.
696 */
641void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe, 697void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
642 enum ib_wc_status status) 698 enum ib_wc_status status)
643{ 699{
644 unsigned long flags; 700 u32 old_last, last;
645 u32 last; 701
702 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_OR_FLUSH_SEND))
703 return;
646 704
647 /* See ch. 11.2.4.1 and 10.7.3.1 */ 705 /* See ch. 11.2.4.1 and 10.7.3.1 */
648 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) || 706 if (!(qp->s_flags & IPATH_S_SIGNAL_REQ_WR) ||
@@ -650,27 +708,25 @@ void ipath_send_complete(struct ipath_qp *qp, struct ipath_swqe *wqe,
650 status != IB_WC_SUCCESS) { 708 status != IB_WC_SUCCESS) {
651 struct ib_wc wc; 709 struct ib_wc wc;
652 710
711 memset(&wc, 0, sizeof wc);
653 wc.wr_id = wqe->wr.wr_id; 712 wc.wr_id = wqe->wr.wr_id;
654 wc.status = status; 713 wc.status = status;
655 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode]; 714 wc.opcode = ib_ipath_wc_opcode[wqe->wr.opcode];
656 wc.vendor_err = 0;
657 wc.byte_len = wqe->length;
658 wc.imm_data = 0;
659 wc.qp = &qp->ibqp; 715 wc.qp = &qp->ibqp;
660 wc.src_qp = 0; 716 if (status == IB_WC_SUCCESS)
661 wc.wc_flags = 0; 717 wc.byte_len = wqe->length;
662 wc.pkey_index = 0; 718 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
663 wc.slid = 0; 719 status != IB_WC_SUCCESS);
664 wc.sl = 0;
665 wc.dlid_path_bits = 0;
666 wc.port_num = 0;
667 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
668 } 720 }
669 721
670 spin_lock_irqsave(&qp->s_lock, flags); 722 old_last = last = qp->s_last;
671 last = qp->s_last;
672 if (++last >= qp->s_size) 723 if (++last >= qp->s_size)
673 last = 0; 724 last = 0;
674 qp->s_last = last; 725 qp->s_last = last;
675 spin_unlock_irqrestore(&qp->s_lock, flags); 726 if (qp->s_cur == old_last)
727 qp->s_cur = last;
728 if (qp->s_tail == old_last)
729 qp->s_tail = last;
730 if (qp->state == IB_QPS_SQD && last == qp->s_cur)
731 qp->s_draining = 0;
676} 732}
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 1974df7a9f78..3697449c1ba4 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
308 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags); 308 spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
309 309
310 /* 310 /*
311 * Don't restart sdma here. Wait until link is up to ACTIVE. 311 * Don't restart sdma here (with the exception
312 * VL15 MADs used to bring the link up use PIO, and multiple 312 * below). Wait until link is up to ACTIVE. VL15 MADs
313 * link transitions otherwise cause the sdma engine to be 313 * used to bring the link up use PIO, and multiple link
314 * transitions otherwise cause the sdma engine to be
314 * stopped and started multiple times. 315 * stopped and started multiple times.
315 * The disable is done here, including the shadow, so the 316 * The disable is done here, including the shadow,
316 * state is kept consistent. 317 * so the state is kept consistent.
317 * See ipath_restart_sdma() for the actual starting of sdma. 318 * See ipath_restart_sdma() for the actual starting
319 * of sdma.
318 */ 320 */
319 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 321 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
320 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE; 322 dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
@@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
326 /* make sure I see next message */ 328 /* make sure I see next message */
327 dd->ipath_sdma_abort_jiffies = 0; 329 dd->ipath_sdma_abort_jiffies = 0;
328 330
331 /*
332 * Not everything that takes SDMA offline is a link
333 * status change. If the link was up, restart SDMA.
334 */
335 if (dd->ipath_flags & IPATH_LINKACTIVE)
336 ipath_restart_sdma(dd);
337
329 goto done; 338 goto done;
330 } 339 }
331 340
@@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
427 goto done; 436 goto done;
428 } 437 }
429 438
430 dd->ipath_sdma_status = 0; 439 /*
440 * Set initial status as if we had been up, then gone down.
441 * This lets initial start on transition to ACTIVE be the
442 * same as restart after link flap.
443 */
444 dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
431 dd->ipath_sdma_abort_jiffies = 0; 445 dd->ipath_sdma_abort_jiffies = 0;
432 dd->ipath_sdma_generation = 0; 446 dd->ipath_sdma_generation = 0;
433 dd->ipath_sdma_descq_tail = 0; 447 dd->ipath_sdma_descq_tail = 0;
@@ -449,16 +463,19 @@ int setup_sdma(struct ipath_devdata *dd)
449 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr, 463 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
450 dd->ipath_sdma_head_phys); 464 dd->ipath_sdma_head_phys);
451 465
452 /* Reserve all the former "kernel" piobufs */ 466 /*
453 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved; 467 * Reserve all the former "kernel" piobufs, using high number range
454 for (i = dd->ipath_lastport_piobuf; i < n; ++i) { 468 * so we get as many 4K buffers as possible
469 */
470 n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
471 i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
472 ipath_chg_pioavailkernel(dd, i, n - i , 0);
473 for (; i < n; ++i) {
455 unsigned word = i / 64; 474 unsigned word = i / 64;
456 unsigned bit = i & 63; 475 unsigned bit = i & 63;
457 BUG_ON(word >= 3); 476 BUG_ON(word >= 3);
458 senddmabufmask[word] |= 1ULL << bit; 477 senddmabufmask[word] |= 1ULL << bit;
459 } 478 }
460 ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
461 n - dd->ipath_lastport_piobuf, 0);
462 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0, 479 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
463 senddmabufmask[0]); 480 senddmabufmask[0]);
464 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1, 481 ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
@@ -615,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
615 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 632 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
616 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 633 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
617 634
635 /* notify upper layers */
636 ipath_ib_piobufavail(dd->verbs_dev);
637
618bail: 638bail:
619 return; 639 return;
620} 640}
diff --git a/drivers/infiniband/hw/ipath/ipath_uc.c b/drivers/infiniband/hw/ipath/ipath_uc.c
index bfe8926b5514..7fd18e833907 100644
--- a/drivers/infiniband/hw/ipath/ipath_uc.c
+++ b/drivers/infiniband/hw/ipath/ipath_uc.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -47,14 +47,30 @@ int ipath_make_uc_req(struct ipath_qp *qp)
47{ 47{
48 struct ipath_other_headers *ohdr; 48 struct ipath_other_headers *ohdr;
49 struct ipath_swqe *wqe; 49 struct ipath_swqe *wqe;
50 unsigned long flags;
50 u32 hwords; 51 u32 hwords;
51 u32 bth0; 52 u32 bth0;
52 u32 len; 53 u32 len;
53 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu); 54 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
54 int ret = 0; 55 int ret = 0;
55 56
56 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) 57 spin_lock_irqsave(&qp->s_lock, flags);
58
59 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)) {
60 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
61 goto bail;
62 /* We are in the error state, flush the work request. */
63 if (qp->s_last == qp->s_head)
64 goto bail;
65 /* If DMAs are in progress, we can't flush immediately. */
66 if (atomic_read(&qp->s_dma_busy)) {
67 qp->s_flags |= IPATH_S_WAIT_DMA;
68 goto bail;
69 }
70 wqe = get_swqe_ptr(qp, qp->s_last);
71 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
57 goto done; 72 goto done;
73 }
58 74
59 ohdr = &qp->s_hdr.u.oth; 75 ohdr = &qp->s_hdr.u.oth;
60 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -69,9 +85,12 @@ int ipath_make_uc_req(struct ipath_qp *qp)
69 qp->s_wqe = NULL; 85 qp->s_wqe = NULL;
70 switch (qp->s_state) { 86 switch (qp->s_state) {
71 default: 87 default:
88 if (!(ib_ipath_state_ops[qp->state] &
89 IPATH_PROCESS_NEXT_SEND_OK))
90 goto bail;
72 /* Check if send work queue is empty. */ 91 /* Check if send work queue is empty. */
73 if (qp->s_cur == qp->s_head) 92 if (qp->s_cur == qp->s_head)
74 goto done; 93 goto bail;
75 /* 94 /*
76 * Start a new request. 95 * Start a new request.
77 */ 96 */
@@ -134,7 +153,7 @@ int ipath_make_uc_req(struct ipath_qp *qp)
134 break; 153 break;
135 154
136 default: 155 default:
137 goto done; 156 goto bail;
138 } 157 }
139 break; 158 break;
140 159
@@ -194,9 +213,14 @@ int ipath_make_uc_req(struct ipath_qp *qp)
194 ipath_make_ruc_header(to_idev(qp->ibqp.device), 213 ipath_make_ruc_header(to_idev(qp->ibqp.device),
195 qp, ohdr, bth0 | (qp->s_state << 24), 214 qp, ohdr, bth0 | (qp->s_state << 24),
196 qp->s_next_psn++ & IPATH_PSN_MASK); 215 qp->s_next_psn++ & IPATH_PSN_MASK);
216done:
197 ret = 1; 217 ret = 1;
218 goto unlock;
198 219
199done: 220bail:
221 qp->s_flags &= ~IPATH_S_BUSY;
222unlock:
223 spin_unlock_irqrestore(&qp->s_lock, flags);
200 return ret; 224 return ret;
201} 225}
202 226
@@ -258,8 +282,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
258 */ 282 */
259 opcode = be32_to_cpu(ohdr->bth[0]) >> 24; 283 opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
260 284
261 wc.imm_data = 0; 285 memset(&wc, 0, sizeof wc);
262 wc.wc_flags = 0;
263 286
264 /* Compare the PSN versus the expected PSN. */ 287 /* Compare the PSN versus the expected PSN. */
265 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) { 288 if (unlikely(ipath_cmp24(psn, qp->r_psn) != 0)) {
@@ -322,8 +345,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
322 case OP(SEND_ONLY): 345 case OP(SEND_ONLY):
323 case OP(SEND_ONLY_WITH_IMMEDIATE): 346 case OP(SEND_ONLY_WITH_IMMEDIATE):
324 send_first: 347 send_first:
325 if (qp->r_reuse_sge) { 348 if (qp->r_flags & IPATH_R_REUSE_SGE) {
326 qp->r_reuse_sge = 0; 349 qp->r_flags &= ~IPATH_R_REUSE_SGE;
327 qp->r_sge = qp->s_rdma_read_sge; 350 qp->r_sge = qp->s_rdma_read_sge;
328 } else if (!ipath_get_rwqe(qp, 0)) { 351 } else if (!ipath_get_rwqe(qp, 0)) {
329 dev->n_pkt_drops++; 352 dev->n_pkt_drops++;
@@ -340,13 +363,13 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
340 case OP(SEND_MIDDLE): 363 case OP(SEND_MIDDLE):
341 /* Check for invalid length PMTU or posted rwqe len. */ 364 /* Check for invalid length PMTU or posted rwqe len. */
342 if (unlikely(tlen != (hdrsize + pmtu + 4))) { 365 if (unlikely(tlen != (hdrsize + pmtu + 4))) {
343 qp->r_reuse_sge = 1; 366 qp->r_flags |= IPATH_R_REUSE_SGE;
344 dev->n_pkt_drops++; 367 dev->n_pkt_drops++;
345 goto done; 368 goto done;
346 } 369 }
347 qp->r_rcv_len += pmtu; 370 qp->r_rcv_len += pmtu;
348 if (unlikely(qp->r_rcv_len > qp->r_len)) { 371 if (unlikely(qp->r_rcv_len > qp->r_len)) {
349 qp->r_reuse_sge = 1; 372 qp->r_flags |= IPATH_R_REUSE_SGE;
350 dev->n_pkt_drops++; 373 dev->n_pkt_drops++;
351 goto done; 374 goto done;
352 } 375 }
@@ -372,7 +395,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
372 /* Check for invalid length. */ 395 /* Check for invalid length. */
373 /* XXX LAST len should be >= 1 */ 396 /* XXX LAST len should be >= 1 */
374 if (unlikely(tlen < (hdrsize + pad + 4))) { 397 if (unlikely(tlen < (hdrsize + pad + 4))) {
375 qp->r_reuse_sge = 1; 398 qp->r_flags |= IPATH_R_REUSE_SGE;
376 dev->n_pkt_drops++; 399 dev->n_pkt_drops++;
377 goto done; 400 goto done;
378 } 401 }
@@ -380,7 +403,7 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
380 tlen -= (hdrsize + pad + 4); 403 tlen -= (hdrsize + pad + 4);
381 wc.byte_len = tlen + qp->r_rcv_len; 404 wc.byte_len = tlen + qp->r_rcv_len;
382 if (unlikely(wc.byte_len > qp->r_len)) { 405 if (unlikely(wc.byte_len > qp->r_len)) {
383 qp->r_reuse_sge = 1; 406 qp->r_flags |= IPATH_R_REUSE_SGE;
384 dev->n_pkt_drops++; 407 dev->n_pkt_drops++;
385 goto done; 408 goto done;
386 } 409 }
@@ -390,14 +413,10 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
390 wc.wr_id = qp->r_wr_id; 413 wc.wr_id = qp->r_wr_id;
391 wc.status = IB_WC_SUCCESS; 414 wc.status = IB_WC_SUCCESS;
392 wc.opcode = IB_WC_RECV; 415 wc.opcode = IB_WC_RECV;
393 wc.vendor_err = 0;
394 wc.qp = &qp->ibqp; 416 wc.qp = &qp->ibqp;
395 wc.src_qp = qp->remote_qpn; 417 wc.src_qp = qp->remote_qpn;
396 wc.pkey_index = 0;
397 wc.slid = qp->remote_ah_attr.dlid; 418 wc.slid = qp->remote_ah_attr.dlid;
398 wc.sl = qp->remote_ah_attr.sl; 419 wc.sl = qp->remote_ah_attr.sl;
399 wc.dlid_path_bits = 0;
400 wc.port_num = 0;
401 /* Signal completion event if the solicited bit is set. */ 420 /* Signal completion event if the solicited bit is set. */
402 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 421 ipath_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
403 (ohdr->bth[0] & 422 (ohdr->bth[0] &
@@ -488,8 +507,8 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
488 dev->n_pkt_drops++; 507 dev->n_pkt_drops++;
489 goto done; 508 goto done;
490 } 509 }
491 if (qp->r_reuse_sge) 510 if (qp->r_flags & IPATH_R_REUSE_SGE)
492 qp->r_reuse_sge = 0; 511 qp->r_flags &= ~IPATH_R_REUSE_SGE;
493 else if (!ipath_get_rwqe(qp, 1)) { 512 else if (!ipath_get_rwqe(qp, 1)) {
494 dev->n_pkt_drops++; 513 dev->n_pkt_drops++;
495 goto done; 514 goto done;
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index 8b6a261c89e3..77ca8ca74e78 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -65,9 +65,9 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
65 u32 length; 65 u32 length;
66 66
67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn); 67 qp = ipath_lookup_qpn(&dev->qp_table, swqe->wr.wr.ud.remote_qpn);
68 if (!qp) { 68 if (!qp || !(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
69 dev->n_pkt_drops++; 69 dev->n_pkt_drops++;
70 goto send_comp; 70 goto done;
71 } 71 }
72 72
73 rsge.sg_list = NULL; 73 rsge.sg_list = NULL;
@@ -91,14 +91,12 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
91 * present on the wire. 91 * present on the wire.
92 */ 92 */
93 length = swqe->length; 93 length = swqe->length;
94 memset(&wc, 0, sizeof wc);
94 wc.byte_len = length + sizeof(struct ib_grh); 95 wc.byte_len = length + sizeof(struct ib_grh);
95 96
96 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 97 if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
97 wc.wc_flags = IB_WC_WITH_IMM; 98 wc.wc_flags = IB_WC_WITH_IMM;
98 wc.imm_data = swqe->wr.ex.imm_data; 99 wc.imm_data = swqe->wr.ex.imm_data;
99 } else {
100 wc.wc_flags = 0;
101 wc.imm_data = 0;
102 } 100 }
103 101
104 /* 102 /*
@@ -229,7 +227,6 @@ static void ipath_ud_loopback(struct ipath_qp *sqp, struct ipath_swqe *swqe)
229 } 227 }
230 wc.status = IB_WC_SUCCESS; 228 wc.status = IB_WC_SUCCESS;
231 wc.opcode = IB_WC_RECV; 229 wc.opcode = IB_WC_RECV;
232 wc.vendor_err = 0;
233 wc.qp = &qp->ibqp; 230 wc.qp = &qp->ibqp;
234 wc.src_qp = sqp->ibqp.qp_num; 231 wc.src_qp = sqp->ibqp.qp_num;
235 /* XXX do we know which pkey matched? Only needed for GSI. */ 232 /* XXX do we know which pkey matched? Only needed for GSI. */
@@ -248,8 +245,7 @@ drop:
248 kfree(rsge.sg_list); 245 kfree(rsge.sg_list);
249 if (atomic_dec_and_test(&qp->refcount)) 246 if (atomic_dec_and_test(&qp->refcount))
250 wake_up(&qp->wait); 247 wake_up(&qp->wait);
251send_comp: 248done:;
252 ipath_send_complete(sqp, swqe, IB_WC_SUCCESS);
253} 249}
254 250
255/** 251/**
@@ -264,6 +260,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
264 struct ipath_other_headers *ohdr; 260 struct ipath_other_headers *ohdr;
265 struct ib_ah_attr *ah_attr; 261 struct ib_ah_attr *ah_attr;
266 struct ipath_swqe *wqe; 262 struct ipath_swqe *wqe;
263 unsigned long flags;
267 u32 nwords; 264 u32 nwords;
268 u32 extra_bytes; 265 u32 extra_bytes;
269 u32 bth0; 266 u32 bth0;
@@ -271,13 +268,30 @@ int ipath_make_ud_req(struct ipath_qp *qp)
271 u16 lid; 268 u16 lid;
272 int ret = 0; 269 int ret = 0;
273 270
274 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK))) 271 spin_lock_irqsave(&qp->s_lock, flags);
275 goto bail; 272
273 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_NEXT_SEND_OK)) {
274 if (!(ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND))
275 goto bail;
276 /* We are in the error state, flush the work request. */
277 if (qp->s_last == qp->s_head)
278 goto bail;
279 /* If DMAs are in progress, we can't flush immediately. */
280 if (atomic_read(&qp->s_dma_busy)) {
281 qp->s_flags |= IPATH_S_WAIT_DMA;
282 goto bail;
283 }
284 wqe = get_swqe_ptr(qp, qp->s_last);
285 ipath_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
286 goto done;
287 }
276 288
277 if (qp->s_cur == qp->s_head) 289 if (qp->s_cur == qp->s_head)
278 goto bail; 290 goto bail;
279 291
280 wqe = get_swqe_ptr(qp, qp->s_cur); 292 wqe = get_swqe_ptr(qp, qp->s_cur);
293 if (++qp->s_cur >= qp->s_size)
294 qp->s_cur = 0;
281 295
282 /* Construct the header. */ 296 /* Construct the header. */
283 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr; 297 ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
@@ -288,10 +302,23 @@ int ipath_make_ud_req(struct ipath_qp *qp)
288 dev->n_unicast_xmit++; 302 dev->n_unicast_xmit++;
289 } else { 303 } else {
290 dev->n_unicast_xmit++; 304 dev->n_unicast_xmit++;
291 lid = ah_attr->dlid & 305 lid = ah_attr->dlid & ~((1 << dev->dd->ipath_lmc) - 1);
292 ~((1 << dev->dd->ipath_lmc) - 1);
293 if (unlikely(lid == dev->dd->ipath_lid)) { 306 if (unlikely(lid == dev->dd->ipath_lid)) {
307 /*
308 * If DMAs are in progress, we can't generate
309 * a completion for the loopback packet since
310 * it would be out of order.
311 * XXX Instead of waiting, we could queue a
312 * zero length descriptor so we get a callback.
313 */
314 if (atomic_read(&qp->s_dma_busy)) {
315 qp->s_flags |= IPATH_S_WAIT_DMA;
316 goto bail;
317 }
318 spin_unlock_irqrestore(&qp->s_lock, flags);
294 ipath_ud_loopback(qp, wqe); 319 ipath_ud_loopback(qp, wqe);
320 spin_lock_irqsave(&qp->s_lock, flags);
321 ipath_send_complete(qp, wqe, IB_WC_SUCCESS);
295 goto done; 322 goto done;
296 } 323 }
297 } 324 }
@@ -368,11 +395,13 @@ int ipath_make_ud_req(struct ipath_qp *qp)
368 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num); 395 ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
369 396
370done: 397done:
371 if (++qp->s_cur >= qp->s_size)
372 qp->s_cur = 0;
373 ret = 1; 398 ret = 1;
399 goto unlock;
374 400
375bail: 401bail:
402 qp->s_flags &= ~IPATH_S_BUSY;
403unlock:
404 spin_unlock_irqrestore(&qp->s_lock, flags);
376 return ret; 405 return ret;
377} 406}
378 407
@@ -506,8 +535,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
506 /* 535 /*
507 * Get the next work request entry to find where to put the data. 536 * Get the next work request entry to find where to put the data.
508 */ 537 */
509 if (qp->r_reuse_sge) 538 if (qp->r_flags & IPATH_R_REUSE_SGE)
510 qp->r_reuse_sge = 0; 539 qp->r_flags &= ~IPATH_R_REUSE_SGE;
511 else if (!ipath_get_rwqe(qp, 0)) { 540 else if (!ipath_get_rwqe(qp, 0)) {
512 /* 541 /*
513 * Count VL15 packets dropped due to no receive buffer. 542 * Count VL15 packets dropped due to no receive buffer.
@@ -523,7 +552,7 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
523 } 552 }
524 /* Silently drop packets which are too big. */ 553 /* Silently drop packets which are too big. */
525 if (wc.byte_len > qp->r_len) { 554 if (wc.byte_len > qp->r_len) {
526 qp->r_reuse_sge = 1; 555 qp->r_flags |= IPATH_R_REUSE_SGE;
527 dev->n_pkt_drops++; 556 dev->n_pkt_drops++;
528 goto bail; 557 goto bail;
529 } 558 }
@@ -535,7 +564,8 @@ void ipath_ud_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
535 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh)); 564 ipath_skip_sge(&qp->r_sge, sizeof(struct ib_grh));
536 ipath_copy_sge(&qp->r_sge, data, 565 ipath_copy_sge(&qp->r_sge, data,
537 wc.byte_len - sizeof(struct ib_grh)); 566 wc.byte_len - sizeof(struct ib_grh));
538 qp->r_wrid_valid = 0; 567 if (!test_and_clear_bit(IPATH_R_WRID_VALID, &qp->r_aflags))
568 goto bail;
539 wc.wr_id = qp->r_wr_id; 569 wc.wr_id = qp->r_wr_id;
540 wc.status = IB_WC_SUCCESS; 570 wc.status = IB_WC_SUCCESS;
541 wc.opcode = IB_WC_RECV; 571 wc.opcode = IB_WC_RECV;
diff --git a/drivers/infiniband/hw/ipath/ipath_user_sdma.h b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
index e70946c1428c..fc76316c4a58 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_sdma.h
+++ b/drivers/infiniband/hw/ipath/ipath_user_sdma.h
@@ -45,8 +45,6 @@ int ipath_user_sdma_writev(struct ipath_devdata *dd,
45int ipath_user_sdma_make_progress(struct ipath_devdata *dd, 45int ipath_user_sdma_make_progress(struct ipath_devdata *dd,
46 struct ipath_user_sdma_queue *pq); 46 struct ipath_user_sdma_queue *pq);
47 47
48int ipath_user_sdma_pkt_sent(const struct ipath_user_sdma_queue *pq,
49 u32 counter);
50void ipath_user_sdma_queue_drain(struct ipath_devdata *dd, 48void ipath_user_sdma_queue_drain(struct ipath_devdata *dd,
51 struct ipath_user_sdma_queue *pq); 49 struct ipath_user_sdma_queue *pq);
52 50
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index e63927cce5b5..e0ec540042bf 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -111,16 +111,24 @@ static unsigned int ib_ipath_disable_sma;
111module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO); 111module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
112MODULE_PARM_DESC(disable_sma, "Disable the SMA"); 112MODULE_PARM_DESC(disable_sma, "Disable the SMA");
113 113
114/*
115 * Note that it is OK to post send work requests in the SQE and ERR
116 * states; ipath_do_send() will process them and generate error
117 * completions as per IB 1.2 C10-96.
118 */
114const int ib_ipath_state_ops[IB_QPS_ERR + 1] = { 119const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
115 [IB_QPS_RESET] = 0, 120 [IB_QPS_RESET] = 0,
116 [IB_QPS_INIT] = IPATH_POST_RECV_OK, 121 [IB_QPS_INIT] = IPATH_POST_RECV_OK,
117 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 122 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
118 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 123 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
119 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK, 124 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
125 IPATH_PROCESS_NEXT_SEND_OK,
120 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK | 126 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
121 IPATH_POST_SEND_OK, 127 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
122 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK, 128 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
123 [IB_QPS_ERR] = 0, 129 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
130 [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
131 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
124}; 132};
125 133
126struct ipath_ucontext { 134struct ipath_ucontext {
@@ -230,18 +238,6 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
230 } 238 }
231} 239}
232 240
233static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
234{
235 struct ib_wc wc;
236
237 memset(&wc, 0, sizeof(wc));
238 wc.wr_id = wr->wr_id;
239 wc.status = IB_WC_WR_FLUSH_ERR;
240 wc.opcode = ib_ipath_wc_opcode[wr->opcode];
241 wc.qp = &qp->ibqp;
242 ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
243}
244
245/* 241/*
246 * Count the number of DMA descriptors needed to send length bytes of data. 242 * Count the number of DMA descriptors needed to send length bytes of data.
247 * Don't modify the ipath_sge_state to get the count. 243 * Don't modify the ipath_sge_state to get the count.
@@ -347,14 +343,8 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
347 spin_lock_irqsave(&qp->s_lock, flags); 343 spin_lock_irqsave(&qp->s_lock, flags);
348 344
349 /* Check that state is OK to post send. */ 345 /* Check that state is OK to post send. */
350 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK))) { 346 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
351 if (qp->state != IB_QPS_SQE && qp->state != IB_QPS_ERR) 347 goto bail_inval;
352 goto bail_inval;
353 /* C10-96 says generate a flushed completion entry. */
354 ipath_flush_wqe(qp, wr);
355 ret = 0;
356 goto bail;
357 }
358 348
359 /* IB spec says that num_sge == 0 is OK. */ 349 /* IB spec says that num_sge == 0 is OK. */
360 if (wr->num_sge > qp->s_max_sge) 350 if (wr->num_sge > qp->s_max_sge)
@@ -396,7 +386,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
396 386
397 wqe = get_swqe_ptr(qp, qp->s_head); 387 wqe = get_swqe_ptr(qp, qp->s_head);
398 wqe->wr = *wr; 388 wqe->wr = *wr;
399 wqe->ssn = qp->s_ssn++;
400 wqe->length = 0; 389 wqe->length = 0;
401 if (wr->num_sge) { 390 if (wr->num_sge) {
402 acc = wr->opcode >= IB_WR_RDMA_READ ? 391 acc = wr->opcode >= IB_WR_RDMA_READ ?
@@ -422,6 +411,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
422 goto bail_inval; 411 goto bail_inval;
423 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu) 412 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
424 goto bail_inval; 413 goto bail_inval;
414 wqe->ssn = qp->s_ssn++;
425 qp->s_head = next; 415 qp->s_head = next;
426 416
427 ret = 0; 417 ret = 0;
@@ -677,6 +667,7 @@ bail:;
677static void ipath_ib_timer(struct ipath_ibdev *dev) 667static void ipath_ib_timer(struct ipath_ibdev *dev)
678{ 668{
679 struct ipath_qp *resend = NULL; 669 struct ipath_qp *resend = NULL;
670 struct ipath_qp *rnr = NULL;
680 struct list_head *last; 671 struct list_head *last;
681 struct ipath_qp *qp; 672 struct ipath_qp *qp;
682 unsigned long flags; 673 unsigned long flags;
@@ -703,7 +694,9 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
703 if (--qp->s_rnr_timeout == 0) { 694 if (--qp->s_rnr_timeout == 0) {
704 do { 695 do {
705 list_del_init(&qp->timerwait); 696 list_del_init(&qp->timerwait);
706 tasklet_hi_schedule(&qp->s_task); 697 qp->timer_next = rnr;
698 rnr = qp;
699 atomic_inc(&qp->refcount);
707 if (list_empty(last)) 700 if (list_empty(last))
708 break; 701 break;
709 qp = list_entry(last->next, struct ipath_qp, 702 qp = list_entry(last->next, struct ipath_qp,
@@ -743,13 +736,15 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
743 spin_unlock_irqrestore(&dev->pending_lock, flags); 736 spin_unlock_irqrestore(&dev->pending_lock, flags);
744 737
745 /* XXX What if timer fires again while this is running? */ 738 /* XXX What if timer fires again while this is running? */
746 for (qp = resend; qp != NULL; qp = qp->timer_next) { 739 while (resend != NULL) {
747 struct ib_wc wc; 740 qp = resend;
741 resend = qp->timer_next;
748 742
749 spin_lock_irqsave(&qp->s_lock, flags); 743 spin_lock_irqsave(&qp->s_lock, flags);
750 if (qp->s_last != qp->s_tail && qp->state == IB_QPS_RTS) { 744 if (qp->s_last != qp->s_tail &&
745 ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
751 dev->n_timeouts++; 746 dev->n_timeouts++;
752 ipath_restart_rc(qp, qp->s_last_psn + 1, &wc); 747 ipath_restart_rc(qp, qp->s_last_psn + 1);
753 } 748 }
754 spin_unlock_irqrestore(&qp->s_lock, flags); 749 spin_unlock_irqrestore(&qp->s_lock, flags);
755 750
@@ -757,6 +752,19 @@ static void ipath_ib_timer(struct ipath_ibdev *dev)
757 if (atomic_dec_and_test(&qp->refcount)) 752 if (atomic_dec_and_test(&qp->refcount))
758 wake_up(&qp->wait); 753 wake_up(&qp->wait);
759 } 754 }
755 while (rnr != NULL) {
756 qp = rnr;
757 rnr = qp->timer_next;
758
759 spin_lock_irqsave(&qp->s_lock, flags);
760 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
761 ipath_schedule_send(qp);
762 spin_unlock_irqrestore(&qp->s_lock, flags);
763
764 /* Notify ipath_destroy_qp() if it is waiting. */
765 if (atomic_dec_and_test(&qp->refcount))
766 wake_up(&qp->wait);
767 }
760} 768}
761 769
762static void update_sge(struct ipath_sge_state *ss, u32 length) 770static void update_sge(struct ipath_sge_state *ss, u32 length)
@@ -1012,13 +1020,24 @@ static void sdma_complete(void *cookie, int status)
1012 struct ipath_verbs_txreq *tx = cookie; 1020 struct ipath_verbs_txreq *tx = cookie;
1013 struct ipath_qp *qp = tx->qp; 1021 struct ipath_qp *qp = tx->qp;
1014 struct ipath_ibdev *dev = to_idev(qp->ibqp.device); 1022 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1023 unsigned int flags;
1024 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1025 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1015 1026
1016 /* Generate a completion queue entry if needed */ 1027 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1017 if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) { 1028 spin_lock_irqsave(&qp->s_lock, flags);
1018 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ? 1029 if (tx->wqe)
1019 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR; 1030 ipath_send_complete(qp, tx->wqe, ibs);
1020 1031 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1032 qp->s_last != qp->s_head) ||
1033 (qp->s_flags & IPATH_S_WAIT_DMA))
1034 ipath_schedule_send(qp);
1035 spin_unlock_irqrestore(&qp->s_lock, flags);
1036 wake_up(&qp->wait_dma);
1037 } else if (tx->wqe) {
1038 spin_lock_irqsave(&qp->s_lock, flags);
1021 ipath_send_complete(qp, tx->wqe, ibs); 1039 ipath_send_complete(qp, tx->wqe, ibs);
1040 spin_unlock_irqrestore(&qp->s_lock, flags);
1022 } 1041 }
1023 1042
1024 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF) 1043 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
@@ -1029,6 +1048,21 @@ static void sdma_complete(void *cookie, int status)
1029 wake_up(&qp->wait); 1048 wake_up(&qp->wait);
1030} 1049}
1031 1050
1051static void decrement_dma_busy(struct ipath_qp *qp)
1052{
1053 unsigned int flags;
1054
1055 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1056 spin_lock_irqsave(&qp->s_lock, flags);
1057 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1058 qp->s_last != qp->s_head) ||
1059 (qp->s_flags & IPATH_S_WAIT_DMA))
1060 ipath_schedule_send(qp);
1061 spin_unlock_irqrestore(&qp->s_lock, flags);
1062 wake_up(&qp->wait_dma);
1063 }
1064}
1065
1032/* 1066/*
1033 * Compute the number of clock cycles of delay before sending the next packet. 1067 * Compute the number of clock cycles of delay before sending the next packet.
1034 * The multipliers reflect the number of clocks for the fastest rate so 1068 * The multipliers reflect the number of clocks for the fastest rate so
@@ -1067,9 +1101,12 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1067 if (tx) { 1101 if (tx) {
1068 qp->s_tx = NULL; 1102 qp->s_tx = NULL;
1069 /* resend previously constructed packet */ 1103 /* resend previously constructed packet */
1104 atomic_inc(&qp->s_dma_busy);
1070 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx); 1105 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1071 if (ret) 1106 if (ret) {
1072 qp->s_tx = tx; 1107 qp->s_tx = tx;
1108 decrement_dma_busy(qp);
1109 }
1073 goto bail; 1110 goto bail;
1074 } 1111 }
1075 1112
@@ -1120,12 +1157,14 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1120 tx->txreq.sg_count = ndesc; 1157 tx->txreq.sg_count = ndesc;
1121 tx->map_len = (hdrwords + 2) << 2; 1158 tx->map_len = (hdrwords + 2) << 2;
1122 tx->txreq.map_addr = &tx->hdr; 1159 tx->txreq.map_addr = &tx->hdr;
1160 atomic_inc(&qp->s_dma_busy);
1123 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx); 1161 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1124 if (ret) { 1162 if (ret) {
1125 /* save ss and length in dwords */ 1163 /* save ss and length in dwords */
1126 tx->ss = ss; 1164 tx->ss = ss;
1127 tx->len = dwords; 1165 tx->len = dwords;
1128 qp->s_tx = tx; 1166 qp->s_tx = tx;
1167 decrement_dma_busy(qp);
1129 } 1168 }
1130 goto bail; 1169 goto bail;
1131 } 1170 }
@@ -1146,6 +1185,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1146 memcpy(piobuf, hdr, hdrwords << 2); 1185 memcpy(piobuf, hdr, hdrwords << 2);
1147 ipath_copy_from_sge(piobuf + hdrwords, ss, len); 1186 ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1148 1187
1188 atomic_inc(&qp->s_dma_busy);
1149 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx); 1189 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1150 /* 1190 /*
1151 * If we couldn't queue the DMA request, save the info 1191 * If we couldn't queue the DMA request, save the info
@@ -1156,6 +1196,7 @@ static int ipath_verbs_send_dma(struct ipath_qp *qp,
1156 tx->ss = NULL; 1196 tx->ss = NULL;
1157 tx->len = 0; 1197 tx->len = 0;
1158 qp->s_tx = tx; 1198 qp->s_tx = tx;
1199 decrement_dma_busy(qp);
1159 } 1200 }
1160 dev->n_unaligned++; 1201 dev->n_unaligned++;
1161 goto bail; 1202 goto bail;
@@ -1179,6 +1220,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
1179 unsigned flush_wc; 1220 unsigned flush_wc;
1180 u32 control; 1221 u32 control;
1181 int ret; 1222 int ret;
1223 unsigned int flags;
1182 1224
1183 piobuf = ipath_getpiobuf(dd, plen, NULL); 1225 piobuf = ipath_getpiobuf(dd, plen, NULL);
1184 if (unlikely(piobuf == NULL)) { 1226 if (unlikely(piobuf == NULL)) {
@@ -1249,8 +1291,11 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp,
1249 } 1291 }
1250 copy_io(piobuf, ss, len, flush_wc); 1292 copy_io(piobuf, ss, len, flush_wc);
1251done: 1293done:
1252 if (qp->s_wqe) 1294 if (qp->s_wqe) {
1295 spin_lock_irqsave(&qp->s_lock, flags);
1253 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS); 1296 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1297 spin_unlock_irqrestore(&qp->s_lock, flags);
1298 }
1254 ret = 0; 1299 ret = 0;
1255bail: 1300bail:
1256 return ret; 1301 return ret;
@@ -1283,19 +1328,12 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1283 * can defer SDMA restart until link goes ACTIVE without 1328 * can defer SDMA restart until link goes ACTIVE without
1284 * worrying about just how we got there. 1329 * worrying about just how we got there.
1285 */ 1330 */
1286 if (qp->ibqp.qp_type == IB_QPT_SMI) 1331 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1332 !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1287 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1333 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1288 plen, dwords); 1334 plen, dwords);
1289 /* All non-VL15 packets are dropped if link is not ACTIVE */
1290 else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
1291 if (qp->s_wqe)
1292 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1293 ret = 0;
1294 } else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
1295 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1296 plen, dwords);
1297 else 1335 else
1298 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len, 1336 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1299 plen, dwords); 1337 plen, dwords);
1300 1338
1301 return ret; 1339 return ret;
@@ -1403,27 +1441,46 @@ bail:
1403 * This is called from ipath_intr() at interrupt level when a PIO buffer is 1441 * This is called from ipath_intr() at interrupt level when a PIO buffer is
1404 * available after ipath_verbs_send() returned an error that no buffers were 1442 * available after ipath_verbs_send() returned an error that no buffers were
1405 * available. Return 1 if we consumed all the PIO buffers and we still have 1443 * available. Return 1 if we consumed all the PIO buffers and we still have
1406 * QPs waiting for buffers (for now, just do a tasklet_hi_schedule and 1444 * QPs waiting for buffers (for now, just restart the send tasklet and
1407 * return zero). 1445 * return zero).
1408 */ 1446 */
1409int ipath_ib_piobufavail(struct ipath_ibdev *dev) 1447int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1410{ 1448{
1449 struct list_head *list;
1450 struct ipath_qp *qplist;
1411 struct ipath_qp *qp; 1451 struct ipath_qp *qp;
1412 unsigned long flags; 1452 unsigned long flags;
1413 1453
1414 if (dev == NULL) 1454 if (dev == NULL)
1415 goto bail; 1455 goto bail;
1416 1456
1457 list = &dev->piowait;
1458 qplist = NULL;
1459
1417 spin_lock_irqsave(&dev->pending_lock, flags); 1460 spin_lock_irqsave(&dev->pending_lock, flags);
1418 while (!list_empty(&dev->piowait)) { 1461 while (!list_empty(list)) {
1419 qp = list_entry(dev->piowait.next, struct ipath_qp, 1462 qp = list_entry(list->next, struct ipath_qp, piowait);
1420 piowait);
1421 list_del_init(&qp->piowait); 1463 list_del_init(&qp->piowait);
1422 clear_bit(IPATH_S_BUSY, &qp->s_busy); 1464 qp->pio_next = qplist;
1423 tasklet_hi_schedule(&qp->s_task); 1465 qplist = qp;
1466 atomic_inc(&qp->refcount);
1424 } 1467 }
1425 spin_unlock_irqrestore(&dev->pending_lock, flags); 1468 spin_unlock_irqrestore(&dev->pending_lock, flags);
1426 1469
1470 while (qplist != NULL) {
1471 qp = qplist;
1472 qplist = qp->pio_next;
1473
1474 spin_lock_irqsave(&qp->s_lock, flags);
1475 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1476 ipath_schedule_send(qp);
1477 spin_unlock_irqrestore(&qp->s_lock, flags);
1478
1479 /* Notify ipath_destroy_qp() if it is waiting. */
1480 if (atomic_dec_and_test(&qp->refcount))
1481 wake_up(&qp->wait);
1482 }
1483
1427bail: 1484bail:
1428 return 0; 1485 return 0;
1429} 1486}
@@ -2145,11 +2202,12 @@ bail:
2145void ipath_unregister_ib_device(struct ipath_ibdev *dev) 2202void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2146{ 2203{
2147 struct ib_device *ibdev = &dev->ibdev; 2204 struct ib_device *ibdev = &dev->ibdev;
2148 2205 u32 qps_inuse;
2149 disable_timer(dev->dd);
2150 2206
2151 ib_unregister_device(ibdev); 2207 ib_unregister_device(ibdev);
2152 2208
2209 disable_timer(dev->dd);
2210
2153 if (!list_empty(&dev->pending[0]) || 2211 if (!list_empty(&dev->pending[0]) ||
2154 !list_empty(&dev->pending[1]) || 2212 !list_empty(&dev->pending[1]) ||
2155 !list_empty(&dev->pending[2])) 2213 !list_empty(&dev->pending[2]))
@@ -2164,7 +2222,10 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2164 * Note that ipath_unregister_ib_device() can be called before all 2222 * Note that ipath_unregister_ib_device() can be called before all
2165 * the QPs are destroyed! 2223 * the QPs are destroyed!
2166 */ 2224 */
2167 ipath_free_all_qps(&dev->qp_table); 2225 qps_inuse = ipath_free_all_qps(&dev->qp_table);
2226 if (qps_inuse)
2227 ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2228 qps_inuse);
2168 kfree(dev->qp_table.table); 2229 kfree(dev->qp_table.table);
2169 kfree(dev->lk_table.table); 2230 kfree(dev->lk_table.table);
2170 kfree(dev->txreq_bufs); 2231 kfree(dev->txreq_bufs);
@@ -2215,17 +2276,14 @@ static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2215 "RC OTH NAKs %d\n" 2276 "RC OTH NAKs %d\n"
2216 "RC timeouts %d\n" 2277 "RC timeouts %d\n"
2217 "RC RDMA dup %d\n" 2278 "RC RDMA dup %d\n"
2218 "RC stalls %d\n"
2219 "piobuf wait %d\n" 2279 "piobuf wait %d\n"
2220 "no piobuf %d\n"
2221 "unaligned %d\n" 2280 "unaligned %d\n"
2222 "PKT drops %d\n" 2281 "PKT drops %d\n"
2223 "WQE errs %d\n", 2282 "WQE errs %d\n",
2224 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks, 2283 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2225 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks, 2284 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2226 dev->n_other_naks, dev->n_timeouts, 2285 dev->n_other_naks, dev->n_timeouts,
2227 dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait, 2286 dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2228 dev->n_no_piobuf, dev->n_unaligned,
2229 dev->n_pkt_drops, dev->n_wqe_errs); 2287 dev->n_pkt_drops, dev->n_wqe_errs);
2230 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) { 2288 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2231 const struct ipath_opcode_stats *si = &dev->opstats[i]; 2289 const struct ipath_opcode_stats *si = &dev->opstats[i];
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.h b/drivers/infiniband/hw/ipath/ipath_verbs.h
index 6514aa8306cd..9d12ae8a778e 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.h
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.h
@@ -74,6 +74,11 @@
74#define IPATH_POST_RECV_OK 0x02 74#define IPATH_POST_RECV_OK 0x02
75#define IPATH_PROCESS_RECV_OK 0x04 75#define IPATH_PROCESS_RECV_OK 0x04
76#define IPATH_PROCESS_SEND_OK 0x08 76#define IPATH_PROCESS_SEND_OK 0x08
77#define IPATH_PROCESS_NEXT_SEND_OK 0x10
78#define IPATH_FLUSH_SEND 0x20
79#define IPATH_FLUSH_RECV 0x40
80#define IPATH_PROCESS_OR_FLUSH_SEND \
81 (IPATH_PROCESS_SEND_OK | IPATH_FLUSH_SEND)
77 82
78/* IB Performance Manager status values */ 83/* IB Performance Manager status values */
79#define IB_PMA_SAMPLE_STATUS_DONE 0x00 84#define IB_PMA_SAMPLE_STATUS_DONE 0x00
@@ -353,12 +358,14 @@ struct ipath_qp {
353 struct ib_qp ibqp; 358 struct ib_qp ibqp;
354 struct ipath_qp *next; /* link list for QPN hash table */ 359 struct ipath_qp *next; /* link list for QPN hash table */
355 struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */ 360 struct ipath_qp *timer_next; /* link list for ipath_ib_timer() */
361 struct ipath_qp *pio_next; /* link for ipath_ib_piobufavail() */
356 struct list_head piowait; /* link for wait PIO buf */ 362 struct list_head piowait; /* link for wait PIO buf */
357 struct list_head timerwait; /* link for waiting for timeouts */ 363 struct list_head timerwait; /* link for waiting for timeouts */
358 struct ib_ah_attr remote_ah_attr; 364 struct ib_ah_attr remote_ah_attr;
359 struct ipath_ib_header s_hdr; /* next packet header to send */ 365 struct ipath_ib_header s_hdr; /* next packet header to send */
360 atomic_t refcount; 366 atomic_t refcount;
361 wait_queue_head_t wait; 367 wait_queue_head_t wait;
368 wait_queue_head_t wait_dma;
362 struct tasklet_struct s_task; 369 struct tasklet_struct s_task;
363 struct ipath_mmap_info *ip; 370 struct ipath_mmap_info *ip;
364 struct ipath_sge_state *s_cur_sge; 371 struct ipath_sge_state *s_cur_sge;
@@ -369,7 +376,7 @@ struct ipath_qp {
369 struct ipath_sge_state s_rdma_read_sge; 376 struct ipath_sge_state s_rdma_read_sge;
370 struct ipath_sge_state r_sge; /* current receive data */ 377 struct ipath_sge_state r_sge; /* current receive data */
371 spinlock_t s_lock; 378 spinlock_t s_lock;
372 unsigned long s_busy; 379 atomic_t s_dma_busy;
373 u16 s_pkt_delay; 380 u16 s_pkt_delay;
374 u16 s_hdrwords; /* size of s_hdr in 32 bit words */ 381 u16 s_hdrwords; /* size of s_hdr in 32 bit words */
375 u32 s_cur_size; /* size of send packet in bytes */ 382 u32 s_cur_size; /* size of send packet in bytes */
@@ -383,6 +390,7 @@ struct ipath_qp {
383 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */ 390 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
384 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */ 391 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
385 u64 r_wr_id; /* ID for current receive WQE */ 392 u64 r_wr_id; /* ID for current receive WQE */
393 unsigned long r_aflags;
386 u32 r_len; /* total length of r_sge */ 394 u32 r_len; /* total length of r_sge */
387 u32 r_rcv_len; /* receive data len processed */ 395 u32 r_rcv_len; /* receive data len processed */
388 u32 r_psn; /* expected rcv packet sequence number */ 396 u32 r_psn; /* expected rcv packet sequence number */
@@ -394,8 +402,7 @@ struct ipath_qp {
394 u8 r_state; /* opcode of last packet received */ 402 u8 r_state; /* opcode of last packet received */
395 u8 r_nak_state; /* non-zero if NAK is pending */ 403 u8 r_nak_state; /* non-zero if NAK is pending */
396 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */ 404 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
397 u8 r_reuse_sge; /* for UC receive errors */ 405 u8 r_flags;
398 u8 r_wrid_valid; /* r_wrid set but CQ entry not yet made */
399 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */ 406 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
400 u8 r_head_ack_queue; /* index into s_ack_queue[] */ 407 u8 r_head_ack_queue; /* index into s_ack_queue[] */
401 u8 qp_access_flags; 408 u8 qp_access_flags;
@@ -404,13 +411,13 @@ struct ipath_qp {
404 u8 s_rnr_retry_cnt; 411 u8 s_rnr_retry_cnt;
405 u8 s_retry; /* requester retry counter */ 412 u8 s_retry; /* requester retry counter */
406 u8 s_rnr_retry; /* requester RNR retry counter */ 413 u8 s_rnr_retry; /* requester RNR retry counter */
407 u8 s_wait_credit; /* limit number of unacked packets sent */
408 u8 s_pkey_index; /* PKEY index to use */ 414 u8 s_pkey_index; /* PKEY index to use */
409 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */ 415 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
410 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ 416 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
411 u8 s_tail_ack_queue; /* index into s_ack_queue[] */ 417 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
412 u8 s_flags; 418 u8 s_flags;
413 u8 s_dmult; 419 u8 s_dmult;
420 u8 s_draining;
414 u8 timeout; /* Timeout for this QP */ 421 u8 timeout; /* Timeout for this QP */
415 enum ib_mtu path_mtu; 422 enum ib_mtu path_mtu;
416 u32 remote_qpn; 423 u32 remote_qpn;
@@ -428,16 +435,40 @@ struct ipath_qp {
428 struct ipath_sge r_sg_list[0]; /* verified SGEs */ 435 struct ipath_sge r_sg_list[0]; /* verified SGEs */
429}; 436};
430 437
431/* Bit definition for s_busy. */ 438/*
432#define IPATH_S_BUSY 0 439 * Atomic bit definitions for r_aflags.
440 */
441#define IPATH_R_WRID_VALID 0
442
443/*
444 * Bit definitions for r_flags.
445 */
446#define IPATH_R_REUSE_SGE 0x01
447#define IPATH_R_RDMAR_SEQ 0x02
433 448
434/* 449/*
435 * Bit definitions for s_flags. 450 * Bit definitions for s_flags.
451 *
452 * IPATH_S_FENCE_PENDING - waiting for all prior RDMA read or atomic SWQEs
453 * before processing the next SWQE
454 * IPATH_S_RDMAR_PENDING - waiting for any RDMA read or atomic SWQEs
455 * before processing the next SWQE
456 * IPATH_S_WAITING - waiting for RNR timeout or send buffer available.
457 * IPATH_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
458 * IPATH_S_WAIT_DMA - waiting for send DMA queue to drain before generating
459 * next send completion entry not via send DMA.
436 */ 460 */
437#define IPATH_S_SIGNAL_REQ_WR 0x01 461#define IPATH_S_SIGNAL_REQ_WR 0x01
438#define IPATH_S_FENCE_PENDING 0x02 462#define IPATH_S_FENCE_PENDING 0x02
439#define IPATH_S_RDMAR_PENDING 0x04 463#define IPATH_S_RDMAR_PENDING 0x04
440#define IPATH_S_ACK_PENDING 0x08 464#define IPATH_S_ACK_PENDING 0x08
465#define IPATH_S_BUSY 0x10
466#define IPATH_S_WAITING 0x20
467#define IPATH_S_WAIT_SSN_CREDIT 0x40
468#define IPATH_S_WAIT_DMA 0x80
469
470#define IPATH_S_ANY_WAIT (IPATH_S_FENCE_PENDING | IPATH_S_RDMAR_PENDING | \
471 IPATH_S_WAITING | IPATH_S_WAIT_SSN_CREDIT | IPATH_S_WAIT_DMA)
441 472
442#define IPATH_PSN_CREDIT 512 473#define IPATH_PSN_CREDIT 512
443 474
@@ -573,13 +604,11 @@ struct ipath_ibdev {
573 u32 n_rnr_naks; 604 u32 n_rnr_naks;
574 u32 n_other_naks; 605 u32 n_other_naks;
575 u32 n_timeouts; 606 u32 n_timeouts;
576 u32 n_rc_stalls;
577 u32 n_pkt_drops; 607 u32 n_pkt_drops;
578 u32 n_vl15_dropped; 608 u32 n_vl15_dropped;
579 u32 n_wqe_errs; 609 u32 n_wqe_errs;
580 u32 n_rdma_dup_busy; 610 u32 n_rdma_dup_busy;
581 u32 n_piowait; 611 u32 n_piowait;
582 u32 n_no_piobuf;
583 u32 n_unaligned; 612 u32 n_unaligned;
584 u32 port_cap_flags; 613 u32 port_cap_flags;
585 u32 pma_sample_start; 614 u32 pma_sample_start;
@@ -657,6 +686,17 @@ static inline struct ipath_ibdev *to_idev(struct ib_device *ibdev)
657 return container_of(ibdev, struct ipath_ibdev, ibdev); 686 return container_of(ibdev, struct ipath_ibdev, ibdev);
658} 687}
659 688
689/*
690 * This must be called with s_lock held.
691 */
692static inline void ipath_schedule_send(struct ipath_qp *qp)
693{
694 if (qp->s_flags & IPATH_S_ANY_WAIT)
695 qp->s_flags &= ~IPATH_S_ANY_WAIT;
696 if (!(qp->s_flags & IPATH_S_BUSY))
697 tasklet_hi_schedule(&qp->s_task);
698}
699
660int ipath_process_mad(struct ib_device *ibdev, 700int ipath_process_mad(struct ib_device *ibdev,
661 int mad_flags, 701 int mad_flags,
662 u8 port_num, 702 u8 port_num,
@@ -706,12 +746,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
706int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 746int ipath_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
707 int attr_mask, struct ib_qp_init_attr *init_attr); 747 int attr_mask, struct ib_qp_init_attr *init_attr);
708 748
709void ipath_free_all_qps(struct ipath_qp_table *qpt); 749unsigned ipath_free_all_qps(struct ipath_qp_table *qpt);
710 750
711int ipath_init_qp_table(struct ipath_ibdev *idev, int size); 751int ipath_init_qp_table(struct ipath_ibdev *idev, int size);
712 752
713void ipath_sqerror_qp(struct ipath_qp *qp, struct ib_wc *wc);
714
715void ipath_get_credit(struct ipath_qp *qp, u32 aeth); 753void ipath_get_credit(struct ipath_qp *qp, u32 aeth);
716 754
717unsigned ipath_ib_rate_to_mult(enum ib_rate rate); 755unsigned ipath_ib_rate_to_mult(enum ib_rate rate);
@@ -729,7 +767,9 @@ void ipath_uc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
729void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr, 767void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
730 int has_grh, void *data, u32 tlen, struct ipath_qp *qp); 768 int has_grh, void *data, u32 tlen, struct ipath_qp *qp);
731 769
732void ipath_restart_rc(struct ipath_qp *qp, u32 psn, struct ib_wc *wc); 770void ipath_restart_rc(struct ipath_qp *qp, u32 psn);
771
772void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err);
733 773
734int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr); 774int ipath_post_ud_send(struct ipath_qp *qp, struct ib_send_wr *wr);
735 775