about summary refs log tree commit diff stats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
author Ralph Campbell <ralph.campbell@qlogic.com> 2008-04-17 00:09:26 -0400
committer Roland Dreier <rolandd@cisco.com> 2008-04-17 00:09:26 -0400
commit c4b4d16e090e1b68d1d4d20a28757070982b9725 (patch)
tree 4ccc895d5effc5789d7e81182cc4ac76cb6f2c74 /drivers/infiniband/hw
parent 4330e4dad780467d930b394b5119c0218a1e2dbe (diff)
IB/ipath: Make send buffers available for kernel if not allocated to user
A fixed partitioning of send buffers is determined at driver load time for user processes and kernel use. Since send buffers are a scarce resource, it makes sense to allow the kernel to use the buffers if they are not in use by a user process.

Also, eliminate code duplication for ipath_force_pio_avail_update().

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_diag.c 6
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_driver.c 306
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_file_ops.c 21
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_init_chip.c 8
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c 14
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h 11
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_registers.h 2
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_verbs.c 2
8 files changed, 225 insertions, 145 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 96a1c4172f87..af59bf380ca2 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -439,7 +439,9 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
439 goto bail; 439 goto bail;
440 } 440 }
441 441
442 piobuf = ipath_getpiobuf(dd, &pbufn); 442 plen >>= 2; /* in dwords */
443
444 piobuf = ipath_getpiobuf(dd, plen, &pbufn);
443 if (!piobuf) { 445 if (!piobuf) {
444 ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n", 446 ipath_cdbg(VERBOSE, "No PIO buffers avail unit for %u\n",
445 dd->ipath_unit); 447 dd->ipath_unit);
@@ -449,8 +451,6 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
449 /* disarm it just to be extra sure */ 451 /* disarm it just to be extra sure */
450 ipath_disarm_piobufs(dd, pbufn, 1); 452 ipath_disarm_piobufs(dd, pbufn, 1);
451 453
452 plen >>= 2; /* in dwords */
453
454 if (ipath_debug & __IPATH_PKTDBG) 454 if (ipath_debug & __IPATH_PKTDBG)
455 ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n", 455 ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
456 dd->ipath_unit, plen - 1, pbufn); 456 dd->ipath_unit, plen - 1, pbufn);
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 1299171c1e50..216da97eddd4 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -317,7 +317,7 @@ static void ipath_verify_pioperf(struct ipath_devdata *dd)
317 u32 *addr; 317 u32 *addr;
318 u64 msecs, emsecs; 318 u64 msecs, emsecs;
319 319
320 piobuf = ipath_getpiobuf(dd, &pbnum); 320 piobuf = ipath_getpiobuf(dd, 0, &pbnum);
321 if (!piobuf) { 321 if (!piobuf) {
322 dev_info(&dd->pcidev->dev, 322 dev_info(&dd->pcidev->dev,
323 "No PIObufs for checking perf, skipping\n"); 323 "No PIObufs for checking perf, skipping\n");
@@ -836,20 +836,8 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
836 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 836 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
837 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags); 837 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
838 } 838 }
839 839 /* on some older chips, update may not happen after cancel */
840 /* 840 ipath_force_pio_avail_update(dd);
841 * Disable PIOAVAILUPD, then re-enable, reading scratch in
842 * between. This seems to avoid a chip timing race that causes
843 * pioavail updates to memory to stop. We xor as we don't
844 * know the state of the bit when we're called.
845 */
846 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
847 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
848 dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD);
849 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
850 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
851 dd->ipath_sendctrl);
852 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
853} 841}
854 842
855/** 843/**
@@ -1314,7 +1302,6 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1314 * happens when all buffers are in use, so only cpu overhead, not 1302 * happens when all buffers are in use, so only cpu overhead, not
1315 * latency or bandwidth is affected. 1303 * latency or bandwidth is affected.
1316 */ 1304 */
1317#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
1318 if (!dd->ipath_pioavailregs_dma) { 1305 if (!dd->ipath_pioavailregs_dma) {
1319 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n"); 1306 ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
1320 return; 1307 return;
@@ -1359,7 +1346,7 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
1359 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]); 1346 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
1360 else 1347 else
1361 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]); 1348 piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
1362 pchg = _IPATH_ALL_CHECKBITS & 1349 pchg = dd->ipath_pioavailkernel[i] &
1363 ~(dd->ipath_pioavailshadow[i] ^ piov); 1350 ~(dd->ipath_pioavailshadow[i] ^ piov);
1364 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT; 1351 pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
1365 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) { 1352 if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
@@ -1410,27 +1397,63 @@ int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
1410 return ret; 1397 return ret;
1411} 1398}
1412 1399
1413/** 1400/*
1414 * ipath_getpiobuf - find an available pio buffer 1401 * debugging code and stats updates if no pio buffers available.
1415 * @dd: the infinipath device 1402 */
1416 * @pbufnum: the buffer number is placed here 1403static noinline void no_pio_bufs(struct ipath_devdata *dd)
1404{
1405 unsigned long *shadow = dd->ipath_pioavailshadow;
1406 __le64 *dma = (__le64 *)dd->ipath_pioavailregs_dma;
1407
1408 dd->ipath_upd_pio_shadow = 1;
1409
1410 /*
1411 * not atomic, but if we lose a stat count in a while, that's OK
1412 */
1413 ipath_stats.sps_nopiobufs++;
1414 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1415 ipath_dbg("%u pio sends with no bufavail; dmacopy: "
1416 "%llx %llx %llx %llx; shadow: %lx %lx %lx %lx\n",
1417 dd->ipath_consec_nopiobuf,
1418 (unsigned long long) le64_to_cpu(dma[0]),
1419 (unsigned long long) le64_to_cpu(dma[1]),
1420 (unsigned long long) le64_to_cpu(dma[2]),
1421 (unsigned long long) le64_to_cpu(dma[3]),
1422 shadow[0], shadow[1], shadow[2], shadow[3]);
1423 /*
1424 * 4 buffers per byte, 4 registers above, cover rest
1425 * below
1426 */
1427 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
1428 (sizeof(shadow[0]) * 4 * 4))
1429 ipath_dbg("2nd group: dmacopy: %llx %llx "
1430 "%llx %llx; shadow: %lx %lx %lx %lx\n",
1431 (unsigned long long)le64_to_cpu(dma[4]),
1432 (unsigned long long)le64_to_cpu(dma[5]),
1433 (unsigned long long)le64_to_cpu(dma[6]),
1434 (unsigned long long)le64_to_cpu(dma[7]),
1435 shadow[4], shadow[5], shadow[6],
1436 shadow[7]);
1437 }
1438}
1439
1440/*
1441 * common code for normal driver pio buffer allocation, and reserved
1442 * allocation.
1417 * 1443 *
1418 * do appropriate marking as busy, etc. 1444 * do appropriate marking as busy, etc.
1419 * returns buffer number if one found (>=0), negative number is error. 1445 * returns buffer number if one found (>=0), negative number is error.
1420 * Used by ipath_layer_send
1421 */ 1446 */
1422u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum) 1447static u32 __iomem *ipath_getpiobuf_range(struct ipath_devdata *dd,
1448 u32 *pbufnum, u32 first, u32 last, u32 firsti)
1423{ 1449{
1424 int i, j, starti, updated = 0; 1450 int i, j, updated = 0;
1425 unsigned piobcnt, iter; 1451 unsigned piobcnt;
1426 unsigned long flags; 1452 unsigned long flags;
1427 unsigned long *shadow = dd->ipath_pioavailshadow; 1453 unsigned long *shadow = dd->ipath_pioavailshadow;
1428 u32 __iomem *buf; 1454 u32 __iomem *buf;
1429 1455
1430 piobcnt = (unsigned)(dd->ipath_piobcnt2k 1456 piobcnt = last - first;
1431 + dd->ipath_piobcnt4k);
1432 starti = dd->ipath_lastport_piobuf;
1433 iter = piobcnt - starti;
1434 if (dd->ipath_upd_pio_shadow) { 1457 if (dd->ipath_upd_pio_shadow) {
1435 /* 1458 /*
1436 * Minor optimization. If we had no buffers on last call, 1459 * Minor optimization. If we had no buffers on last call,
@@ -1438,12 +1461,10 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
1438 * if no buffers were updated, to be paranoid 1461 * if no buffers were updated, to be paranoid
1439 */ 1462 */
1440 ipath_update_pio_bufs(dd); 1463 ipath_update_pio_bufs(dd);
1441 /* we scanned here, don't do it at end of scan */ 1464 updated++;
1442 updated = 1; 1465 i = first;
1443 i = starti;
1444 } else 1466 } else
1445 i = dd->ipath_lastpioindex; 1467 i = firsti;
1446
1447rescan: 1468rescan:
1448 /* 1469 /*
1449 * while test_and_set_bit() is atomic, we do that and then the 1470 * while test_and_set_bit() is atomic, we do that and then the
@@ -1451,104 +1472,141 @@ rescan:
1451 * of the remaining armlaunch errors. 1472 * of the remaining armlaunch errors.
1452 */ 1473 */
1453 spin_lock_irqsave(&ipath_pioavail_lock, flags); 1474 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1454 for (j = 0; j < iter; j++, i++) { 1475 for (j = 0; j < piobcnt; j++, i++) {
1455 if (i >= piobcnt) 1476 if (i >= last)
1456 i = starti; 1477 i = first;
1457 /* 1478 if (__test_and_set_bit((2 * i) + 1, shadow))
1458 * To avoid bus lock overhead, we first find a candidate
1459 * buffer, then do the test and set, and continue if that
1460 * fails.
1461 */
1462 if (test_bit((2 * i) + 1, shadow) ||
1463 test_and_set_bit((2 * i) + 1, shadow))
1464 continue; 1479 continue;
1465 /* flip generation bit */ 1480 /* flip generation bit */
1466 change_bit(2 * i, shadow); 1481 __change_bit(2 * i, shadow);
1467 break; 1482 break;
1468 } 1483 }
1469 spin_unlock_irqrestore(&ipath_pioavail_lock, flags); 1484 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1470 1485
1471 if (j == iter) { 1486 if (j == piobcnt) {
1472 volatile __le64 *dma = dd->ipath_pioavailregs_dma;
1473
1474 /*
1475 * first time through; shadow exhausted, but may be real
1476 * buffers available, so go see; if any updated, rescan
1477 * (once)
1478 */
1479 if (!updated) { 1487 if (!updated) {
1488 /*
1489 * first time through; shadow exhausted, but may be
1490 * buffers available, try an update and then rescan.
1491 */
1480 ipath_update_pio_bufs(dd); 1492 ipath_update_pio_bufs(dd);
1481 updated = 1; 1493 updated++;
1482 i = starti; 1494 i = first;
1483 goto rescan; 1495 goto rescan;
1484 } 1496 } else if (updated == 1 && piobcnt <=
1485 dd->ipath_upd_pio_shadow = 1; 1497 ((dd->ipath_sendctrl
1486 /* 1498 >> INFINIPATH_S_UPDTHRESH_SHIFT) &
1487 * not atomic, but if we lose one once in a while, that's OK 1499 INFINIPATH_S_UPDTHRESH_MASK)) {
1488 */
1489 ipath_stats.sps_nopiobufs++;
1490 if (!(++dd->ipath_consec_nopiobuf % 100000)) {
1491 ipath_dbg(
1492 "%u pio sends with no bufavail; dmacopy: "
1493 "%llx %llx %llx %llx; shadow: "
1494 "%lx %lx %lx %lx\n",
1495 dd->ipath_consec_nopiobuf,
1496 (unsigned long long) le64_to_cpu(dma[0]),
1497 (unsigned long long) le64_to_cpu(dma[1]),
1498 (unsigned long long) le64_to_cpu(dma[2]),
1499 (unsigned long long) le64_to_cpu(dma[3]),
1500 shadow[0], shadow[1], shadow[2],
1501 shadow[3]);
1502 /* 1500 /*
1503 * 4 buffers per byte, 4 registers above, cover rest 1501 * for chips supporting and using the update
1504 * below 1502 * threshold we need to force an update of the
1503 * in-memory copy if the count is less than the
1504 * thershold, then check one more time.
1505 */ 1505 */
1506 if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) > 1506 ipath_force_pio_avail_update(dd);
1507 (sizeof(shadow[0]) * 4 * 4)) 1507 ipath_update_pio_bufs(dd);
1508 ipath_dbg("2nd group: dmacopy: %llx %llx " 1508 updated++;
1509 "%llx %llx; shadow: %lx %lx " 1509 i = first;
1510 "%lx %lx\n", 1510 goto rescan;
1511 (unsigned long long)
1512 le64_to_cpu(dma[4]),
1513 (unsigned long long)
1514 le64_to_cpu(dma[5]),
1515 (unsigned long long)
1516 le64_to_cpu(dma[6]),
1517 (unsigned long long)
1518 le64_to_cpu(dma[7]),
1519 shadow[4], shadow[5],
1520 shadow[6], shadow[7]);
1521 } 1511 }
1512
1513 no_pio_bufs(dd);
1522 buf = NULL; 1514 buf = NULL;
1523 goto bail; 1515 } else {
1516 if (i < dd->ipath_piobcnt2k)
1517 buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1518 i * dd->ipath_palign);
1519 else
1520 buf = (u32 __iomem *)
1521 (dd->ipath_pio4kbase +
1522 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1523 if (pbufnum)
1524 *pbufnum = i;
1524 } 1525 }
1525 1526
1526 /* 1527 return buf;
1527 * set next starting place. Since it's just an optimization, 1528}
1528 * it doesn't matter who wins on this, so no locking
1529 */
1530 dd->ipath_lastpioindex = i + 1;
1531 if (dd->ipath_upd_pio_shadow)
1532 dd->ipath_upd_pio_shadow = 0;
1533 if (dd->ipath_consec_nopiobuf)
1534 dd->ipath_consec_nopiobuf = 0;
1535 if (i < dd->ipath_piobcnt2k)
1536 buf = (u32 __iomem *) (dd->ipath_pio2kbase +
1537 i * dd->ipath_palign);
1538 else
1539 buf = (u32 __iomem *)
1540 (dd->ipath_pio4kbase +
1541 (i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
1542 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1543 i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1544 if (pbufnum)
1545 *pbufnum = i;
1546 1529
1547bail: 1530/**
1531 * ipath_getpiobuf - find an available pio buffer
1532 * @dd: the infinipath device
1533 * @plen: the size of the PIO buffer needed in 32-bit words
1534 * @pbufnum: the buffer number is placed here
1535 */
1536u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 plen, u32 *pbufnum)
1537{
1538 u32 __iomem *buf;
1539 u32 pnum, nbufs;
1540 u32 first, lasti;
1541
1542 if (plen + 1 >= IPATH_SMALLBUF_DWORDS) {
1543 first = dd->ipath_piobcnt2k;
1544 lasti = dd->ipath_lastpioindexl;
1545 } else {
1546 first = 0;
1547 lasti = dd->ipath_lastpioindex;
1548 }
1549 nbufs = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
1550 buf = ipath_getpiobuf_range(dd, &pnum, first, nbufs, lasti);
1551
1552 if (buf) {
1553 /*
1554 * Set next starting place. It's just an optimization,
1555 * it doesn't matter who wins on this, so no locking
1556 */
1557 if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1558 dd->ipath_lastpioindexl = pnum + 1;
1559 else
1560 dd->ipath_lastpioindex = pnum + 1;
1561 if (dd->ipath_upd_pio_shadow)
1562 dd->ipath_upd_pio_shadow = 0;
1563 if (dd->ipath_consec_nopiobuf)
1564 dd->ipath_consec_nopiobuf = 0;
1565 ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
1566 pnum, (pnum < dd->ipath_piobcnt2k) ? 2 : 4, buf);
1567 if (pbufnum)
1568 *pbufnum = pnum;
1569
1570 }
1548 return buf; 1571 return buf;
1549} 1572}
1550 1573
1551/** 1574/**
1575 * ipath_chg_pioavailkernel - change which send buffers are available for kernel
1576 * @dd: the infinipath device
1577 * @start: the starting send buffer number
1578 * @len: the number of send buffers
1579 * @avail: true if the buffers are available for kernel use, false otherwise
1580 */
1581void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
1582 unsigned len, int avail)
1583{
1584 unsigned long flags;
1585 unsigned end;
1586
1587 /* There are two bits per send buffer (busy and generation) */
1588 start *= 2;
1589 len *= 2;
1590 end = start + len;
1591
1592 /* Set or clear the generation bits. */
1593 spin_lock_irqsave(&ipath_pioavail_lock, flags);
1594 while (start < end) {
1595 if (avail) {
1596 __clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
1597 dd->ipath_pioavailshadow);
1598 __set_bit(start, dd->ipath_pioavailkernel);
1599 } else {
1600 __set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
1601 dd->ipath_pioavailshadow);
1602 __clear_bit(start, dd->ipath_pioavailkernel);
1603 }
1604 start += 2;
1605 }
1606 spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
1607}
1608
1609/**
1552 * ipath_create_rcvhdrq - create a receive header queue 1610 * ipath_create_rcvhdrq - create a receive header queue
1553 * @dd: the infinipath device 1611 * @dd: the infinipath device
1554 * @pd: the port data 1612 * @pd: the port data
@@ -1664,6 +1722,30 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
1664 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch); 1722 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1665} 1723}
1666 1724
1725/*
1726 * Force an update of in-memory copy of the pioavail registers, when
1727 * needed for any of a variety of reasons. We read the scratch register
1728 * to make it highly likely that the update will have happened by the
1729 * time we return. If already off (as in cancel_sends above), this
1730 * routine is a nop, on the assumption that the caller will "do the
1731 * right thing".
1732 */
1733void ipath_force_pio_avail_update(struct ipath_devdata *dd)
1734{
1735 unsigned long flags;
1736
1737 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
1738 if (dd->ipath_sendctrl & INFINIPATH_S_PIOBUFAVAILUPD) {
1739 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1740 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
1741 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1742 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1743 dd->ipath_sendctrl);
1744 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
1745 }
1746 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
1747}
1748
1667static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd, 1749static void ipath_set_ib_lstate(struct ipath_devdata *dd, int linkcmd,
1668 int linitcmd) 1750 int linitcmd)
1669{ 1751{
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index cddf29b9554e..1b232b23dbfc 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1603,6 +1603,9 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
1603 port_fp(fp) = pd; 1603 port_fp(fp) = pd;
1604 pd->port_pid = current->pid; 1604 pd->port_pid = current->pid;
1605 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm)); 1605 strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
1606 ipath_chg_pioavailkernel(dd,
1607 dd->ipath_pbufsport * (pd->port_port - 1),
1608 dd->ipath_pbufsport, 0);
1606 ipath_stats.sps_ports++; 1609 ipath_stats.sps_ports++;
1607 ret = 0; 1610 ret = 0;
1608 } else 1611 } else
@@ -2081,6 +2084,7 @@ static int ipath_close(struct inode *in, struct file *fp)
2081 2084
2082 i = dd->ipath_pbufsport * (port - 1); 2085 i = dd->ipath_pbufsport * (port - 1);
2083 ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport); 2086 ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
2087 ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
2084 2088
2085 dd->ipath_f_clear_tids(dd, pd->port_port); 2089 dd->ipath_f_clear_tids(dd, pd->port_port);
2086 2090
@@ -2145,21 +2149,6 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
2145 return ret; 2149 return ret;
2146} 2150}
2147 2151
2148static int ipath_force_pio_avail_update(struct ipath_devdata *dd)
2149{
2150 unsigned long flags;
2151
2152 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
2153 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
2154 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
2155 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2156 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
2157 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
2158 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
2159
2160 return 0;
2161}
2162
2163static ssize_t ipath_write(struct file *fp, const char __user *data, 2152static ssize_t ipath_write(struct file *fp, const char __user *data,
2164 size_t count, loff_t *off) 2153 size_t count, loff_t *off)
2165{ 2154{
@@ -2304,7 +2293,7 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
2304 cmd.cmd.slave_mask_addr); 2293 cmd.cmd.slave_mask_addr);
2305 break; 2294 break;
2306 case IPATH_CMD_PIOAVAILUPD: 2295 case IPATH_CMD_PIOAVAILUPD:
2307 ret = ipath_force_pio_avail_update(pd->port_dd); 2296 ipath_force_pio_avail_update(pd->port_dd);
2308 break; 2297 break;
2309 case IPATH_CMD_POLL_TYPE: 2298 case IPATH_CMD_POLL_TYPE:
2310 pd->poll_type = cmd.cmd.poll_type; 2299 pd->poll_type = cmd.cmd.poll_type;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 46c70656d538..786a5e017fdd 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -521,7 +521,9 @@ static void enable_chip(struct ipath_devdata *dd,
521 pioavail = dd->ipath_pioavailregs_dma[i ^ 1]; 521 pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
522 else 522 else
523 pioavail = dd->ipath_pioavailregs_dma[i]; 523 pioavail = dd->ipath_pioavailregs_dma[i];
524 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail); 524 dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
525 (~dd->ipath_pioavailkernel[i] <<
526 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
525 } 527 }
526 /* can get counters, stats, etc. */ 528 /* can get counters, stats, etc. */
527 dd->ipath_flags |= IPATH_PRESENT; 529 dd->ipath_flags |= IPATH_PRESENT;
@@ -743,7 +745,9 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
743 ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n", 745 ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
744 dd->ipath_pbufsport, val32); 746 dd->ipath_pbufsport, val32);
745 } 747 }
746 dd->ipath_lastpioindex = dd->ipath_lastport_piobuf; 748 dd->ipath_lastpioindex = 0;
749 dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
750 ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
747 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u " 751 ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
748 "each for %u user ports\n", kpiobufs, 752 "each for %u user ports\n", kpiobufs,
749 piobufs, dd->ipath_pbufsport, uports); 753 piobufs, dd->ipath_pbufsport, uports);
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 5608e3268a62..d1e13a46093d 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -804,7 +804,6 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
804{ 804{
805 int i, im; 805 int i, im;
806 u64 val; 806 u64 val;
807 unsigned long flags;
808 807
809 /* disable error interrupts, to avoid confusion */ 808 /* disable error interrupts, to avoid confusion */
810 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL); 809 ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
@@ -823,14 +822,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
823 dd->ipath_control); 822 dd->ipath_control);
824 823
825 /* ensure pio avail updates continue */ 824 /* ensure pio avail updates continue */
826 spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags); 825 ipath_force_pio_avail_update(dd);
827 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
828 dd->ipath_sendctrl & ~INFINIPATH_S_PIOBUFAVAILUPD);
829 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
830 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
831 dd->ipath_sendctrl);
832 ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
833 spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
834 826
835 /* 827 /*
836 * We just enabled pioavailupdate, so dma copy is almost certainly 828 * We just enabled pioavailupdate, so dma copy is almost certainly
@@ -842,7 +834,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
842 i ^ 1 : i; 834 i ^ 1 : i;
843 val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im); 835 val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
844 dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val); 836 dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
845 dd->ipath_pioavailshadow[i] = val; 837 dd->ipath_pioavailshadow[i] = val |
838 (~dd->ipath_pioavailkernel[i] <<
839 INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
846 } 840 }
847 841
848 /* 842 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 2510359b1c83..3a15af26b093 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -191,6 +191,9 @@ struct ipath_skbinfo {
191 dma_addr_t phys; 191 dma_addr_t phys;
192}; 192};
193 193
194/* max dwords in small buffer packet */
195#define IPATH_SMALLBUF_DWORDS (dd->ipath_piosize2k >> 2)
196
194/* 197/*
195 * Possible IB config parameters for ipath_f_get/set_ib_cfg() 198 * Possible IB config parameters for ipath_f_get/set_ib_cfg()
196 */ 199 */
@@ -366,6 +369,7 @@ struct ipath_devdata {
366 * get to multiple devices 369 * get to multiple devices
367 */ 370 */
368 u32 ipath_lastpioindex; 371 u32 ipath_lastpioindex;
372 u32 ipath_lastpioindexl;
369 /* max length of freezemsg */ 373 /* max length of freezemsg */
370 u32 ipath_freezelen; 374 u32 ipath_freezelen;
371 /* 375 /*
@@ -453,6 +457,8 @@ struct ipath_devdata {
453 * init time. 457 * init time.
454 */ 458 */
455 unsigned long ipath_pioavailshadow[8]; 459 unsigned long ipath_pioavailshadow[8];
460 /* bitmap of send buffers available for the kernel to use with PIO. */
461 unsigned long ipath_pioavailkernel[8];
456 /* shadow of kr_gpio_out, for rmw ops */ 462 /* shadow of kr_gpio_out, for rmw ops */
457 u64 ipath_gpio_out; 463 u64 ipath_gpio_out;
458 /* shadow the gpio mask register */ 464 /* shadow the gpio mask register */
@@ -869,13 +875,16 @@ void ipath_hol_event(unsigned long);
869 875
870/* free up any allocated data at closes */ 876/* free up any allocated data at closes */
871void ipath_free_data(struct ipath_portdata *dd); 877void ipath_free_data(struct ipath_portdata *dd);
872u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *); 878u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32, u32 *);
879void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
880 unsigned len, int avail);
873void ipath_init_iba6120_funcs(struct ipath_devdata *); 881void ipath_init_iba6120_funcs(struct ipath_devdata *);
874void ipath_init_iba6110_funcs(struct ipath_devdata *); 882void ipath_init_iba6110_funcs(struct ipath_devdata *);
875void ipath_get_eeprom_info(struct ipath_devdata *); 883void ipath_get_eeprom_info(struct ipath_devdata *);
876int ipath_update_eeprom_log(struct ipath_devdata *dd); 884int ipath_update_eeprom_log(struct ipath_devdata *dd);
877void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr); 885void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
878u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg); 886u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
887void ipath_force_pio_avail_update(struct ipath_devdata *);
879void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev); 888void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
880 889
881/* 890/*
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index 16d0d74f1dda..61e562148496 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -66,6 +66,8 @@
66 66
67/* kr_sendctrl bits */ 67/* kr_sendctrl bits */
68#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16 68#define INFINIPATH_S_DISARMPIOBUF_SHIFT 16
69#define INFINIPATH_S_UPDTHRESH_SHIFT 24
70#define INFINIPATH_S_UPDTHRESH_MASK 0x1f
69 71
70#define IPATH_S_ABORT 0 72#define IPATH_S_ABORT 0
71#define IPATH_S_PIOINTBUFAVAIL 1 73#define IPATH_S_PIOINTBUFAVAIL 1
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2f9bc29313af..2e6b6f6265b6 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -875,7 +875,7 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
875 unsigned flush_wc; 875 unsigned flush_wc;
876 int ret; 876 int ret;
877 877
878 piobuf = ipath_getpiobuf(dd, NULL); 878 piobuf = ipath_getpiobuf(dd, plen, NULL);
879 if (unlikely(piobuf == NULL)) { 879 if (unlikely(piobuf == NULL)) {
880 ret = -EBUSY; 880 ret = -EBUSY;
881 goto bail; 881 goto bail;