about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Bryan O'Sullivan <bos@pathscale.com> 2006-07-01 07:36:03 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-07-01 12:56:00 -0400
commit f37bda92461313ad3bbfbf5660adc849c69718bf (patch)
tree a5fe4737ca6b8fcbe2cf9b58466d6340ee12fe56
parent 06993ca6bc46419027b45198a58447f4f05c14f6 (diff)
[PATCH] IB/ipath: memory management cleanups
Made in-memory rcvhdrq tail update be in dma_alloc'ed memory, not random user or special kernel (needed for ppc, also "just the right thing to do").

Some cleanups to make unexpected link transitions less likely to produce complaints about packet errors, and also to not leave SMA packets stuck and unable to go out.

A few other random debug and comment cleanups.

Always init rcvhdrq head/tail registers to 0, to avoid race conditions (should have been that way some time ago).

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com>
Cc: "Michael S. Tsirkin" <mst@mellanox.co.il>
Cc: Roland Dreier <rolandd@cisco.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_common.h 11
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_driver.c 225
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_file_ops.c 290
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_init_chip.c 43
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_intr.c 63
-rw-r--r-- drivers/infiniband/hw/ipath/ipath_kernel.h 17
6 files changed, 279 insertions, 370 deletions
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 9b6ec76c118e..3a849e71638e 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -311,6 +311,9 @@ struct ipath_base_info {
311 __u32 spi_rcv_egrchunksize; 311 __u32 spi_rcv_egrchunksize;
312 /* total size of mmap to cover full rcvegrbuffers */ 312 /* total size of mmap to cover full rcvegrbuffers */
313 __u32 spi_rcv_egrbuftotlen; 313 __u32 spi_rcv_egrbuftotlen;
314 __u32 spi_filler_for_align;
315 /* address of readonly memory copy of the rcvhdrq tail register. */
316 __u64 spi_rcvhdr_tailaddr;
314} __attribute__ ((aligned(8))); 317} __attribute__ ((aligned(8)));
315 318
316 319
@@ -380,13 +383,7 @@ struct ipath_user_info {
380 */ 383 */
381 __u32 spu_rcvhdrsize; 384 __u32 spu_rcvhdrsize;
382 385
383 /* 386 __u64 spu_unused; /* kept for compatible layout */
384 * cache line aligned (64 byte) user address to
385 * which the rcvhdrtail register will be written by infinipath
386 * whenever it changes, so that no chip registers are read in
387 * the performance path.
388 */
389 __u64 spu_rcvhdraddr;
390 387
391 /* 388 /*
392 * address of struct base_info to write to 389 * address of struct base_info to write to
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index c92f8e0a117a..0b88642381f8 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -131,14 +131,6 @@ static struct pci_driver ipath_driver = {
131 .id_table = ipath_pci_tbl, 131 .id_table = ipath_pci_tbl,
132}; 132};
133 133
134/*
135 * This is where port 0's rcvhdrtail register is written back; we also
136 * want nothing else sharing the cache line, so make it a cache line
137 * in size. Used for all units.
138 */
139volatile __le64 *ipath_port0_rcvhdrtail;
140dma_addr_t ipath_port0_rcvhdrtail_dma;
141static int port0_rcvhdrtail_refs;
142 134
143static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev, 135static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
144 u32 *bar0, u32 *bar1) 136 u32 *bar0, u32 *bar1)
@@ -268,47 +260,6 @@ int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
268 return nunits; 260 return nunits;
269} 261}
270 262
271static int init_port0_rcvhdrtail(struct pci_dev *pdev)
272{
273 int ret;
274
275 mutex_lock(&ipath_mutex);
276
277 if (!ipath_port0_rcvhdrtail) {
278 ipath_port0_rcvhdrtail =
279 dma_alloc_coherent(&pdev->dev,
280 IPATH_PORT0_RCVHDRTAIL_SIZE,
281 &ipath_port0_rcvhdrtail_dma,
282 GFP_KERNEL);
283
284 if (!ipath_port0_rcvhdrtail) {
285 ret = -ENOMEM;
286 goto bail;
287 }
288 }
289 port0_rcvhdrtail_refs++;
290 ret = 0;
291
292bail:
293 mutex_unlock(&ipath_mutex);
294
295 return ret;
296}
297
298static void cleanup_port0_rcvhdrtail(struct pci_dev *pdev)
299{
300 mutex_lock(&ipath_mutex);
301
302 if (!--port0_rcvhdrtail_refs) {
303 dma_free_coherent(&pdev->dev, IPATH_PORT0_RCVHDRTAIL_SIZE,
304 (void *) ipath_port0_rcvhdrtail,
305 ipath_port0_rcvhdrtail_dma);
306 ipath_port0_rcvhdrtail = NULL;
307 }
308
309 mutex_unlock(&ipath_mutex);
310}
311
312/* 263/*
313 * These next two routines are placeholders in case we don't have per-arch 264 * These next two routines are placeholders in case we don't have per-arch
314 * code for controlling write combining. If explicit control of write 265 * code for controlling write combining. If explicit control of write
@@ -333,20 +284,12 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
333 u32 bar0 = 0, bar1 = 0; 284 u32 bar0 = 0, bar1 = 0;
334 u8 rev; 285 u8 rev;
335 286
336 ret = init_port0_rcvhdrtail(pdev);
337 if (ret < 0) {
338 printk(KERN_ERR IPATH_DRV_NAME
339 ": Could not allocate port0_rcvhdrtail: error %d\n",
340 -ret);
341 goto bail;
342 }
343
344 dd = ipath_alloc_devdata(pdev); 287 dd = ipath_alloc_devdata(pdev);
345 if (IS_ERR(dd)) { 288 if (IS_ERR(dd)) {
346 ret = PTR_ERR(dd); 289 ret = PTR_ERR(dd);
347 printk(KERN_ERR IPATH_DRV_NAME 290 printk(KERN_ERR IPATH_DRV_NAME
348 ": Could not allocate devdata: error %d\n", -ret); 291 ": Could not allocate devdata: error %d\n", -ret);
349 goto bail_rcvhdrtail; 292 goto bail;
350 } 293 }
351 294
352 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit); 295 ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
@@ -574,9 +517,6 @@ bail_disable:
574bail_devdata: 517bail_devdata:
575 ipath_free_devdata(pdev, dd); 518 ipath_free_devdata(pdev, dd);
576 519
577bail_rcvhdrtail:
578 cleanup_port0_rcvhdrtail(pdev);
579
580bail: 520bail:
581 return ret; 521 return ret;
582} 522}
@@ -608,7 +548,6 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
608 pci_disable_device(pdev); 548 pci_disable_device(pdev);
609 549
610 ipath_free_devdata(pdev, dd); 550 ipath_free_devdata(pdev, dd);
611 cleanup_port0_rcvhdrtail(pdev);
612} 551}
613 552
614/* general driver use */ 553/* general driver use */
@@ -1383,26 +1322,20 @@ bail:
1383 * @dd: the infinipath device 1322 * @dd: the infinipath device
1384 * @pd: the port data 1323 * @pd: the port data
1385 * 1324 *
1386 * this *must* be physically contiguous memory, and for now, 1325 * this must be contiguous memory (from an i/o perspective), and must be
1387 * that limits it to what kmalloc can do. 1326 * DMA'able (which means for some systems, it will go through an IOMMU,
1327 * or be forced into a low address range).
1388 */ 1328 */
1389int ipath_create_rcvhdrq(struct ipath_devdata *dd, 1329int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1390 struct ipath_portdata *pd) 1330 struct ipath_portdata *pd)
1391{ 1331{
1392 int ret = 0, amt; 1332 int ret = 0;
1393 1333
1394 amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1395 sizeof(u32), PAGE_SIZE);
1396 if (!pd->port_rcvhdrq) { 1334 if (!pd->port_rcvhdrq) {
1397 /* 1335 dma_addr_t phys_hdrqtail;
1398 * not using REPEAT isn't viable; at 128KB, we can easily
1399 * fail this. The problem with REPEAT is we can block here
1400 * "forever". There isn't an inbetween, unfortunately. We
1401 * could reduce the risk by never freeing the rcvhdrq except
1402 * at unload, but even then, the first time a port is used,
1403 * we could delay for some time...
1404 */
1405 gfp_t gfp_flags = GFP_USER | __GFP_COMP; 1336 gfp_t gfp_flags = GFP_USER | __GFP_COMP;
1337 int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1338 sizeof(u32), PAGE_SIZE);
1406 1339
1407 pd->port_rcvhdrq = dma_alloc_coherent( 1340 pd->port_rcvhdrq = dma_alloc_coherent(
1408 &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys, 1341 &dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
@@ -1415,6 +1348,16 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1415 ret = -ENOMEM; 1348 ret = -ENOMEM;
1416 goto bail; 1349 goto bail;
1417 } 1350 }
1351 pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
1352 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
1353 if (!pd->port_rcvhdrtail_kvaddr) {
1354 ipath_dev_err(dd, "attempt to allocate 1 page "
1355 "for port %u rcvhdrqtailaddr failed\n",
1356 pd->port_port);
1357 ret = -ENOMEM;
1358 goto bail;
1359 }
1360 pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
1418 1361
1419 pd->port_rcvhdrq_size = amt; 1362 pd->port_rcvhdrq_size = amt;
1420 1363
@@ -1424,20 +1367,28 @@ int ipath_create_rcvhdrq(struct ipath_devdata *dd,
1424 (unsigned long) pd->port_rcvhdrq_phys, 1367 (unsigned long) pd->port_rcvhdrq_phys,
1425 (unsigned long) pd->port_rcvhdrq_size, 1368 (unsigned long) pd->port_rcvhdrq_size,
1426 pd->port_port); 1369 pd->port_port);
1427 } else { 1370
1428 /* 1371 ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
1429 * clear for security, sanity, and/or debugging, each 1372 pd->port_port,
1430 * time we reuse 1373 (unsigned long long) phys_hdrqtail);
1431 */
1432 memset(pd->port_rcvhdrq, 0, amt);
1433 } 1374 }
1375 else
1376 ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
1377 "hdrtailaddr@%p %llx physical\n",
1378 pd->port_port, pd->port_rcvhdrq,
1379 pd->port_rcvhdrq_phys, pd->port_rcvhdrtail_kvaddr,
1380 (unsigned long long)pd->port_rcvhdrqtailaddr_phys);
1381
1382 /* clear for security and sanity on each use */
1383 memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
1384 memset((void *)pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
1434 1385
1435 /* 1386 /*
1436 * tell chip each time we init it, even if we are re-using previous 1387 * tell chip each time we init it, even if we are re-using previous
1437 * memory (we zero it at process close) 1388 * memory (we zero the register at process close)
1438 */ 1389 */
1439 ipath_cdbg(VERBOSE, "writing port %d rcvhdraddr as %lx\n", 1390 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1440 pd->port_port, (unsigned long) pd->port_rcvhdrq_phys); 1391 pd->port_port, pd->port_rcvhdrqtailaddr_phys);
1441 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr, 1392 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
1442 pd->port_port, pd->port_rcvhdrq_phys); 1393 pd->port_port, pd->port_rcvhdrq_phys);
1443 1394
@@ -1525,15 +1476,27 @@ void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
1525 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED", 1476 [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
1526 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE" 1477 [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
1527 }; 1478 };
1479 int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
1480 INFINIPATH_IBCC_LINKCMD_MASK;
1481
1528 ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate " 1482 ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
1529 "is %s\n", dd->ipath_unit, 1483 "is %s\n", dd->ipath_unit,
1530 what[(which >> INFINIPATH_IBCC_LINKCMD_SHIFT) & 1484 what[linkcmd],
1531 INFINIPATH_IBCC_LINKCMD_MASK],
1532 ipath_ibcstatus_str[ 1485 ipath_ibcstatus_str[
1533 (ipath_read_kreg64 1486 (ipath_read_kreg64
1534 (dd, dd->ipath_kregs->kr_ibcstatus) >> 1487 (dd, dd->ipath_kregs->kr_ibcstatus) >>
1535 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) & 1488 INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
1536 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]); 1489 INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
1490 /* flush all queued sends when going to DOWN or INIT, to be sure that
1491 * they don't block SMA and other MAD packets */
1492 if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
1493 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
1494 INFINIPATH_S_ABORT);
1495 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
1496 (unsigned)(dd->ipath_piobcnt2k +
1497 dd->ipath_piobcnt4k) -
1498 dd->ipath_lastport_piobuf);
1499 }
1537 1500
1538 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl, 1501 ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
1539 dd->ipath_ibcctrl | which); 1502 dd->ipath_ibcctrl | which);
@@ -1681,60 +1644,54 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
1681/** 1644/**
1682 * ipath_free_pddata - free a port's allocated data 1645 * ipath_free_pddata - free a port's allocated data
1683 * @dd: the infinipath device 1646 * @dd: the infinipath device
1684 * @port: the port 1647 * @pd: the portdata structure
1685 * @freehdrq: free the port data structure if true
1686 * 1648 *
1687 * when closing, free up any allocated data for a port, if the 1649 * free up any allocated data for a port
1688 * reference count goes to zero 1650 * This should not touch anything that would affect a simultaneous
1689 * Note: this also optionally frees the portdata itself! 1651 * re-allocation of port data, because it is called after ipath_mutex
1690 * Any changes here have to be matched up with the reinit case 1652 * is released (and can be called from reinit as well).
1691 * of ipath_init_chip(), which calls this routine on reinit after reset. 1653 * It should never change any chip state, or global driver state.
1654 * (The only exception to global state is freeing the port0 port0_skbs.)
1692 */ 1655 */
1693void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq) 1656void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
1694{ 1657{
1695 struct ipath_portdata *pd = dd->ipath_pd[port];
1696
1697 if (!pd) 1658 if (!pd)
1698 return; 1659 return;
1699 if (freehdrq) 1660
1700 /* 1661 if (pd->port_rcvhdrq) {
1701 * only clear and free portdata if we are going to also
1702 * release the hdrq, otherwise we leak the hdrq on each
1703 * open/close cycle
1704 */
1705 dd->ipath_pd[port] = NULL;
1706 if (freehdrq && pd->port_rcvhdrq) {
1707 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p " 1662 ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
1708 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq, 1663 "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
1709 (unsigned long) pd->port_rcvhdrq_size); 1664 (unsigned long) pd->port_rcvhdrq_size);
1710 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size, 1665 dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
1711 pd->port_rcvhdrq, pd->port_rcvhdrq_phys); 1666 pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
1712 pd->port_rcvhdrq = NULL; 1667 pd->port_rcvhdrq = NULL;
1668 if (pd->port_rcvhdrtail_kvaddr) {
1669 dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
1670 (void *)pd->port_rcvhdrtail_kvaddr,
1671 pd->port_rcvhdrqtailaddr_phys);
1672 pd->port_rcvhdrtail_kvaddr = NULL;
1673 }
1713 } 1674 }
1714 if (port && pd->port_rcvegrbuf) { 1675 if (pd->port_port && pd->port_rcvegrbuf) {
1715 /* always free this */ 1676 unsigned e;
1716 if (pd->port_rcvegrbuf) { 1677
1717 unsigned e; 1678 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
1718 1679 void *base = pd->port_rcvegrbuf[e];
1719 for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) { 1680 size_t size = pd->port_rcvegrbuf_size;
1720 void *base = pd->port_rcvegrbuf[e]; 1681
1721 size_t size = pd->port_rcvegrbuf_size; 1682 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
1722 1683 "chunk %u/%u\n", base,
1723 ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), " 1684 (unsigned long) size,
1724 "chunk %u/%u\n", base, 1685 e, pd->port_rcvegrbuf_chunks);
1725 (unsigned long) size, 1686 dma_free_coherent(&dd->pcidev->dev, size,
1726 e, pd->port_rcvegrbuf_chunks); 1687 base, pd->port_rcvegrbuf_phys[e]);
1727 dma_free_coherent(
1728 &dd->pcidev->dev, size, base,
1729 pd->port_rcvegrbuf_phys[e]);
1730 }
1731 vfree(pd->port_rcvegrbuf);
1732 pd->port_rcvegrbuf = NULL;
1733 vfree(pd->port_rcvegrbuf_phys);
1734 pd->port_rcvegrbuf_phys = NULL;
1735 } 1688 }
1689 vfree(pd->port_rcvegrbuf);
1690 pd->port_rcvegrbuf = NULL;
1691 vfree(pd->port_rcvegrbuf_phys);
1692 pd->port_rcvegrbuf_phys = NULL;
1736 pd->port_rcvegrbuf_chunks = 0; 1693 pd->port_rcvegrbuf_chunks = 0;
1737 } else if (port == 0 && dd->ipath_port0_skbs) { 1694 } else if (pd->port_port == 0 && dd->ipath_port0_skbs) {
1738 unsigned e; 1695 unsigned e;
1739 struct sk_buff **skbs = dd->ipath_port0_skbs; 1696 struct sk_buff **skbs = dd->ipath_port0_skbs;
1740 1697
@@ -1746,10 +1703,8 @@ void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq)
1746 dev_kfree_skb(skbs[e]); 1703 dev_kfree_skb(skbs[e]);
1747 vfree(skbs); 1704 vfree(skbs);
1748 } 1705 }
1749 if (freehdrq) { 1706 kfree(pd->port_tid_pg_list);
1750 kfree(pd->port_tid_pg_list); 1707 kfree(pd);
1751 kfree(pd);
1752 }
1753} 1708}
1754 1709
1755static int __init infinipath_init(void) 1710static int __init infinipath_init(void)
@@ -1874,10 +1829,14 @@ static void cleanup_device(struct ipath_devdata *dd)
1874 1829
1875 /* 1830 /*
1876 * free any resources still in use (usually just kernel ports) 1831 * free any resources still in use (usually just kernel ports)
1877 * at unload 1832 * at unload; we do for portcnt, not cfgports, because cfgports
1833 * could have changed while we were loaded.
1878 */ 1834 */
1879 for (port = 0; port < dd->ipath_cfgports; port++) 1835 for (port = 0; port < dd->ipath_portcnt; port++) {
1880 ipath_free_pddata(dd, port, 1); 1836 struct ipath_portdata *pd = dd->ipath_pd[port];
1837 dd->ipath_pd[port] = NULL;
1838 ipath_free_pddata(dd, pd);
1839 }
1881 kfree(dd->ipath_pd); 1840 kfree(dd->ipath_pd);
1882 /* 1841 /*
1883 * debuggability, in case some cleanup path tries to use it 1842 * debuggability, in case some cleanup path tries to use it
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 03689dbe1a9e..e89d3a17acd9 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -123,6 +123,7 @@ static int ipath_get_base_info(struct ipath_portdata *pd,
123 * on to yet another method of dealing with this 123 * on to yet another method of dealing with this
124 */ 124 */
125 kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys; 125 kinfo->spi_rcvhdr_base = (u64) pd->port_rcvhdrq_phys;
126 kinfo->spi_rcvhdr_tailaddr = (u64)pd->port_rcvhdrqtailaddr_phys;
126 kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys; 127 kinfo->spi_rcv_egrbufs = (u64) pd->port_rcvegr_phys;
127 kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys; 128 kinfo->spi_pioavailaddr = (u64) dd->ipath_pioavailregs_phys;
128 kinfo->spi_status = (u64) kinfo->spi_pioavailaddr + 129 kinfo->spi_status = (u64) kinfo->spi_pioavailaddr +
@@ -785,11 +786,12 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
785 786
786bail_rcvegrbuf_phys: 787bail_rcvegrbuf_phys:
787 for (e = 0; e < pd->port_rcvegrbuf_chunks && 788 for (e = 0; e < pd->port_rcvegrbuf_chunks &&
788 pd->port_rcvegrbuf[e]; e++) 789 pd->port_rcvegrbuf[e]; e++) {
789 dma_free_coherent(&dd->pcidev->dev, size, 790 dma_free_coherent(&dd->pcidev->dev, size,
790 pd->port_rcvegrbuf[e], 791 pd->port_rcvegrbuf[e],
791 pd->port_rcvegrbuf_phys[e]); 792 pd->port_rcvegrbuf_phys[e]);
792 793
794 }
793 vfree(pd->port_rcvegrbuf_phys); 795 vfree(pd->port_rcvegrbuf_phys);
794 pd->port_rcvegrbuf_phys = NULL; 796 pd->port_rcvegrbuf_phys = NULL;
795bail_rcvegrbuf: 797bail_rcvegrbuf:
@@ -804,10 +806,7 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
804{ 806{
805 int ret = 0; 807 int ret = 0;
806 struct ipath_devdata *dd = pd->port_dd; 808 struct ipath_devdata *dd = pd->port_dd;
807 u64 physaddr, uaddr, off, atmp;
808 struct page *pagep;
809 u32 head32; 809 u32 head32;
810 u64 head;
811 810
812 /* for now, if major version is different, bail */ 811 /* for now, if major version is different, bail */
813 if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) { 812 if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
@@ -832,54 +831,6 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
832 831
833 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */ 832 /* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
834 833
835 /* set up for the rcvhdr Q tail register writeback to user memory */
836 if (!uinfo->spu_rcvhdraddr ||
837 !access_ok(VERIFY_WRITE, (u64 __user *) (unsigned long)
838 uinfo->spu_rcvhdraddr, sizeof(u64))) {
839 ipath_dbg("Port %d rcvhdrtail addr %llx not valid\n",
840 pd->port_port,
841 (unsigned long long) uinfo->spu_rcvhdraddr);
842 ret = -EINVAL;
843 goto done;
844 }
845
846 off = offset_in_page(uinfo->spu_rcvhdraddr);
847 uaddr = PAGE_MASK & (unsigned long) uinfo->spu_rcvhdraddr;
848 ret = ipath_get_user_pages_nocopy(uaddr, &pagep);
849 if (ret) {
850 dev_info(&dd->pcidev->dev, "Failed to lookup and lock "
851 "address %llx for rcvhdrtail: errno %d\n",
852 (unsigned long long) uinfo->spu_rcvhdraddr, -ret);
853 goto done;
854 }
855 ipath_stats.sps_pagelocks++;
856 pd->port_rcvhdrtail_uaddr = uaddr;
857 pd->port_rcvhdrtail_pagep = pagep;
858 pd->port_rcvhdrtail_kvaddr =
859 page_address(pagep);
860 pd->port_rcvhdrtail_kvaddr += off;
861 physaddr = page_to_phys(pagep) + off;
862 ipath_cdbg(VERBOSE, "port %d user addr %llx hdrtailaddr, %llx "
863 "physical (off=%llx)\n",
864 pd->port_port,
865 (unsigned long long) uinfo->spu_rcvhdraddr,
866 (unsigned long long) physaddr, (unsigned long long) off);
867 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
868 pd->port_port, physaddr);
869 atmp = ipath_read_kreg64_port(dd,
870 dd->ipath_kregs->kr_rcvhdrtailaddr,
871 pd->port_port);
872 if (physaddr != atmp) {
873 ipath_dev_err(dd,
874 "Catastrophic software error, "
875 "RcvHdrTailAddr%u written as %llx, "
876 "read back as %llx\n", pd->port_port,
877 (unsigned long long) physaddr,
878 (unsigned long long) atmp);
879 ret = -EINVAL;
880 goto done;
881 }
882
883 /* for right now, kernel piobufs are at end, so port 1 is at 0 */ 834 /* for right now, kernel piobufs are at end, so port 1 is at 0 */
884 pd->port_piobufs = dd->ipath_piobufbase + 835 pd->port_piobufs = dd->ipath_piobufbase +
885 dd->ipath_pbufsport * (pd->port_port - 836 dd->ipath_pbufsport * (pd->port_port -
@@ -898,26 +849,18 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
898 ret = ipath_create_user_egr(pd); 849 ret = ipath_create_user_egr(pd);
899 if (ret) 850 if (ret)
900 goto done; 851 goto done;
901 /* enable receives now */
902 /* atomically set enable bit for this port */
903 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
904 &dd->ipath_rcvctrl);
905 852
906 /* 853 /*
907 * set the head registers for this port to the current values 854 * set the eager head register for this port to the current values
908 * of the tail pointers, since we don't know if they were 855 * of the tail pointers, since we don't know if they were
909 * updated on last use of the port. 856 * updated on last use of the port.
910 */ 857 */
911 head32 = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
912 head = (u64) head32;
913 ipath_write_ureg(dd, ur_rcvhdrhead, head, pd->port_port);
914 head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port); 858 head32 = ipath_read_ureg32(dd, ur_rcvegrindextail, pd->port_port);
915 ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port); 859 ipath_write_ureg(dd, ur_rcvegrindexhead, head32, pd->port_port);
916 dd->ipath_lastegrheads[pd->port_port] = -1; 860 dd->ipath_lastegrheads[pd->port_port] = -1;
917 dd->ipath_lastrcvhdrqtails[pd->port_port] = -1; 861 dd->ipath_lastrcvhdrqtails[pd->port_port] = -1;
918 ipath_cdbg(VERBOSE, "Wrote port%d head %llx, egrhead %x from " 862 ipath_cdbg(VERBOSE, "Wrote port%d egrhead %x from tail regs\n",
919 "tail regs\n", pd->port_port, 863 pd->port_port, head32);
920 (unsigned long long) head, head32);
921 pd->port_tidcursor = 0; /* start at beginning after open */ 864 pd->port_tidcursor = 0; /* start at beginning after open */
922 /* 865 /*
923 * now enable the port; the tail registers will be written to memory 866 * now enable the port; the tail registers will be written to memory
@@ -926,24 +869,76 @@ static int ipath_do_user_init(struct ipath_portdata *pd,
926 * transition from 0 to 1, so clear it first, then set it as part of 869 * transition from 0 to 1, so clear it first, then set it as part of
927 * enabling the port. This will (very briefly) affect any other 870 * enabling the port. This will (very briefly) affect any other
928 * open ports, but it shouldn't be long enough to be an issue. 871 * open ports, but it shouldn't be long enough to be an issue.
872 * We explicitly set the in-memory copy to 0 beforehand, so we don't
873 * have to wait to be sure the DMA update has happened.
929 */ 874 */
875 *pd->port_rcvhdrtail_kvaddr = 0ULL;
876 set_bit(INFINIPATH_R_PORTENABLE_SHIFT + pd->port_port,
877 &dd->ipath_rcvctrl);
930 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 878 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
931 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD); 879 dd->ipath_rcvctrl & ~INFINIPATH_R_TAILUPD);
932 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl, 880 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
933 dd->ipath_rcvctrl); 881 dd->ipath_rcvctrl);
934
935done: 882done:
936 return ret; 883 return ret;
937} 884}
938 885
886
887/* common code for the mappings on dma_alloc_coherent mem */
888static int ipath_mmap_mem(struct vm_area_struct *vma,
889 struct ipath_portdata *pd, unsigned len,
890 int write_ok, dma_addr_t addr, char *what)
891{
892 struct ipath_devdata *dd = pd->port_dd;
893 unsigned pfn = (unsigned long)addr >> PAGE_SHIFT;
894 int ret;
895
896 if ((vma->vm_end - vma->vm_start) > len) {
897 dev_info(&dd->pcidev->dev,
898 "FAIL on %s: len %lx > %x\n", what,
899 vma->vm_end - vma->vm_start, len);
900 ret = -EFAULT;
901 goto bail;
902 }
903
904 if (!write_ok) {
905 if (vma->vm_flags & VM_WRITE) {
906 dev_info(&dd->pcidev->dev,
907 "%s must be mapped readonly\n", what);
908 ret = -EPERM;
909 goto bail;
910 }
911
912 /* don't allow them to later change with mprotect */
913 vma->vm_flags &= ~VM_MAYWRITE;
914 }
915
916 ret = remap_pfn_range(vma, vma->vm_start, pfn,
917 len, vma->vm_page_prot);
918 if (ret)
919 dev_info(&dd->pcidev->dev,
920 "%s port%u mmap of %lx, %x bytes r%c failed: %d\n",
921 what, pd->port_port, (unsigned long)addr, len,
922 write_ok?'w':'o', ret);
923 else
924 ipath_cdbg(VERBOSE, "%s port%u mmaped %lx, %x bytes r%c\n",
925 what, pd->port_port, (unsigned long)addr, len,
926 write_ok?'w':'o');
927bail:
928 return ret;
929}
930
939static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd, 931static int mmap_ureg(struct vm_area_struct *vma, struct ipath_devdata *dd,
940 u64 ureg) 932 u64 ureg)
941{ 933{
942 unsigned long phys; 934 unsigned long phys;
943 int ret; 935 int ret;
944 936
945 /* it's the real hardware, so io_remap works */ 937 /*
946 938 * This is real hardware, so use io_remap. This is the mechanism
939 * for the user process to update the head registers for their port
940 * in the chip.
941 */
947 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) { 942 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
948 dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen " 943 dev_info(&dd->pcidev->dev, "FAIL mmap userreg: reqlen "
949 "%lx > PAGE\n", vma->vm_end - vma->vm_start); 944 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
@@ -969,10 +964,11 @@ static int mmap_piobufs(struct vm_area_struct *vma,
969 int ret; 964 int ret;
970 965
971 /* 966 /*
972 * When we map the PIO buffers, we want to map them as writeonly, no 967 * When we map the PIO buffers in the chip, we want to map them as
973 * read possible. 968 * writeonly, no read possible. This prevents access to previous
969 * process data, and catches users who might try to read the i/o
970 * space due to a bug.
974 */ 971 */
975
976 if ((vma->vm_end - vma->vm_start) > 972 if ((vma->vm_end - vma->vm_start) >
977 (dd->ipath_pbufsport * dd->ipath_palign)) { 973 (dd->ipath_pbufsport * dd->ipath_palign)) {
978 dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: " 974 dev_info(&dd->pcidev->dev, "FAIL mmap piobufs: "
@@ -983,11 +979,10 @@ static int mmap_piobufs(struct vm_area_struct *vma,
983 } 979 }
984 980
985 phys = dd->ipath_physaddr + pd->port_piobufs; 981 phys = dd->ipath_physaddr + pd->port_piobufs;
982
986 /* 983 /*
987 * Do *NOT* mark this as non-cached (PWT bit), or we don't get the 984 * Don't mark this as non-cached, or we don't get the
988 * write combining behavior we want on the PIO buffers! 985 * write combining behavior we want on the PIO buffers!
989 * vma->vm_page_prot =
990 * pgprot_noncached(vma->vm_page_prot);
991 */ 986 */
992 987
993 if (vma->vm_flags & VM_READ) { 988 if (vma->vm_flags & VM_READ) {
@@ -999,8 +994,7 @@ static int mmap_piobufs(struct vm_area_struct *vma,
999 } 994 }
1000 995
1001 /* don't allow them to later change to readable with mprotect */ 996 /* don't allow them to later change to readable with mprotect */
1002 997 vma->vm_flags &= ~VM_MAYREAD;
1003 vma->vm_flags &= ~VM_MAYWRITE;
1004 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND; 998 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND;
1005 999
1006 ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT, 1000 ret = io_remap_pfn_range(vma, vma->vm_start, phys >> PAGE_SHIFT,
@@ -1019,11 +1013,6 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1019 dma_addr_t *phys; 1013 dma_addr_t *phys;
1020 int ret; 1014 int ret;
1021 1015
1022 if (!pd->port_rcvegrbuf) {
1023 ret = -EFAULT;
1024 goto bail;
1025 }
1026
1027 size = pd->port_rcvegrbuf_size; 1016 size = pd->port_rcvegrbuf_size;
1028 total_size = pd->port_rcvegrbuf_chunks * size; 1017 total_size = pd->port_rcvegrbuf_chunks * size;
1029 if ((vma->vm_end - vma->vm_start) > total_size) { 1018 if ((vma->vm_end - vma->vm_start) > total_size) {
@@ -1041,13 +1030,12 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
1041 ret = -EPERM; 1030 ret = -EPERM;
1042 goto bail; 1031 goto bail;
1043 } 1032 }
1033 /* don't allow them to later change to writeable with mprotect */
1034 vma->vm_flags &= ~VM_MAYWRITE;
1044 1035
1045 start = vma->vm_start; 1036 start = vma->vm_start;
1046 phys = pd->port_rcvegrbuf_phys; 1037 phys = pd->port_rcvegrbuf_phys;
1047 1038
1048 /* don't allow them to later change to writeable with mprotect */
1049 vma->vm_flags &= ~VM_MAYWRITE;
1050
1051 for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) { 1039 for (i = 0; i < pd->port_rcvegrbuf_chunks; i++, start += size) {
1052 ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT, 1040 ret = remap_pfn_range(vma, start, phys[i] >> PAGE_SHIFT,
1053 size, vma->vm_page_prot); 1041 size, vma->vm_page_prot);
@@ -1060,78 +1048,6 @@ bail:
1060 return ret; 1048 return ret;
1061} 1049}
1062 1050
1063static int mmap_rcvhdrq(struct vm_area_struct *vma,
1064 struct ipath_portdata *pd)
1065{
1066 struct ipath_devdata *dd = pd->port_dd;
1067 size_t total_size;
1068 int ret;
1069
1070 /*
1071 * kmalloc'ed memory, physically contiguous; this is from
1072 * spi_rcvhdr_base; we allow user to map read-write so they can
1073 * write hdrq entries to allow protocol code to directly poll
1074 * whether a hdrq entry has been written.
1075 */
1076 total_size = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
1077 sizeof(u32), PAGE_SIZE);
1078 if ((vma->vm_end - vma->vm_start) > total_size) {
1079 dev_info(&dd->pcidev->dev,
1080 "FAIL on rcvhdrq: reqlen %lx > actual %lx\n",
1081 vma->vm_end - vma->vm_start,
1082 (unsigned long) total_size);
1083 ret = -EFAULT;
1084 goto bail;
1085 }
1086
1087 ret = remap_pfn_range(vma, vma->vm_start,
1088 pd->port_rcvhdrq_phys >> PAGE_SHIFT,
1089 vma->vm_end - vma->vm_start,
1090 vma->vm_page_prot);
1091bail:
1092 return ret;
1093}
1094
1095static int mmap_pioavailregs(struct vm_area_struct *vma,
1096 struct ipath_portdata *pd)
1097{
1098 struct ipath_devdata *dd = pd->port_dd;
1099 int ret;
1100
1101 /*
1102 * when we map the PIO bufferavail registers, we want to map them as
1103 * readonly, no write possible.
1104 *
1105 * kmalloc'ed memory, physically contiguous, one page only, readonly
1106 */
1107
1108 if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) {
1109 dev_info(&dd->pcidev->dev, "FAIL on pioavailregs_dma: "
1110 "reqlen %lx > actual %lx\n",
1111 vma->vm_end - vma->vm_start,
1112 (unsigned long) PAGE_SIZE);
1113 ret = -EFAULT;
1114 goto bail;
1115 }
1116
1117 if (vma->vm_flags & VM_WRITE) {
1118 dev_info(&dd->pcidev->dev,
1119 "Can't map pioavailregs as writable (flags=%lx)\n",
1120 vma->vm_flags);
1121 ret = -EPERM;
1122 goto bail;
1123 }
1124
1125 /* don't allow them to later change with mprotect */
1126 vma->vm_flags &= ~VM_MAYWRITE;
1127
1128 ret = remap_pfn_range(vma, vma->vm_start,
1129 dd->ipath_pioavailregs_phys >> PAGE_SHIFT,
1130 PAGE_SIZE, vma->vm_page_prot);
1131bail:
1132 return ret;
1133}
1134
1135/** 1051/**
1136 * ipath_mmap - mmap various structures into user space 1052 * ipath_mmap - mmap various structures into user space
1137 * @fp: the file pointer 1053 * @fp: the file pointer
@@ -1151,6 +1067,7 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1151 1067
1152 pd = port_fp(fp); 1068 pd = port_fp(fp);
1153 dd = pd->port_dd; 1069 dd = pd->port_dd;
1070
1154 /* 1071 /*
1155 * This is the ipath_do_user_init() code, mapping the shared buffers 1072 * This is the ipath_do_user_init() code, mapping the shared buffers
1156 * into the user process. The address referred to by vm_pgoff is the 1073 * into the user process. The address referred to by vm_pgoff is the
@@ -1160,29 +1077,59 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
1160 pgaddr = vma->vm_pgoff << PAGE_SHIFT; 1077 pgaddr = vma->vm_pgoff << PAGE_SHIFT;
1161 1078
1162 /* 1079 /*
1163 * note that ureg does *NOT* have the kregvirt as part of it, to be 1080 * Must fit in 40 bits for our hardware; some checked elsewhere,
1164 * sure that for 32 bit programs, we don't end up trying to map a > 1081 * but we'll be paranoid. Check for 0 is mostly in case one of the
1165 * 44 address. Has to match ipath_get_base_info() code that sets 1082 * allocations failed, but user called mmap anyway. We want to catch
1166 * __spi_uregbase 1083 * that before it can match.
1167 */ 1084 */
1085 if (!pgaddr || pgaddr >= (1ULL<<40)) {
1086 ipath_dev_err(dd, "Bad phys addr %llx, start %lx, end %lx\n",
1087 (unsigned long long)pgaddr, vma->vm_start, vma->vm_end);
1088 return -EINVAL;
1089 }
1168 1090
1091 /* just the offset of the port user registers, not physical addr */
1169 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port; 1092 ureg = dd->ipath_uregbase + dd->ipath_palign * pd->port_port;
1170 1093
1171 ipath_cdbg(MM, "pgaddr %llx vm_start=%lx len %lx port %u:%u\n", 1094 ipath_cdbg(MM, "ushare: pgaddr %llx vm_start=%lx, vmlen %lx\n",
1172 (unsigned long long) pgaddr, vma->vm_start, 1095 (unsigned long long) pgaddr, vma->vm_start,
1173 vma->vm_end - vma->vm_start, dd->ipath_unit, 1096 vma->vm_end - vma->vm_start);
1174 pd->port_port);
1175 1097
1176 if (pgaddr == ureg) 1098 if (vma->vm_start & (PAGE_SIZE-1)) {
1099 ipath_dev_err(dd,
1100 "vm_start not aligned: %lx, end=%lx phys %lx\n",
1101 vma->vm_start, vma->vm_end, (unsigned long)pgaddr);
1102 ret = -EINVAL;
1103 }
1104 else if (pgaddr == ureg)
1177 ret = mmap_ureg(vma, dd, ureg); 1105 ret = mmap_ureg(vma, dd, ureg);
1178 else if (pgaddr == pd->port_piobufs) 1106 else if (pgaddr == pd->port_piobufs)
1179 ret = mmap_piobufs(vma, dd, pd); 1107 ret = mmap_piobufs(vma, dd, pd);
1180 else if (pgaddr == (u64) pd->port_rcvegr_phys) 1108 else if (pgaddr == (u64) pd->port_rcvegr_phys)
1181 ret = mmap_rcvegrbufs(vma, pd); 1109 ret = mmap_rcvegrbufs(vma, pd);
1182 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) 1110 else if (pgaddr == (u64) pd->port_rcvhdrq_phys) {
1183 ret = mmap_rcvhdrq(vma, pd); 1111 /*
1112 * The rcvhdrq itself; readonly except on HT-400 (so have
1113 * to allow writable mapping), multiple pages, contiguous
1114 * from an i/o perspective.
1115 */
1116 unsigned total_size =
1117 ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize
1118 * sizeof(u32), PAGE_SIZE);
1119 ret = ipath_mmap_mem(vma, pd, total_size, 1,
1120 pd->port_rcvhdrq_phys,
1121 "rcvhdrq");
1122 }
1123 else if (pgaddr == (u64)pd->port_rcvhdrqtailaddr_phys)
1124 /* in-memory copy of rcvhdrq tail register */
1125 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1126 pd->port_rcvhdrqtailaddr_phys,
1127 "rcvhdrq tail");
1184 else if (pgaddr == dd->ipath_pioavailregs_phys) 1128 else if (pgaddr == dd->ipath_pioavailregs_phys)
1185 ret = mmap_pioavailregs(vma, pd); 1129 /* in-memory copy of pioavail registers */
1130 ret = ipath_mmap_mem(vma, pd, PAGE_SIZE, 0,
1131 dd->ipath_pioavailregs_phys,
1132 "pioavail registers");
1186 else 1133 else
1187 ret = -EINVAL; 1134 ret = -EINVAL;
1188 1135
@@ -1539,14 +1486,6 @@ static int ipath_close(struct inode *in, struct file *fp)
1539 } 1486 }
1540 1487
1541 if (dd->ipath_kregbase) { 1488 if (dd->ipath_kregbase) {
1542 if (pd->port_rcvhdrtail_uaddr) {
1543 pd->port_rcvhdrtail_uaddr = 0;
1544 pd->port_rcvhdrtail_kvaddr = NULL;
1545 ipath_release_user_pages_on_close(
1546 &pd->port_rcvhdrtail_pagep, 1);
1547 pd->port_rcvhdrtail_pagep = NULL;
1548 ipath_stats.sps_pageunlocks++;
1549 }
1550 ipath_write_kreg_port( 1489 ipath_write_kreg_port(
1551 dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 1490 dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
1552 port, 0ULL); 1491 port, 0ULL);
@@ -1583,9 +1522,9 @@ static int ipath_close(struct inode *in, struct file *fp)
1583 1522
1584 dd->ipath_f_clear_tids(dd, pd->port_port); 1523 dd->ipath_f_clear_tids(dd, pd->port_port);
1585 1524
1586 ipath_free_pddata(dd, pd->port_port, 0); 1525 dd->ipath_pd[pd->port_port] = NULL; /* before releasing mutex */
1587
1588 mutex_unlock(&ipath_mutex); 1526 mutex_unlock(&ipath_mutex);
1527 ipath_free_pddata(dd, pd); /* after releasing the mutex */
1589 1528
1590 return ret; 1529 return ret;
1591} 1530}
@@ -1905,3 +1844,4 @@ void ipath_user_remove(struct ipath_devdata *dd)
1905bail: 1844bail:
1906 return; 1845 return;
1907} 1846}
1847
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 37cd96e96e3e..385605f63f0f 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -411,17 +411,8 @@ static int init_pioavailregs(struct ipath_devdata *dd)
411 /* and its length */ 411 /* and its length */
412 dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]); 412 dd->ipath_freezelen = L1_CACHE_BYTES - sizeof(dd->ipath_statusp[0]);
413 413
414 if (dd->ipath_unit * 64 > (IPATH_PORT0_RCVHDRTAIL_SIZE - 64)) { 414 ret = 0;
415 ipath_dev_err(dd, "unit %u too large for port 0 "
416 "rcvhdrtail buffer size\n", dd->ipath_unit);
417 ret = -ENODEV;
418 }
419 else
420 ret = 0;
421 415
422 /* so we can get current tail in ipath_kreceive(), per chip */
423 dd->ipath_hdrqtailptr = &ipath_port0_rcvhdrtail[
424 dd->ipath_unit * (64 / sizeof(*ipath_port0_rcvhdrtail))];
425done: 416done:
426 return ret; 417 return ret;
427} 418}
@@ -654,7 +645,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
654{ 645{
655 int ret = 0, i; 646 int ret = 0, i;
656 u32 val32, kpiobufs; 647 u32 val32, kpiobufs;
657 u64 val, atmp; 648 u64 val;
658 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */ 649 struct ipath_portdata *pd = NULL; /* keep gcc4 happy */
659 650
660 ret = init_housekeeping(dd, &pd, reinit); 651 ret = init_housekeeping(dd, &pd, reinit);
@@ -777,24 +768,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
777 goto done; 768 goto done;
778 } 769 }
779 770
780 val = ipath_port0_rcvhdrtail_dma + dd->ipath_unit * 64;
781
782 /* verify that the alignment requirement was met */
783 ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
784 0, val);
785 atmp = ipath_read_kreg64_port(
786 dd, dd->ipath_kregs->kr_rcvhdrtailaddr, 0);
787 if (val != atmp) {
788 ipath_dev_err(dd, "Catastrophic software error, "
789 "RcvHdrTailAddr0 written as %llx, "
790 "read back as %llx from %x\n",
791 (unsigned long long) val,
792 (unsigned long long) atmp,
793 dd->ipath_kregs->kr_rcvhdrtailaddr);
794 ret = -EINVAL;
795 goto done;
796 }
797
798 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP); 771 ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvbthqp, IPATH_KD_QP);
799 772
800 /* 773 /*
@@ -845,12 +818,18 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
845 * re-init, the simplest way to handle this is to free 818 * re-init, the simplest way to handle this is to free
846 * existing, and re-allocate. 819 * existing, and re-allocate.
847 */ 820 */
848 if (reinit) 821 if (reinit) {
849 ipath_free_pddata(dd, 0, 0); 822 struct ipath_portdata *pd = dd->ipath_pd[0];
823 dd->ipath_pd[0] = NULL;
824 ipath_free_pddata(dd, pd);
825 }
850 dd->ipath_f_tidtemplate(dd); 826 dd->ipath_f_tidtemplate(dd);
851 ret = ipath_create_rcvhdrq(dd, pd); 827 ret = ipath_create_rcvhdrq(dd, pd);
852 if (!ret) 828 if (!ret) {
829 dd->ipath_hdrqtailptr =
830 (volatile __le64 *)pd->port_rcvhdrtail_kvaddr;
853 ret = create_port0_egr(dd); 831 ret = create_port0_egr(dd);
832 }
854 if (ret) 833 if (ret)
855 ipath_dev_err(dd, "failed to allocate port 0 (kernel) " 834 ipath_dev_err(dd, "failed to allocate port 0 (kernel) "
856 "rcvhdrq and/or egr bufs\n"); 835 "rcvhdrq and/or egr bufs\n");
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 9004be32f3e6..8a4d732b4858 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -37,6 +37,7 @@
37#include "ips_common.h" 37#include "ips_common.h"
38#include "ipath_layer.h" 38#include "ipath_layer.h"
39 39
40/* These are all rcv-related errors which we want to count for stats */
40#define E_SUM_PKTERRS \ 41#define E_SUM_PKTERRS \
41 (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \ 42 (INFINIPATH_E_RHDRLEN | INFINIPATH_E_RBADTID | \
42 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \ 43 INFINIPATH_E_RBADVERSION | INFINIPATH_E_RHDR | \
@@ -45,6 +46,7 @@
45 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \ 46 INFINIPATH_E_RFORMATERR | INFINIPATH_E_RUNSUPVL | \
46 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP) 47 INFINIPATH_E_RUNEXPCHAR | INFINIPATH_E_REBP)
47 48
49/* These are all send-related errors which we want to count for stats */
48#define E_SUM_ERRS \ 50#define E_SUM_ERRS \
49 (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \ 51 (INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SUNEXPERRPKTNUM | \
50 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \ 52 INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
@@ -52,6 +54,18 @@
52 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \ 54 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
53 INFINIPATH_E_INVALIDADDR) 55 INFINIPATH_E_INVALIDADDR)
54 56
57/*
58 * these are errors that can occur when the link changes state while
59 * a packet is being sent or received. This doesn't cover things
60 * like EBP or VCRC that can be the result of a sender having the
61 * link change state, so we receive a "known bad" packet.
62 */
63#define E_SUM_LINK_PKTERRS \
64 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
65 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
66 INFINIPATH_E_RSHORTPKTLEN | INFINIPATH_E_RMINPKTLEN | \
67 INFINIPATH_E_RUNEXPCHAR)
68
55static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs) 69static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
56{ 70{
57 unsigned long sbuf[4]; 71 unsigned long sbuf[4];
@@ -101,9 +115,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
101 if (ipath_debug & __IPATH_PKTDBG) 115 if (ipath_debug & __IPATH_PKTDBG)
102 printk("\n"); 116 printk("\n");
103 } 117 }
104 if ((errs & (INFINIPATH_E_SDROPPEDDATAPKT | 118 if ((errs & E_SUM_LINK_PKTERRS) &&
105 INFINIPATH_E_SDROPPEDSMPPKT |
106 INFINIPATH_E_SMINPKTLEN)) &&
107 !(dd->ipath_flags & IPATH_LINKACTIVE)) { 119 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
108 /* 120 /*
109 * This can happen when SMA is trying to bring the link 121 * This can happen when SMA is trying to bring the link
@@ -112,11 +124,9 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
112 * valid. We don't want to confuse people, so we just 124 * valid. We don't want to confuse people, so we just
113 * don't print them, except at debug 125 * don't print them, except at debug
114 */ 126 */
115 ipath_dbg("Ignoring pktsend errors %llx, because not " 127 ipath_dbg("Ignoring packet errors %llx, because link not "
116 "yet active\n", (unsigned long long) errs); 128 "ACTIVE\n", (unsigned long long) errs);
117 ignore_this_time = INFINIPATH_E_SDROPPEDDATAPKT | 129 ignore_this_time = errs & E_SUM_LINK_PKTERRS;
118 INFINIPATH_E_SDROPPEDSMPPKT |
119 INFINIPATH_E_SMINPKTLEN;
120 } 130 }
121 131
122 return ignore_this_time; 132 return ignore_this_time;
@@ -157,7 +167,29 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
157 */ 167 */
158 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus); 168 val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
159 lstate = val & IPATH_IBSTATE_MASK; 169 lstate = val & IPATH_IBSTATE_MASK;
160 if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM || 170
171 /*
172 * this is confusing enough when it happens that I want to always put it
173 * on the console and in the logs. If it was a requested state change,
174 * we'll have already cleared the flags, so we won't print this warning
175 */
176 if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE)
177 && (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
178 dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n",
179 (dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE",
180 ib_linkstate(lstate));
181 /*
182 * Flush all queued sends when link went to DOWN or INIT,
183 * to be sure that they don't block SMA and other MAD packets
184 */
185 ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
186 INFINIPATH_S_ABORT);
187 ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
188 (unsigned)(dd->ipath_piobcnt2k +
189 dd->ipath_piobcnt4k) -
190 dd->ipath_lastport_piobuf);
191 }
192 else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
161 lstate == IPATH_IBSTATE_ACTIVE) { 193 lstate == IPATH_IBSTATE_ACTIVE) {
162 /* 194 /*
163 * only print at SMA if there is a change, debug if not 195 * only print at SMA if there is a change, debug if not
@@ -380,6 +412,19 @@ static void handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
380 412
381 if (errs & E_SUM_ERRS) 413 if (errs & E_SUM_ERRS)
382 ignore_this_time = handle_e_sum_errs(dd, errs); 414 ignore_this_time = handle_e_sum_errs(dd, errs);
415 else if ((errs & E_SUM_LINK_PKTERRS) &&
416 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
417 /*
418 * This can happen when SMA is trying to bring the link
419 * up, but the IB link changes state at the "wrong" time.
420 * The IB logic then complains that the packet isn't
421 * valid. We don't want to confuse people, so we just
422 * don't print them, except at debug
423 */
424 ipath_dbg("Ignoring packet errors %llx, because link not "
425 "ACTIVE\n", (unsigned long long) errs);
426 ignore_this_time = errs & E_SUM_LINK_PKTERRS;
427 }
383 428
384 if (supp_msgs == 250000) { 429 if (supp_msgs == 250000) {
385 /* 430 /*
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index d9764c289bd1..493100dfbf59 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -62,9 +62,7 @@ struct ipath_portdata {
62 /* rcvhdrq base, needs mmap before useful */ 62 /* rcvhdrq base, needs mmap before useful */
63 void *port_rcvhdrq; 63 void *port_rcvhdrq;
64 /* kernel virtual address where hdrqtail is updated */ 64 /* kernel virtual address where hdrqtail is updated */
65 u64 *port_rcvhdrtail_kvaddr; 65 volatile __le64 *port_rcvhdrtail_kvaddr;
66 /* page * used for uaddr */
67 struct page *port_rcvhdrtail_pagep;
68 /* 66 /*
69 * temp buffer for expected send setup, allocated at open, instead 67 * temp buffer for expected send setup, allocated at open, instead
70 * of each setup call 68 * of each setup call
@@ -79,11 +77,7 @@ struct ipath_portdata {
79 dma_addr_t port_rcvegr_phys; 77 dma_addr_t port_rcvegr_phys;
80 /* mmap of hdrq, must fit in 44 bits */ 78 /* mmap of hdrq, must fit in 44 bits */
81 dma_addr_t port_rcvhdrq_phys; 79 dma_addr_t port_rcvhdrq_phys;
82 /* 80 dma_addr_t port_rcvhdrqtailaddr_phys;
83 * the actual user address that we ipath_mlock'ed, so we can
84 * ipath_munlock it at close
85 */
86 unsigned long port_rcvhdrtail_uaddr;
87 /* 81 /*
88 * number of opens on this instance (0 or 1; ignoring forks, dup, 82 * number of opens on this instance (0 or 1; ignoring forks, dup,
89 * etc. for now) 83 * etc. for now)
@@ -515,11 +509,6 @@ struct ipath_devdata {
515 u8 ipath_lmc; 509 u8 ipath_lmc;
516}; 510};
517 511
518extern volatile __le64 *ipath_port0_rcvhdrtail;
519extern dma_addr_t ipath_port0_rcvhdrtail_dma;
520
521#define IPATH_PORT0_RCVHDRTAIL_SIZE PAGE_SIZE
522
523extern struct list_head ipath_dev_list; 512extern struct list_head ipath_dev_list;
524extern spinlock_t ipath_devs_lock; 513extern spinlock_t ipath_devs_lock;
525extern struct ipath_devdata *ipath_lookup(int unit); 514extern struct ipath_devdata *ipath_lookup(int unit);
@@ -579,7 +568,7 @@ void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
579 unsigned cnt); 568 unsigned cnt);
580 569
581int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *); 570int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
582void ipath_free_pddata(struct ipath_devdata *, u32, int); 571void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
583 572
584int ipath_parse_ushort(const char *str, unsigned short *valp); 573int ipath_parse_ushort(const char *str, unsigned short *valp);
585 574