aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorRamkrishna Vepa <ramkrishna.vepa@intel.com>2013-05-28 12:57:33 -0400
committerRoland Dreier <roland@purestorage.com>2013-06-21 20:19:48 -0400
commite0f30baca1ebe5547f6760f760b8c4e189fc1203 (patch)
tree5b5b8858c292da3138f1ab8535872005f06f3e50 /drivers/infiniband/hw
parentab4a13d69bf01b098906c60e7598d10752401a56 (diff)
IB/qib: Add optional NUMA affinity
This patch adds context relative numa affinity conditioned on the module parameter numa_aware. The qib_ctxtdata has an additional node_id member and qib_create_ctxtdata() has an additional node_id parameter. The allocations within the hdr queue and eager queue setup routines now take this additional member and adjust allocations as necessary. PSM will pass either the current numa node or the node closest to the HCA depending on numa_aware. Verbs will always use the node closest to the HCA. Reviewed-by: Dean Luick <dean.luick@intel.com> Signed-off-by: Ramkrishna Vepa <ramkrishna.vepa@intel.com> Signed-off-by: Vinit Agnihotri <vinit.abhay.agnihotri@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/qib/qib.h7
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c6
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c39
3 files changed, 43 insertions, 9 deletions
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index cecbd43f9212..2ee82e6550c7 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -154,6 +154,8 @@ struct qib_ctxtdata {
154 */ 154 */
155 /* instead of calculating it */ 155 /* instead of calculating it */
156 unsigned ctxt; 156 unsigned ctxt;
157 /* local node of context */
158 int node_id;
157 /* non-zero if ctxt is being shared. */ 159 /* non-zero if ctxt is being shared. */
158 u16 subctxt_cnt; 160 u16 subctxt_cnt;
159 /* non-zero if ctxt is being shared. */ 161 /* non-zero if ctxt is being shared. */
@@ -1088,6 +1090,8 @@ struct qib_devdata {
1088 u16 psxmitwait_check_rate; 1090 u16 psxmitwait_check_rate;
1089 /* high volume overflow errors defered to tasklet */ 1091 /* high volume overflow errors defered to tasklet */
1090 struct tasklet_struct error_tasklet; 1092 struct tasklet_struct error_tasklet;
1093
1094 int assigned_node_id; /* NUMA node closest to HCA */
1091}; 1095};
1092 1096
1093/* hol_state values */ 1097/* hol_state values */
@@ -1167,7 +1171,7 @@ int qib_create_rcvhdrq(struct qib_devdata *, struct qib_ctxtdata *);
1167int qib_setup_eagerbufs(struct qib_ctxtdata *); 1171int qib_setup_eagerbufs(struct qib_ctxtdata *);
1168void qib_set_ctxtcnt(struct qib_devdata *); 1172void qib_set_ctxtcnt(struct qib_devdata *);
1169int qib_create_ctxts(struct qib_devdata *dd); 1173int qib_create_ctxts(struct qib_devdata *dd);
1170struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32); 1174struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *, u32, int);
1171void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8); 1175void qib_init_pportdata(struct qib_pportdata *, struct qib_devdata *, u8, u8);
1172void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *); 1176void qib_free_ctxtdata(struct qib_devdata *, struct qib_ctxtdata *);
1173 1177
@@ -1458,6 +1462,7 @@ extern unsigned qib_n_krcv_queues;
1458extern unsigned qib_sdma_fetch_arb; 1462extern unsigned qib_sdma_fetch_arb;
1459extern unsigned qib_compat_ddr_negotiate; 1463extern unsigned qib_compat_ddr_negotiate;
1460extern int qib_special_trigger; 1464extern int qib_special_trigger;
1465extern unsigned qib_numa_aware;
1461 1466
1462extern struct mutex qib_mutex; 1467extern struct mutex qib_mutex;
1463 1468
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index b56c9428f3c5..65b2fc3f957c 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1263,8 +1263,12 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1263 struct qib_ctxtdata *rcd; 1263 struct qib_ctxtdata *rcd;
1264 void *ptmp = NULL; 1264 void *ptmp = NULL;
1265 int ret; 1265 int ret;
1266 int numa_id;
1266 1267
1267 rcd = qib_create_ctxtdata(ppd, ctxt); 1268 numa_id = qib_numa_aware ? numa_node_id() :
1269 dd->assigned_node_id;
1270
1271 rcd = qib_create_ctxtdata(ppd, ctxt, numa_id);
1268 1272
1269 /* 1273 /*
1270 * Allocate memory for use in qib_tid_update() at open to 1274 * Allocate memory for use in qib_tid_update() at open to
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index 4b64c885fa0d..e02217b5c46d 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -67,6 +67,11 @@ ushort qib_cfgctxts;
67module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO); 67module_param_named(cfgctxts, qib_cfgctxts, ushort, S_IRUGO);
68MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use"); 68MODULE_PARM_DESC(cfgctxts, "Set max number of contexts to use");
69 69
70unsigned qib_numa_aware;
71module_param_named(numa_aware, qib_numa_aware, uint, S_IRUGO);
72MODULE_PARM_DESC(numa_aware,
73 "0 -> PSM allocation close to HCA, 1 -> PSM allocation local to process");
74
70/* 75/*
71 * If set, do not write to any regs if avoidable, hack to allow 76 * If set, do not write to any regs if avoidable, hack to allow
72 * check for deranged default register values. 77 * check for deranged default register values.
@@ -124,6 +129,11 @@ int qib_create_ctxts(struct qib_devdata *dd)
124{ 129{
125 unsigned i; 130 unsigned i;
126 int ret; 131 int ret;
132 int local_node_id = pcibus_to_node(dd->pcidev->bus);
133
134 if (local_node_id < 0)
135 local_node_id = numa_node_id();
136 dd->assigned_node_id = local_node_id;
127 137
128 /* 138 /*
129 * Allocate full ctxtcnt array, rather than just cfgctxts, because 139 * Allocate full ctxtcnt array, rather than just cfgctxts, because
@@ -146,7 +156,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
146 continue; 156 continue;
147 157
148 ppd = dd->pport + (i % dd->num_pports); 158 ppd = dd->pport + (i % dd->num_pports);
149 rcd = qib_create_ctxtdata(ppd, i); 159
160 rcd = qib_create_ctxtdata(ppd, i, dd->assigned_node_id);
150 if (!rcd) { 161 if (!rcd) {
151 qib_dev_err(dd, 162 qib_dev_err(dd,
152 "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); 163 "Unable to allocate ctxtdata for Kernel ctxt, failing\n");
@@ -164,14 +175,16 @@ done:
164/* 175/*
165 * Common code for user and kernel context setup. 176 * Common code for user and kernel context setup.
166 */ 177 */
167struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt) 178struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt,
179 int node_id)
168{ 180{
169 struct qib_devdata *dd = ppd->dd; 181 struct qib_devdata *dd = ppd->dd;
170 struct qib_ctxtdata *rcd; 182 struct qib_ctxtdata *rcd;
171 183
172 rcd = kzalloc(sizeof(*rcd), GFP_KERNEL); 184 rcd = kzalloc_node(sizeof(*rcd), GFP_KERNEL, node_id);
173 if (rcd) { 185 if (rcd) {
174 INIT_LIST_HEAD(&rcd->qp_wait_list); 186 INIT_LIST_HEAD(&rcd->qp_wait_list);
187 rcd->node_id = node_id;
175 rcd->ppd = ppd; 188 rcd->ppd = ppd;
176 rcd->dd = dd; 189 rcd->dd = dd;
177 rcd->cnt = 1; 190 rcd->cnt = 1;
@@ -1524,6 +1537,7 @@ static void qib_remove_one(struct pci_dev *pdev)
1524int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd) 1537int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1525{ 1538{
1526 unsigned amt; 1539 unsigned amt;
1540 int old_node_id;
1527 1541
1528 if (!rcd->rcvhdrq) { 1542 if (!rcd->rcvhdrq) {
1529 dma_addr_t phys_hdrqtail; 1543 dma_addr_t phys_hdrqtail;
@@ -1533,9 +1547,13 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1533 sizeof(u32), PAGE_SIZE); 1547 sizeof(u32), PAGE_SIZE);
1534 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? 1548 gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ?
1535 GFP_USER : GFP_KERNEL; 1549 GFP_USER : GFP_KERNEL;
1550
1551 old_node_id = dev_to_node(&dd->pcidev->dev);
1552 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1536 rcd->rcvhdrq = dma_alloc_coherent( 1553 rcd->rcvhdrq = dma_alloc_coherent(
1537 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, 1554 &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys,
1538 gfp_flags | __GFP_COMP); 1555 gfp_flags | __GFP_COMP);
1556 set_dev_node(&dd->pcidev->dev, old_node_id);
1539 1557
1540 if (!rcd->rcvhdrq) { 1558 if (!rcd->rcvhdrq) {
1541 qib_dev_err(dd, 1559 qib_dev_err(dd,
@@ -1551,9 +1569,11 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1551 } 1569 }
1552 1570
1553 if (!(dd->flags & QIB_NODMA_RTAIL)) { 1571 if (!(dd->flags & QIB_NODMA_RTAIL)) {
1572 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1554 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent( 1573 rcd->rcvhdrtail_kvaddr = dma_alloc_coherent(
1555 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, 1574 &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail,
1556 gfp_flags); 1575 gfp_flags);
1576 set_dev_node(&dd->pcidev->dev, old_node_id);
1557 if (!rcd->rcvhdrtail_kvaddr) 1577 if (!rcd->rcvhdrtail_kvaddr)
1558 goto bail_free; 1578 goto bail_free;
1559 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; 1579 rcd->rcvhdrqtailaddr_phys = phys_hdrqtail;
@@ -1597,6 +1617,7 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1597 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff; 1617 unsigned e, egrcnt, egrperchunk, chunk, egrsize, egroff;
1598 size_t size; 1618 size_t size;
1599 gfp_t gfp_flags; 1619 gfp_t gfp_flags;
1620 int old_node_id;
1600 1621
1601 /* 1622 /*
1602 * GFP_USER, but without GFP_FS, so buffer cache can be 1623 * GFP_USER, but without GFP_FS, so buffer cache can be
@@ -1615,25 +1636,29 @@ int qib_setup_eagerbufs(struct qib_ctxtdata *rcd)
1615 size = rcd->rcvegrbuf_size; 1636 size = rcd->rcvegrbuf_size;
1616 if (!rcd->rcvegrbuf) { 1637 if (!rcd->rcvegrbuf) {
1617 rcd->rcvegrbuf = 1638 rcd->rcvegrbuf =
1618 kzalloc(chunk * sizeof(rcd->rcvegrbuf[0]), 1639 kzalloc_node(chunk * sizeof(rcd->rcvegrbuf[0]),
1619 GFP_KERNEL); 1640 GFP_KERNEL, rcd->node_id);
1620 if (!rcd->rcvegrbuf) 1641 if (!rcd->rcvegrbuf)
1621 goto bail; 1642 goto bail;
1622 } 1643 }
1623 if (!rcd->rcvegrbuf_phys) { 1644 if (!rcd->rcvegrbuf_phys) {
1624 rcd->rcvegrbuf_phys = 1645 rcd->rcvegrbuf_phys =
1625 kmalloc(chunk * sizeof(rcd->rcvegrbuf_phys[0]), 1646 kmalloc_node(chunk * sizeof(rcd->rcvegrbuf_phys[0]),
1626 GFP_KERNEL); 1647 GFP_KERNEL, rcd->node_id);
1627 if (!rcd->rcvegrbuf_phys) 1648 if (!rcd->rcvegrbuf_phys)
1628 goto bail_rcvegrbuf; 1649 goto bail_rcvegrbuf;
1629 } 1650 }
1630 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) { 1651 for (e = 0; e < rcd->rcvegrbuf_chunks; e++) {
1631 if (rcd->rcvegrbuf[e]) 1652 if (rcd->rcvegrbuf[e])
1632 continue; 1653 continue;
1654
1655 old_node_id = dev_to_node(&dd->pcidev->dev);
1656 set_dev_node(&dd->pcidev->dev, rcd->node_id);
1633 rcd->rcvegrbuf[e] = 1657 rcd->rcvegrbuf[e] =
1634 dma_alloc_coherent(&dd->pcidev->dev, size, 1658 dma_alloc_coherent(&dd->pcidev->dev, size,
1635 &rcd->rcvegrbuf_phys[e], 1659 &rcd->rcvegrbuf_phys[e],
1636 gfp_flags); 1660 gfp_flags);
1661 set_dev_node(&dd->pcidev->dev, old_node_id);
1637 if (!rcd->rcvegrbuf[e]) 1662 if (!rcd->rcvegrbuf[e])
1638 goto bail_rcvegrbuf_phys; 1663 goto bail_rcvegrbuf_phys;
1639 } 1664 }