aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author Roland Dreier <rolandd@cisco.com> 2008-07-15 02:48:47 -0400
committer Roland Dreier <rolandd@cisco.com> 2008-07-15 02:48:47 -0400
commit a7d834c4bc6be73e8f83eaa5072fac3c5549f7f2 (patch)
tree aec2ed171dee9fc29b2a968a8048808d85b2cec1 /drivers/infiniband
parent 468f2239bcc71ae0f345c3fe58c797cf4627daf4 (diff)
IPoIB/cm: Fix racy use of receive WR/SGL in ipoib_cm_post_receive_nonsrq()
For devices that don't support SRQs, ipoib_cm_post_receive_nonsrq() is called from both ipoib_cm_handle_rx_wc() and ipoib_cm_nonsrq_init_rx(), and these two callers are not synchronized against each other. However, ipoib_cm_post_receive_nonsrq() always reuses the same receive work request and scatter list structures, so multiple callers can end up stepping on each other, which leads to posting garbled work requests.

Fix this by having the caller pass in the ib_recv_wr and ib_sge structures to use, and allocating new local structures in ipoib_cm_nonsrq_init_rx().

Based on a patch by Pradeep Satyanarayana <pradeep@us.ibm.com> and David Wilder <dwilder@us.ibm.com>, with debugging help from Hoang-Nam Nguyen <hnguyen@de.ibm.com>.

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/ulp/ipoib/ipoib_cm.c 63
1 files changed, 47 insertions, 16 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6223fc39af70..37bf67b2a26f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -111,18 +111,20 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
111} 111}
112 112
113static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, 113static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
114 struct ipoib_cm_rx *rx, int id) 114 struct ipoib_cm_rx *rx,
115 struct ib_recv_wr *wr,
116 struct ib_sge *sge, int id)
115{ 117{
116 struct ipoib_dev_priv *priv = netdev_priv(dev); 118 struct ipoib_dev_priv *priv = netdev_priv(dev);
117 struct ib_recv_wr *bad_wr; 119 struct ib_recv_wr *bad_wr;
118 int i, ret; 120 int i, ret;
119 121
120 priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; 122 wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
121 123
122 for (i = 0; i < IPOIB_CM_RX_SG; ++i) 124 for (i = 0; i < IPOIB_CM_RX_SG; ++i)
123 priv->cm.rx_sge[i].addr = rx->rx_ring[id].mapping[i]; 125 sge[i].addr = rx->rx_ring[id].mapping[i];
124 126
125 ret = ib_post_recv(rx->qp, &priv->cm.rx_wr, &bad_wr); 127 ret = ib_post_recv(rx->qp, wr, &bad_wr);
126 if (unlikely(ret)) { 128 if (unlikely(ret)) {
127 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); 129 ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
128 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, 130 ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -320,10 +322,33 @@ static int ipoib_cm_modify_rx_qp(struct net_device *dev,
320 return 0; 322 return 0;
321} 323}
322 324
325static void ipoib_cm_init_rx_wr(struct net_device *dev,
326 struct ib_recv_wr *wr,
327 struct ib_sge *sge)
328{
329 struct ipoib_dev_priv *priv = netdev_priv(dev);
330 int i;
331
332 for (i = 0; i < priv->cm.num_frags; ++i)
333 sge[i].lkey = priv->mr->lkey;
334
335 sge[0].length = IPOIB_CM_HEAD_SIZE;
336 for (i = 1; i < priv->cm.num_frags; ++i)
337 sge[i].length = PAGE_SIZE;
338
339 wr->next = NULL;
340 wr->sg_list = priv->cm.rx_sge;
341 wr->num_sge = priv->cm.num_frags;
342}
343
323static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id, 344static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_id,
324 struct ipoib_cm_rx *rx) 345 struct ipoib_cm_rx *rx)
325{ 346{
326 struct ipoib_dev_priv *priv = netdev_priv(dev); 347 struct ipoib_dev_priv *priv = netdev_priv(dev);
348 struct {
349 struct ib_recv_wr wr;
350 struct ib_sge sge[IPOIB_CM_RX_SG];
351 } *t;
327 int ret; 352 int ret;
328 int i; 353 int i;
329 354
@@ -331,6 +356,14 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
331 if (!rx->rx_ring) 356 if (!rx->rx_ring)
332 return -ENOMEM; 357 return -ENOMEM;
333 358
359 t = kmalloc(sizeof *t, GFP_KERNEL);
360 if (!t) {
361 ret = -ENOMEM;
362 goto err_free;
363 }
364
365 ipoib_cm_init_rx_wr(dev, &t->wr, t->sge);
366
334 spin_lock_irq(&priv->lock); 367 spin_lock_irq(&priv->lock);
335 368
336 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) { 369 if (priv->cm.nonsrq_conn_qp >= ipoib_max_conn_qp) {
@@ -349,8 +382,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
349 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i); 382 ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
350 ret = -ENOMEM; 383 ret = -ENOMEM;
351 goto err_count; 384 goto err_count;
352 } 385 }
353 ret = ipoib_cm_post_receive_nonsrq(dev, rx, i); 386 ret = ipoib_cm_post_receive_nonsrq(dev, rx, &t->wr, t->sge, i);
354 if (ret) { 387 if (ret) {
355 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq " 388 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq "
356 "failed for buf %d\n", i); 389 "failed for buf %d\n", i);
@@ -361,6 +394,8 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
361 394
362 rx->recv_count = ipoib_recvq_size; 395 rx->recv_count = ipoib_recvq_size;
363 396
397 kfree(t);
398
364 return 0; 399 return 0;
365 400
366err_count: 401err_count:
@@ -369,6 +404,7 @@ err_count:
369 spin_unlock_irq(&priv->lock); 404 spin_unlock_irq(&priv->lock);
370 405
371err_free: 406err_free:
407 kfree(t);
372 ipoib_cm_free_rx_ring(dev, rx->rx_ring); 408 ipoib_cm_free_rx_ring(dev, rx->rx_ring);
373 409
374 return ret; 410 return ret;
@@ -637,7 +673,10 @@ repost:
637 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed " 673 ipoib_warn(priv, "ipoib_cm_post_receive_srq failed "
638 "for buf %d\n", wr_id); 674 "for buf %d\n", wr_id);
639 } else { 675 } else {
640 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p, wr_id))) { 676 if (unlikely(ipoib_cm_post_receive_nonsrq(dev, p,
677 &priv->cm.rx_wr,
678 priv->cm.rx_sge,
679 wr_id))) {
641 --p->recv_count; 680 --p->recv_count;
642 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed " 681 ipoib_warn(priv, "ipoib_cm_post_receive_nonsrq failed "
643 "for buf %d\n", wr_id); 682 "for buf %d\n", wr_id);
@@ -1502,15 +1541,7 @@ int ipoib_cm_dev_init(struct net_device *dev)
1502 priv->cm.num_frags = IPOIB_CM_RX_SG; 1541 priv->cm.num_frags = IPOIB_CM_RX_SG;
1503 } 1542 }
1504 1543
1505 for (i = 0; i < priv->cm.num_frags; ++i) 1544 ipoib_cm_init_rx_wr(dev, &priv->cm.rx_wr, priv->cm.rx_sge);
1506 priv->cm.rx_sge[i].lkey = priv->mr->lkey;
1507
1508 priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
1509 for (i = 1; i < priv->cm.num_frags; ++i)
1510 priv->cm.rx_sge[i].length = PAGE_SIZE;
1511 priv->cm.rx_wr.next = NULL;
1512 priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
1513 priv->cm.rx_wr.num_sge = priv->cm.num_frags;
1514 1545
1515 if (ipoib_cm_has_srq(dev)) { 1546 if (ipoib_cm_has_srq(dev)) {
1516 for (i = 0; i < ipoib_recvq_size; ++i) { 1547 for (i = 0; i < ipoib_recvq_size; ++i) {