aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband/hw
diff options
context:
space:
mode:
authorRoland Dreier <rolandd@cisco.com>2006-08-11 11:56:57 -0400
committerRoland Dreier <rolandd@cisco.com>2006-08-11 11:56:57 -0400
commita19aa5c5fdda8b556ab238177ee27c5ef7873c94 (patch)
tree6c770c8fbbe3270bf1416c3edd8894d214e62657 /drivers/infiniband/hw
parente54b82d739d4a2ef992976c8c0692cdf89286420 (diff)
IB/mthca: Fix potential AB-BA deadlock with CQ locks
When destroying a QP, mthca locks both the QP's send CQ and receive CQ. However, the following scenario is perfectly valid: QP_a: send_cq == CQ_x, recv_cq == CQ_y QP_b: send_cq == CQ_y, recv_cq == CQ_x The old mthca code simply locked send_cq and then recv_cq, which in this case could lead to an AB-BA deadlock if QP_a and QP_b were destroyed simultaneously. We can fix this by changing the locking code to lock the CQ with the lower CQ number first, which will create a consistent lock ordering. Also, the second CQ is locked with spin_lock_nested() to tell lockdep that we know what we're doing with the lock nesting. This bug was found by lockdep. Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.h4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c42
2 files changed, 32 insertions, 14 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h
index 8de2887ba15c..9a5bece3fa5c 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.h
+++ b/drivers/infiniband/hw/mthca/mthca_provider.h
@@ -136,8 +136,8 @@ struct mthca_ah {
136 * We have one global lock that protects dev->cq/qp_table. Each 136 * We have one global lock that protects dev->cq/qp_table. Each
137 * struct mthca_cq/qp also has its own lock. An individual qp lock 137 * struct mthca_cq/qp also has its own lock. An individual qp lock
138 * may be taken inside of an individual cq lock. Both cqs attached to 138 * may be taken inside of an individual cq lock. Both cqs attached to
139 * a qp may be locked, with the send cq locked first. No other 139 * a qp may be locked, with the cq with the lower cqn locked first.
140 * nesting should be done. 140 * No other nesting should be done.
141 * 141 *
142 * Each struct mthca_cq/qp also has an ref count, protected by the 142 * Each struct mthca_cq/qp also has an ref count, protected by the
143 * corresponding table lock. The pointer from the cq/qp_table to the 143 * corresponding table lock. The pointer from the cq/qp_table to the
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 157b4f8ac407..2e8f6f36e0a5 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1263,6 +1263,32 @@ int mthca_alloc_qp(struct mthca_dev *dev,
1263 return 0; 1263 return 0;
1264} 1264}
1265 1265
1266static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1267{
1268 if (send_cq == recv_cq)
1269 spin_lock_irq(&send_cq->lock);
1270 else if (send_cq->cqn < recv_cq->cqn) {
1271 spin_lock_irq(&send_cq->lock);
1272 spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING);
1273 } else {
1274 spin_lock_irq(&recv_cq->lock);
1275 spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING);
1276 }
1277}
1278
1279static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq)
1280{
1281 if (send_cq == recv_cq)
1282 spin_unlock_irq(&send_cq->lock);
1283 else if (send_cq->cqn < recv_cq->cqn) {
1284 spin_unlock(&recv_cq->lock);
1285 spin_unlock_irq(&send_cq->lock);
1286 } else {
1287 spin_unlock(&send_cq->lock);
1288 spin_unlock_irq(&recv_cq->lock);
1289 }
1290}
1291
1266int mthca_alloc_sqp(struct mthca_dev *dev, 1292int mthca_alloc_sqp(struct mthca_dev *dev,
1267 struct mthca_pd *pd, 1293 struct mthca_pd *pd,
1268 struct mthca_cq *send_cq, 1294 struct mthca_cq *send_cq,
@@ -1315,17 +1341,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
1315 * Lock CQs here, so that CQ polling code can do QP lookup 1341 * Lock CQs here, so that CQ polling code can do QP lookup
1316 * without taking a lock. 1342 * without taking a lock.
1317 */ 1343 */
1318 spin_lock_irq(&send_cq->lock); 1344 mthca_lock_cqs(send_cq, recv_cq);
1319 if (send_cq != recv_cq)
1320 spin_lock(&recv_cq->lock);
1321 1345
1322 spin_lock(&dev->qp_table.lock); 1346 spin_lock(&dev->qp_table.lock);
1323 mthca_array_clear(&dev->qp_table.qp, mqpn); 1347 mthca_array_clear(&dev->qp_table.qp, mqpn);
1324 spin_unlock(&dev->qp_table.lock); 1348 spin_unlock(&dev->qp_table.lock);
1325 1349
1326 if (send_cq != recv_cq) 1350 mthca_unlock_cqs(send_cq, recv_cq);
1327 spin_unlock(&recv_cq->lock);
1328 spin_unlock_irq(&send_cq->lock);
1329 1351
1330 err_out: 1352 err_out:
1331 dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, 1353 dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size,
@@ -1359,9 +1381,7 @@ void mthca_free_qp(struct mthca_dev *dev,
1359 * Lock CQs here, so that CQ polling code can do QP lookup 1381 * Lock CQs here, so that CQ polling code can do QP lookup
1360 * without taking a lock. 1382 * without taking a lock.
1361 */ 1383 */
1362 spin_lock_irq(&send_cq->lock); 1384 mthca_lock_cqs(send_cq, recv_cq);
1363 if (send_cq != recv_cq)
1364 spin_lock(&recv_cq->lock);
1365 1385
1366 spin_lock(&dev->qp_table.lock); 1386 spin_lock(&dev->qp_table.lock);
1367 mthca_array_clear(&dev->qp_table.qp, 1387 mthca_array_clear(&dev->qp_table.qp,
@@ -1369,9 +1389,7 @@ void mthca_free_qp(struct mthca_dev *dev,
1369 --qp->refcount; 1389 --qp->refcount;
1370 spin_unlock(&dev->qp_table.lock); 1390 spin_unlock(&dev->qp_table.lock);
1371 1391
1372 if (send_cq != recv_cq) 1392 mthca_unlock_cqs(send_cq, recv_cq);
1373 spin_unlock(&recv_cq->lock);
1374 spin_unlock_irq(&send_cq->lock);
1375 1393
1376 wait_event(qp->wait, !get_qp_refcount(dev, qp)); 1394 wait_event(qp->wait, !get_qp_refcount(dev, qp));
1377 1395