diff options
author | Roland Dreier <rolandd@cisco.com> | 2006-08-11 11:56:57 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-08-11 11:56:57 -0400 |
commit | a19aa5c5fdda8b556ab238177ee27c5ef7873c94 (patch) | |
tree | 6c770c8fbbe3270bf1416c3edd8894d214e62657 /drivers/infiniband/hw | |
parent | e54b82d739d4a2ef992976c8c0692cdf89286420 (diff) |
IB/mthca: Fix potential AB-BA deadlock with CQ locks
When destroying a QP, mthca locks both the QP's send CQ and receive
CQ. However, the following scenario is perfectly valid:
QP_a: send_cq == CQ_x, recv_cq == CQ_y
QP_b: send_cq == CQ_y, recv_cq == CQ_x
The old mthca code simply locked send_cq and then recv_cq, which in
this case could lead to an AB-BA deadlock if QP_a and QP_b were
destroyed simultaneously.
We can fix this by changing the locking code to lock the CQ with the
lower CQ number first, which will create a consistent lock ordering.
Also, the second CQ is locked with spin_lock_nested() to tell lockdep
that we know what we're doing with the lock nesting.
This bug was found by lockdep.
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw')
-rw-r--r-- | drivers/infiniband/hw/mthca/mthca_provider.h | 4 | ||||
-rw-r--r-- | drivers/infiniband/hw/mthca/mthca_qp.c | 42 |
2 files changed, 32 insertions, 14 deletions
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.h b/drivers/infiniband/hw/mthca/mthca_provider.h index 8de2887ba15c..9a5bece3fa5c 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.h +++ b/drivers/infiniband/hw/mthca/mthca_provider.h | |||
@@ -136,8 +136,8 @@ struct mthca_ah { | |||
136 | * We have one global lock that protects dev->cq/qp_table. Each | 136 | * We have one global lock that protects dev->cq/qp_table. Each |
137 | * struct mthca_cq/qp also has its own lock. An individual qp lock | 137 | * struct mthca_cq/qp also has its own lock. An individual qp lock |
138 | * may be taken inside of an individual cq lock. Both cqs attached to | 138 | * may be taken inside of an individual cq lock. Both cqs attached to |
139 | * a qp may be locked, with the send cq locked first. No other | 139 | * a qp may be locked, with the cq with the lower cqn locked first. |
140 | * nesting should be done. | 140 | * No other nesting should be done. |
141 | * | 141 | * |
142 | * Each struct mthca_cq/qp also has an ref count, protected by the | 142 | * Each struct mthca_cq/qp also has an ref count, protected by the |
143 | * corresponding table lock. The pointer from the cq/qp_table to the | 143 | * corresponding table lock. The pointer from the cq/qp_table to the |
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index 157b4f8ac407..2e8f6f36e0a5 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c | |||
@@ -1263,6 +1263,32 @@ int mthca_alloc_qp(struct mthca_dev *dev, | |||
1263 | return 0; | 1263 | return 0; |
1264 | } | 1264 | } |
1265 | 1265 | ||
1266 | static void mthca_lock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) | ||
1267 | { | ||
1268 | if (send_cq == recv_cq) | ||
1269 | spin_lock_irq(&send_cq->lock); | ||
1270 | else if (send_cq->cqn < recv_cq->cqn) { | ||
1271 | spin_lock_irq(&send_cq->lock); | ||
1272 | spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); | ||
1273 | } else { | ||
1274 | spin_lock_irq(&recv_cq->lock); | ||
1275 | spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); | ||
1276 | } | ||
1277 | } | ||
1278 | |||
1279 | static void mthca_unlock_cqs(struct mthca_cq *send_cq, struct mthca_cq *recv_cq) | ||
1280 | { | ||
1281 | if (send_cq == recv_cq) | ||
1282 | spin_unlock_irq(&send_cq->lock); | ||
1283 | else if (send_cq->cqn < recv_cq->cqn) { | ||
1284 | spin_unlock(&recv_cq->lock); | ||
1285 | spin_unlock_irq(&send_cq->lock); | ||
1286 | } else { | ||
1287 | spin_unlock(&send_cq->lock); | ||
1288 | spin_unlock_irq(&recv_cq->lock); | ||
1289 | } | ||
1290 | } | ||
1291 | |||
1266 | int mthca_alloc_sqp(struct mthca_dev *dev, | 1292 | int mthca_alloc_sqp(struct mthca_dev *dev, |
1267 | struct mthca_pd *pd, | 1293 | struct mthca_pd *pd, |
1268 | struct mthca_cq *send_cq, | 1294 | struct mthca_cq *send_cq, |
@@ -1315,17 +1341,13 @@ int mthca_alloc_sqp(struct mthca_dev *dev, | |||
1315 | * Lock CQs here, so that CQ polling code can do QP lookup | 1341 | * Lock CQs here, so that CQ polling code can do QP lookup |
1316 | * without taking a lock. | 1342 | * without taking a lock. |
1317 | */ | 1343 | */ |
1318 | spin_lock_irq(&send_cq->lock); | 1344 | mthca_lock_cqs(send_cq, recv_cq); |
1319 | if (send_cq != recv_cq) | ||
1320 | spin_lock(&recv_cq->lock); | ||
1321 | 1345 | ||
1322 | spin_lock(&dev->qp_table.lock); | 1346 | spin_lock(&dev->qp_table.lock); |
1323 | mthca_array_clear(&dev->qp_table.qp, mqpn); | 1347 | mthca_array_clear(&dev->qp_table.qp, mqpn); |
1324 | spin_unlock(&dev->qp_table.lock); | 1348 | spin_unlock(&dev->qp_table.lock); |
1325 | 1349 | ||
1326 | if (send_cq != recv_cq) | 1350 | mthca_unlock_cqs(send_cq, recv_cq); |
1327 | spin_unlock(&recv_cq->lock); | ||
1328 | spin_unlock_irq(&send_cq->lock); | ||
1329 | 1351 | ||
1330 | err_out: | 1352 | err_out: |
1331 | dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, | 1353 | dma_free_coherent(&dev->pdev->dev, sqp->header_buf_size, |
@@ -1359,9 +1381,7 @@ void mthca_free_qp(struct mthca_dev *dev, | |||
1359 | * Lock CQs here, so that CQ polling code can do QP lookup | 1381 | * Lock CQs here, so that CQ polling code can do QP lookup |
1360 | * without taking a lock. | 1382 | * without taking a lock. |
1361 | */ | 1383 | */ |
1362 | spin_lock_irq(&send_cq->lock); | 1384 | mthca_lock_cqs(send_cq, recv_cq); |
1363 | if (send_cq != recv_cq) | ||
1364 | spin_lock(&recv_cq->lock); | ||
1365 | 1385 | ||
1366 | spin_lock(&dev->qp_table.lock); | 1386 | spin_lock(&dev->qp_table.lock); |
1367 | mthca_array_clear(&dev->qp_table.qp, | 1387 | mthca_array_clear(&dev->qp_table.qp, |
@@ -1369,9 +1389,7 @@ void mthca_free_qp(struct mthca_dev *dev, | |||
1369 | --qp->refcount; | 1389 | --qp->refcount; |
1370 | spin_unlock(&dev->qp_table.lock); | 1390 | spin_unlock(&dev->qp_table.lock); |
1371 | 1391 | ||
1372 | if (send_cq != recv_cq) | 1392 | mthca_unlock_cqs(send_cq, recv_cq); |
1373 | spin_unlock(&recv_cq->lock); | ||
1374 | spin_unlock_irq(&send_cq->lock); | ||
1375 | 1393 | ||
1376 | wait_event(qp->wait, !get_qp_refcount(dev, qp)); | 1394 | wait_event(qp->wait, !get_qp_refcount(dev, qp)); |
1377 | 1395 | ||