author    Sean Hefty <sean.hefty@intel.com>  2012-04-25 13:42:35 -0400
committer Roland Dreier <roland@purestorage.com>  2012-05-08 14:17:34 -0400
commit    b6cec8aa4a799d1e146095f0ba52454710f5ede4 (patch)
tree      784c2ed70e9900ee90bf788e51d76c1b15767b3a /drivers/infiniband
parent    5909ce545db415ae8c26e849df862e8cc1acf571 (diff)
RDMA/cma: Fix lockdep false positive recursive locking
The following lockdep problem was reported by Or Gerlitz <ogerlitz@mellanox.com>:

[ INFO: possible recursive locking detected ]
3.3.0-32035-g1b2649e-dirty #4 Not tainted
---------------------------------------------
kworker/5:1/418 is trying to acquire lock:
 (&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0138a41>] rdma_destroy_id+0x33/0x1f0 [rdma_cm]

but task is already holding lock:
 (&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0135130>] cma_disable_callback+0x24/0x45 [rdma_cm]

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&id_priv->handler_mutex);
  lock(&id_priv->handler_mutex);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

3 locks held by kworker/5:1/418:
 #0:  (ib_cm){.+.+.+}, at: [<ffffffff81042ac1>] process_one_work+0x210/0x4a6
 #1:  ((&(&work->work)->work)){+.+.+.}, at: [<ffffffff81042ac1>] process_one_work+0x210/0x4a6
 #2:  (&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0135130>] cma_disable_callback+0x24/0x45 [rdma_cm]

stack backtrace:
Pid: 418, comm: kworker/5:1 Not tainted 3.3.0-32035-g1b2649e-dirty #4
Call Trace:
 [<ffffffff8102b0fb>] ? console_unlock+0x1f4/0x204
 [<ffffffff81068771>] __lock_acquire+0x16b5/0x174e
 [<ffffffff8106461f>] ? save_trace+0x3f/0xb3
 [<ffffffff810688fa>] lock_acquire+0xf0/0x116
 [<ffffffffa0138a41>] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm]
 [<ffffffff81364351>] mutex_lock_nested+0x64/0x2ce
 [<ffffffffa0138a41>] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm]
 [<ffffffff81065a78>] ? trace_hardirqs_on_caller+0x11e/0x155
 [<ffffffff81065abc>] ? trace_hardirqs_on+0xd/0xf
 [<ffffffffa0138a41>] rdma_destroy_id+0x33/0x1f0 [rdma_cm]
 [<ffffffffa0139c02>] cma_req_handler+0x418/0x644 [rdma_cm]
 [<ffffffffa012ee88>] cm_process_work+0x32/0x119 [ib_cm]
 [<ffffffffa0130299>] cm_req_handler+0x928/0x982 [ib_cm]
 [<ffffffffa01302f3>] ? cm_req_handler+0x982/0x982 [ib_cm]
 [<ffffffffa0130326>] cm_work_handler+0x33/0xfe5 [ib_cm]
 [<ffffffff81065a78>] ? trace_hardirqs_on_caller+0x11e/0x155
 [<ffffffffa01302f3>] ? cm_req_handler+0x982/0x982 [ib_cm]
 [<ffffffff81042b6e>] process_one_work+0x2bd/0x4a6
 [<ffffffff81042ac1>] ? process_one_work+0x210/0x4a6
 [<ffffffff813669f3>] ? _raw_spin_unlock_irq+0x2b/0x40
 [<ffffffff8104316e>] worker_thread+0x1d6/0x350
 [<ffffffff81042f98>] ? rescuer_thread+0x241/0x241
 [<ffffffff81046a32>] kthread+0x84/0x8c
 [<ffffffff8136e854>] kernel_thread_helper+0x4/0x10
 [<ffffffff81366d59>] ? retint_restore_args+0xe/0xe
 [<ffffffff810469ae>] ? __init_kthread_worker+0x56/0x56
 [<ffffffff8136e850>] ? gs_change+0xb/0xb

The actual locking is fine, since we're dealing with different locks, but
from the same lock class.  cma_disable_callback() acquires the listening id
mutex, whereas rdma_destroy_id() acquires the mutex for the new connection
id.  To fix this, delay the call to rdma_destroy_id() until we've released
the listening id mutex.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
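The report above is the textbook shape of a same-class lockdep false positive: lockdep tracks lock classes rather than lock instances, and every id_priv handler_mutex shares one class, so taking the new connection id's mutex while the listening id's mutex is held looks recursive even though two distinct mutexes are involved. A minimal sketch of the pattern and the two usual remedies follows; the struct and function names here are illustrative stand-ins, not code from the patch:

#include <linux/mutex.h>
#include <linux/lockdep.h>

/* Illustrative stand-in for rdma_cm's id_priv: every instance's
 * handler_mutex belongs to the same lockdep class.
 */
struct demo_id {
        struct mutex handler_mutex;
};

static void demo_req_handler(struct demo_id *listen_id, struct demo_id *conn_id)
{
        mutex_lock(&listen_id->handler_mutex);

        /*
         * Remedy 1: annotate a deliberate same-class acquisition with
         * mutex_lock_nested(), as cma_req_handler() already does for
         * conn_id.  A plain mutex_lock() here would trigger the report.
         */
        mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
        /* ... process the connection request ... */
        mutex_unlock(&conn_id->handler_mutex);
        mutex_unlock(&listen_id->handler_mutex);

        /*
         * Remedy 2, used by this patch for the failure path: a callee
         * such as rdma_destroy_id() takes handler_mutex internally and
         * cannot be annotated from here, so defer calling it until the
         * listening id's mutex has been released, as immediately above.
         */
}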
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--  drivers/infiniband/core/cma.c  42
1 file changed, 22 insertions(+), 20 deletions(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e3e470fecaa9..79c7eebb970f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1218,13 +1218,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	}
 	if (!conn_id) {
 		ret = -ENOMEM;
-		goto out;
+		goto err1;
 	}
 
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
 	ret = cma_acquire_dev(conn_id);
 	if (ret)
-		goto release_conn_id;
+		goto err2;
 
 	conn_id->cm_id.ib = cm_id;
 	cm_id->context = conn_id;
@@ -1236,31 +1236,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	 */
 	atomic_inc(&conn_id->refcount);
 	ret = conn_id->id.event_handler(&conn_id->id, &event);
-	if (!ret) {
-		/*
-		 * Acquire mutex to prevent user executing rdma_destroy_id()
-		 * while we're accessing the cm_id.
-		 */
-		mutex_lock(&lock);
-		if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
-			ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
-		mutex_unlock(&lock);
-		mutex_unlock(&conn_id->handler_mutex);
-		cma_deref_id(conn_id);
-		goto out;
-	}
+	if (ret)
+		goto err3;
+
+	/*
+	 * Acquire mutex to prevent user executing rdma_destroy_id()
+	 * while we're accessing the cm_id.
+	 */
+	mutex_lock(&lock);
+	if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
+		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+	mutex_unlock(&lock);
+	mutex_unlock(&conn_id->handler_mutex);
+	mutex_unlock(&listen_id->handler_mutex);
 	cma_deref_id(conn_id);
+	return 0;
 
+err3:
+	cma_deref_id(conn_id);
 	/* Destroy the CM ID by returning a non-zero value. */
 	conn_id->cm_id.ib = NULL;
-
-release_conn_id:
+err2:
 	cma_exch(conn_id, RDMA_CM_DESTROYING);
 	mutex_unlock(&conn_id->handler_mutex);
-	rdma_destroy_id(&conn_id->id);
-
-out:
+err1:
 	mutex_unlock(&listen_id->handler_mutex);
+	if (conn_id)
+		rdma_destroy_id(&conn_id->id);
 	return ret;
 }
 
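Condensed from the diff above to just the locking-relevant lines, the reworked error unwind reads as follows. Since rdma_destroy_id() re-acquires the connection id's handler_mutex internally (visible at the top of the lockdep trace), it now runs only after both handler mutexes have been dropped:

err3:
	cma_deref_id(conn_id);
	/* Destroy the CM ID by returning a non-zero value. */
	conn_id->cm_id.ib = NULL;
err2:
	cma_exch(conn_id, RDMA_CM_DESTROYING);
	mutex_unlock(&conn_id->handler_mutex);
err1:
	mutex_unlock(&listen_id->handler_mutex);
	/* Both handler mutexes are released here, so the handler_mutex that
	 * rdma_destroy_id() takes internally is never nested inside another
	 * lock of the same class.  conn_id may still be NULL when arriving
	 * via err1, hence the check.
	 */
	if (conn_id)
		rdma_destroy_id(&conn_id->id);
	return ret;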