author     Sean Hefty <sean.hefty@intel.com>        2012-04-25 13:42:35 -0400
committer  Roland Dreier <roland@purestorage.com>   2012-05-08 14:17:34 -0400
commit     b6cec8aa4a799d1e146095f0ba52454710f5ede4 (patch)
tree       784c2ed70e9900ee90bf788e51d76c1b15767b3a /drivers/infiniband
parent     5909ce545db415ae8c26e849df862e8cc1acf571 (diff)
RDMA/cma: Fix lockdep false positive recursive locking
The following lockdep problem was reported by Or Gerlitz <ogerlitz@mellanox.com>:
[ INFO: possible recursive locking detected ]
3.3.0-32035-g1b2649e-dirty #4 Not tainted
---------------------------------------------
kworker/5:1/418 is trying to acquire lock:
(&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0138a41>] rdma_destroy_id+0x33/0x1f0 [rdma_cm]
but task is already holding lock:
(&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0135130>] cma_disable_callback+0x24/0x45 [rdma_cm]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&id_priv->handler_mutex);
lock(&id_priv->handler_mutex);
*** DEADLOCK ***
May be due to missing lock nesting notation
3 locks held by kworker/5:1/418:
#0: (ib_cm){.+.+.+}, at: [<ffffffff81042ac1>] process_one_work+0x210/0x4a6
#1: ((&(&work->work)->work)){+.+.+.}, at: [<ffffffff81042ac1>] process_one_work+0x210/0x4a6
#2: (&id_priv->handler_mutex){+.+.+.}, at: [<ffffffffa0135130>] cma_disable_callback+0x24/0x45 [rdma_cm]
stack backtrace:
Pid: 418, comm: kworker/5:1 Not tainted 3.3.0-32035-g1b2649e-dirty #4
Call Trace:
[<ffffffff8102b0fb>] ? console_unlock+0x1f4/0x204
[<ffffffff81068771>] __lock_acquire+0x16b5/0x174e
[<ffffffff8106461f>] ? save_trace+0x3f/0xb3
[<ffffffff810688fa>] lock_acquire+0xf0/0x116
[<ffffffffa0138a41>] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm]
[<ffffffff81364351>] mutex_lock_nested+0x64/0x2ce
[<ffffffffa0138a41>] ? rdma_destroy_id+0x33/0x1f0 [rdma_cm]
[<ffffffff81065a78>] ? trace_hardirqs_on_caller+0x11e/0x155
[<ffffffff81065abc>] ? trace_hardirqs_on+0xd/0xf
[<ffffffffa0138a41>] rdma_destroy_id+0x33/0x1f0 [rdma_cm]
[<ffffffffa0139c02>] cma_req_handler+0x418/0x644 [rdma_cm]
[<ffffffffa012ee88>] cm_process_work+0x32/0x119 [ib_cm]
[<ffffffffa0130299>] cm_req_handler+0x928/0x982 [ib_cm]
[<ffffffffa01302f3>] ? cm_req_handler+0x982/0x982 [ib_cm]
[<ffffffffa0130326>] cm_work_handler+0x33/0xfe5 [ib_cm]
[<ffffffff81065a78>] ? trace_hardirqs_on_caller+0x11e/0x155
[<ffffffffa01302f3>] ? cm_req_handler+0x982/0x982 [ib_cm]
[<ffffffff81042b6e>] process_one_work+0x2bd/0x4a6
[<ffffffff81042ac1>] ? process_one_work+0x210/0x4a6
[<ffffffff813669f3>] ? _raw_spin_unlock_irq+0x2b/0x40
[<ffffffff8104316e>] worker_thread+0x1d6/0x350
[<ffffffff81042f98>] ? rescuer_thread+0x241/0x241
[<ffffffff81046a32>] kthread+0x84/0x8c
[<ffffffff8136e854>] kernel_thread_helper+0x4/0x10
[<ffffffff81366d59>] ? retint_restore_args+0xe/0xe
[<ffffffff810469ae>] ? __init_kthread_worker+0x56/0x56
[<ffffffff8136e850>] ? gs_change+0xb/0xb
The actual locking is fine, since we're dealing with different locks,
but from the same lock class. cma_disable_callback() acquires the
listening id mutex, whereas rdma_destroy_id() acquires the mutex for
the new connection id. To fix this, delay the call to
rdma_destroy_id() until we've released the listening id mutex.
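The shape of the bug and of the fix is easier to see in isolation. The following is an illustrative sketch only (the demo_* names are invented, not the rdma_cm types): every instance's handler_mutex belongs to the same lock class, so destroying the new connection ID while the listening ID's mutex is still held acquires a second member of that class at the default nesting level, which is exactly what lockdep flags; releasing the listening ID's mutex first keeps at most one class member held at a time.

/*
 * Illustrative sketch only -- the demo_* names are invented and are not
 * the rdma_cm types.  Every instance's handler_mutex is in the same lock
 * class, so the "old" shape below triggers the recursive-locking report
 * even though the two mutexes are different objects.
 */
#include <linux/mutex.h>

struct demo_id {
        struct mutex handler_mutex;     /* same lock class in every instance */
};

static void demo_destroy(struct demo_id *id)
{
        mutex_lock(&id->handler_mutex); /* plain lock: nesting subclass 0 */
        /* ... tear the ID down ... */
        mutex_unlock(&id->handler_mutex);
}

/* Old shape: destroy the new ID while the listening ID is still locked. */
static void demo_handler_old(struct demo_id *listen, struct demo_id *conn)
{
        mutex_lock(&listen->handler_mutex);
        /* error path */
        demo_destroy(conn);             /* second handler_mutex of the same
                                         * class at subclass 0 -> lockdep splat */
        mutex_unlock(&listen->handler_mutex);
}

/* Fixed shape: release the listening ID's mutex, then destroy. */
static void demo_handler_fixed(struct demo_id *listen, struct demo_id *conn)
{
        mutex_lock(&listen->handler_mutex);
        /* error path: only record the failure here */
        mutex_unlock(&listen->handler_mutex);
        demo_destroy(conn);             /* at most one class member held at a time */
}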
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--   drivers/infiniband/core/cma.c   42
1 file changed, 22 insertions(+), 20 deletions(-)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index e3e470fecaa9..79c7eebb970f 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1218,13 +1218,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
         }
         if (!conn_id) {
                 ret = -ENOMEM;
-                goto out;
+                goto err1;
         }
 
         mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
         ret = cma_acquire_dev(conn_id);
         if (ret)
-                goto release_conn_id;
+                goto err2;
 
         conn_id->cm_id.ib = cm_id;
         cm_id->context = conn_id;
@@ -1236,31 +1236,33 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
          */
         atomic_inc(&conn_id->refcount);
         ret = conn_id->id.event_handler(&conn_id->id, &event);
-        if (!ret) {
-                /*
-                 * Acquire mutex to prevent user executing rdma_destroy_id()
-                 * while we're accessing the cm_id.
-                 */
-                mutex_lock(&lock);
-                if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
-                        ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
-                mutex_unlock(&lock);
-                mutex_unlock(&conn_id->handler_mutex);
-                cma_deref_id(conn_id);
-                goto out;
-        }
+        if (ret)
+                goto err3;
+
+        /*
+         * Acquire mutex to prevent user executing rdma_destroy_id()
+         * while we're accessing the cm_id.
+         */
+        mutex_lock(&lock);
+        if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
+                ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
+        mutex_unlock(&lock);
+        mutex_unlock(&conn_id->handler_mutex);
+        mutex_unlock(&listen_id->handler_mutex);
         cma_deref_id(conn_id);
+        return 0;
 
+err3:
+        cma_deref_id(conn_id);
         /* Destroy the CM ID by returning a non-zero value. */
         conn_id->cm_id.ib = NULL;
-
-release_conn_id:
+err2:
         cma_exch(conn_id, RDMA_CM_DESTROYING);
         mutex_unlock(&conn_id->handler_mutex);
-        rdma_destroy_id(&conn_id->id);
-
-out:
+err1:
         mutex_unlock(&listen_id->handler_mutex);
+        if (conn_id)
+                rdma_destroy_id(&conn_id->id);
         return ret;
 }
 
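As a side note on the "missing lock nesting notation" hint in the report: the nested acquisition the handler keeps using at line 1224 is the standard annotation for intentional same-class nesting. A minimal, illustrative sketch of that annotation (generic names, not the cma code):

#include <linux/mutex.h>

/*
 * Illustrative only: when an outer and an inner mutex share a lock class
 * and the nesting is deliberate, the inner acquisition is taken with an
 * explicit subclass so lockdep tracks it one level deeper instead of
 * reporting recursion.
 */
static void lock_same_class_pair(struct mutex *outer, struct mutex *inner)
{
        mutex_lock(outer);
        mutex_lock_nested(inner, SINGLE_DEPTH_NESTING);
        /* ... work that needs both objects ... */
        mutex_unlock(inner);
        mutex_unlock(outer);
}

That annotation only covers the one extra level taken inside cma_req_handler() itself; rdma_destroy_id() acquires the connection ID's mutex at the default subclass, which is why the destroy call has to move outside the listening ID's critical section rather than gain another annotation.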