aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sean Hefty <sean.hefty@intel.com> 2011-02-23 11:11:32 -0500
committer: Roland Dreier <roland@purestorage.com> 2011-03-15 13:00:28 -0400
commit: 25ae21a10112875763c18b385624df713a288a05 (patch)
tree: e05138aae6bd98f055ce2be0437922cee8e9f17e
parent: a5bbef0b2deb7b943f095181309ecc9e1fc91c0f (diff)
RDMA/cma: Fix crash in request handlers
Doug Ledford and Red Hat reported a crash when running the rdma_cm on a real-time OS. The crash has the following call trace:

    cm_process_work
    cma_req_handler
    cma_disable_callback
    rdma_create_id
    kzalloc
    init_completion
    cma_get_net_info
    cma_save_net_info
    cma_any_addr
    cma_zero_addr
    rdma_translate_ip
    rdma_copy_addr
    cma_acquire_dev
    rdma_addr_get_sgid
    ib_find_cached_gid
    cma_attach_to_dev
    ucma_event_handler
    kzalloc
    ib_copy_ah_attr_to_user
    cma_comp

    [ preempted ]

    cma_write
    copy_from_user
    ucma_destroy_id
    copy_from_user
    _ucma_find_context
    ucma_put_ctx
    ucma_free_ctx
    rdma_destroy_id
    cma_exch
    cma_cancel_operation
    rdma_node_get_transport
    rt_mutex_slowunlock
    bad_area_nosemaphore
    oops_enter

They were able to reproduce the crash multiple times with the following details:

    Crash seems to always happen on the:
        mutex_unlock(&conn_id->handler_mutex);
    as conn_id looks to have been freed during this code path.

An examination of the code shows that a race exists in the request handlers. When a new connection request is received, the rdma_cm allocates a new connection identifier. This identifier has a single reference count on it. If a user calls rdma_destroy_id() from another thread after receiving a callback, rdma_destroy_id will proceed to destroy the id and free the associated memory. However, the request handlers may still be in the process of running. When control returns to the request handlers, they can attempt to access the newly created identifiers.

Fix this by holding a reference on the newly created rdma_cm_id until the request handler is through accessing it.

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Roland Dreier <roland@purestorage.com>
-rw-r--r-- drivers/infiniband/core/cma.c 15
1 files changed, 15 insertions, 0 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde1..e450c5a87727 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1210,6 +1210,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1210 cm_id->context = conn_id; 1210 cm_id->context = conn_id;
1211 cm_id->cm_handler = cma_ib_handler; 1211 cm_id->cm_handler = cma_ib_handler;
1212 1212
1213 /*
1214 * Protect against the user destroying conn_id from another thread
1215 * until we're done accessing it.
1216 */
1217 atomic_inc(&conn_id->refcount);
1213 ret = conn_id->id.event_handler(&conn_id->id, &event); 1218 ret = conn_id->id.event_handler(&conn_id->id, &event);
1214 if (!ret) { 1219 if (!ret) {
1215 /* 1220 /*
@@ -1222,8 +1227,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1222 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1227 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1223 mutex_unlock(&lock); 1228 mutex_unlock(&lock);
1224 mutex_unlock(&conn_id->handler_mutex); 1229 mutex_unlock(&conn_id->handler_mutex);
1230 cma_deref_id(conn_id);
1225 goto out; 1231 goto out;
1226 } 1232 }
1233 cma_deref_id(conn_id);
1227 1234
1228 /* Destroy the CM ID by returning a non-zero value. */ 1235 /* Destroy the CM ID by returning a non-zero value. */
1229 conn_id->cm_id.ib = NULL; 1236 conn_id->cm_id.ib = NULL;
@@ -1425,17 +1432,25 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1425 event.param.conn.private_data_len = iw_event->private_data_len; 1432 event.param.conn.private_data_len = iw_event->private_data_len;
1426 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; 1433 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1427 event.param.conn.responder_resources = attr.max_qp_rd_atom; 1434 event.param.conn.responder_resources = attr.max_qp_rd_atom;
1435
1436 /*
1437 * Protect against the user destroying conn_id from another thread
1438 * until we're done accessing it.
1439 */
1440 atomic_inc(&conn_id->refcount);
1428 ret = conn_id->id.event_handler(&conn_id->id, &event); 1441 ret = conn_id->id.event_handler(&conn_id->id, &event);
1429 if (ret) { 1442 if (ret) {
1430 /* User wants to destroy the CM ID */ 1443 /* User wants to destroy the CM ID */
1431 conn_id->cm_id.iw = NULL; 1444 conn_id->cm_id.iw = NULL;
1432 cma_exch(conn_id, CMA_DESTROYING); 1445 cma_exch(conn_id, CMA_DESTROYING);
1433 mutex_unlock(&conn_id->handler_mutex); 1446 mutex_unlock(&conn_id->handler_mutex);
1447 cma_deref_id(conn_id);
1434 rdma_destroy_id(&conn_id->id); 1448 rdma_destroy_id(&conn_id->id);
1435 goto out; 1449 goto out;
1436 } 1450 }
1437 1451
1438 mutex_unlock(&conn_id->handler_mutex); 1452 mutex_unlock(&conn_id->handler_mutex);
1453 cma_deref_id(conn_id);
1439 1454
1440out: 1455out:
1441 if (dev) 1456 if (dev)