diff options
author | Sean Hefty <sean.hefty@intel.com> | 2011-02-23 11:11:32 -0500 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2011-03-15 13:00:28 -0400 |
commit | 25ae21a10112875763c18b385624df713a288a05 (patch) | |
tree | e05138aae6bd98f055ce2be0437922cee8e9f17e /drivers/infiniband | |
parent | a5bbef0b2deb7b943f095181309ecc9e1fc91c0f (diff) |
RDMA/cma: Fix crash in request handlers
Doug Ledford and Red Hat reported a crash when running the rdma_cm on
a real-time OS. The crash has the following call trace:
cm_process_work
cma_req_handler
cma_disable_callback
rdma_create_id
kzalloc
init_completion
cma_get_net_info
cma_save_net_info
cma_any_addr
cma_zero_addr
rdma_translate_ip
rdma_copy_addr
cma_acquire_dev
rdma_addr_get_sgid
ib_find_cached_gid
cma_attach_to_dev
ucma_event_handler
kzalloc
ib_copy_ah_attr_to_user
cma_comp
[ preempted ]
cma_write
copy_from_user
ucma_destroy_id
copy_from_user
_ucma_find_context
ucma_put_ctx
ucma_free_ctx
rdma_destroy_id
cma_exch
cma_cancel_operation
rdma_node_get_transport
rt_mutex_slowunlock
bad_area_nosemaphore
oops_enter
They were able to reproduce the crash multiple times with the
following details:
The crash seems to always happen on the call:
mutex_unlock(&conn_id->handler_mutex);
as conn_id appears to have been freed during this code path.
An examination of the code shows that a race exists in the request
handlers. When a new connection request is received, the rdma_cm
allocates a new connection identifier. This identifier has a single
reference count on it. If a user calls rdma_destroy_id() from another
thread after receiving a callback, rdma_destroy_id will proceed to
destroy the id and free the associated memory. However, the request
handlers may still be in the process of running. When control returns
to the request handlers, they can attempt to access the newly created
identifier after its memory has already been freed (a use-after-free).
Fix this by holding a reference on the newly created rdma_cm_id until
the request handler is through accessing it.
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Cc: <stable@kernel.org>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/core/cma.c | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 6884da24fde1..e450c5a87727 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -1210,6 +1210,11 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) | |||
1210 | cm_id->context = conn_id; | 1210 | cm_id->context = conn_id; |
1211 | cm_id->cm_handler = cma_ib_handler; | 1211 | cm_id->cm_handler = cma_ib_handler; |
1212 | 1212 | ||
1213 | /* | ||
1214 | * Protect against the user destroying conn_id from another thread | ||
1215 | * until we're done accessing it. | ||
1216 | */ | ||
1217 | atomic_inc(&conn_id->refcount); | ||
1213 | ret = conn_id->id.event_handler(&conn_id->id, &event); | 1218 | ret = conn_id->id.event_handler(&conn_id->id, &event); |
1214 | if (!ret) { | 1219 | if (!ret) { |
1215 | /* | 1220 | /* |
@@ -1222,8 +1227,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) | |||
1222 | ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); | 1227 | ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); |
1223 | mutex_unlock(&lock); | 1228 | mutex_unlock(&lock); |
1224 | mutex_unlock(&conn_id->handler_mutex); | 1229 | mutex_unlock(&conn_id->handler_mutex); |
1230 | cma_deref_id(conn_id); | ||
1225 | goto out; | 1231 | goto out; |
1226 | } | 1232 | } |
1233 | cma_deref_id(conn_id); | ||
1227 | 1234 | ||
1228 | /* Destroy the CM ID by returning a non-zero value. */ | 1235 | /* Destroy the CM ID by returning a non-zero value. */ |
1229 | conn_id->cm_id.ib = NULL; | 1236 | conn_id->cm_id.ib = NULL; |
@@ -1425,17 +1432,25 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id, | |||
1425 | event.param.conn.private_data_len = iw_event->private_data_len; | 1432 | event.param.conn.private_data_len = iw_event->private_data_len; |
1426 | event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; | 1433 | event.param.conn.initiator_depth = attr.max_qp_init_rd_atom; |
1427 | event.param.conn.responder_resources = attr.max_qp_rd_atom; | 1434 | event.param.conn.responder_resources = attr.max_qp_rd_atom; |
1435 | |||
1436 | /* | ||
1437 | * Protect against the user destroying conn_id from another thread | ||
1438 | * until we're done accessing it. | ||
1439 | */ | ||
1440 | atomic_inc(&conn_id->refcount); | ||
1428 | ret = conn_id->id.event_handler(&conn_id->id, &event); | 1441 | ret = conn_id->id.event_handler(&conn_id->id, &event); |
1429 | if (ret) { | 1442 | if (ret) { |
1430 | /* User wants to destroy the CM ID */ | 1443 | /* User wants to destroy the CM ID */ |
1431 | conn_id->cm_id.iw = NULL; | 1444 | conn_id->cm_id.iw = NULL; |
1432 | cma_exch(conn_id, CMA_DESTROYING); | 1445 | cma_exch(conn_id, CMA_DESTROYING); |
1433 | mutex_unlock(&conn_id->handler_mutex); | 1446 | mutex_unlock(&conn_id->handler_mutex); |
1447 | cma_deref_id(conn_id); | ||
1434 | rdma_destroy_id(&conn_id->id); | 1448 | rdma_destroy_id(&conn_id->id); |
1435 | goto out; | 1449 | goto out; |
1436 | } | 1450 | } |
1437 | 1451 | ||
1438 | mutex_unlock(&conn_id->handler_mutex); | 1452 | mutex_unlock(&conn_id->handler_mutex); |
1453 | cma_deref_id(conn_id); | ||
1439 | 1454 | ||
1440 | out: | 1455 | out: |
1441 | if (dev) | 1456 | if (dev) |