about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/infiniband
diff options
context:
space:
mode:
author: Sean Hefty <sean.hefty@intel.com> 2007-09-24 16:19:09 -0400
committer: Roland Dreier <rolandd@cisco.com> 2007-10-16 15:25:00 -0400
commit: c5483388bb4d771007ef36478db038e07922a020 (patch)
tree: c3e7304a88c0e856d28abaf3219a82e518ea6e9c /drivers/infiniband
parent: ab8403c424a35364a3a2c753f7c5917fcbb4d809 (diff)
RDMA/cma: Add locking around QP accesses
If a user allocates a QP on an rdma_cm_id, the rdma_cm will automatically transition the QP through its states (RTR, RTS, error, etc.). While the QP state transitions are occurring, the QP itself must remain valid. Provide locking around the QP pointer to prevent its destruction while accessing the pointer.

This fixes an issue reported by Olaf Kirch from Oracle that resulted in a system crash:

"An incoming connection arrives and we decide to tear down the nascent connection. The remote ends decides to do the same. We start to shut down the connection, and call rdma_destroy_qp on our cm_id. ... Now apparently a 'connect reject' message comes in from the other host, and cma_ib_handler() is called with an event of IB_CM_REJ_RECEIVED. It calls cma_modify_qp_err, which for some odd reason tries to modify the exact same QP we just destroyed."

Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- drivers/infiniband/core/cma.c 90
1 files changed, 60 insertions, 30 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 93644f82592c..01ae052ac3f4 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -121,6 +121,8 @@ struct rdma_id_private {
121 121
122 enum cma_state state; 122 enum cma_state state;
123 spinlock_t lock; 123 spinlock_t lock;
124 struct mutex qp_mutex;
125
124 struct completion comp; 126 struct completion comp;
125 atomic_t refcount; 127 atomic_t refcount;
126 wait_queue_head_t wait_remove; 128 wait_queue_head_t wait_remove;
@@ -389,6 +391,7 @@ struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
389 id_priv->id.event_handler = event_handler; 391 id_priv->id.event_handler = event_handler;
390 id_priv->id.ps = ps; 392 id_priv->id.ps = ps;
391 spin_lock_init(&id_priv->lock); 393 spin_lock_init(&id_priv->lock);
394 mutex_init(&id_priv->qp_mutex);
392 init_completion(&id_priv->comp); 395 init_completion(&id_priv->comp);
393 atomic_set(&id_priv->refcount, 1); 396 atomic_set(&id_priv->refcount, 1);
394 init_waitqueue_head(&id_priv->wait_remove); 397 init_waitqueue_head(&id_priv->wait_remove);
@@ -474,61 +477,86 @@ EXPORT_SYMBOL(rdma_create_qp);
474 477
475void rdma_destroy_qp(struct rdma_cm_id *id) 478void rdma_destroy_qp(struct rdma_cm_id *id)
476{ 479{
477 ib_destroy_qp(id->qp); 480 struct rdma_id_private *id_priv;
481
482 id_priv = container_of(id, struct rdma_id_private, id);
483 mutex_lock(&id_priv->qp_mutex);
484 ib_destroy_qp(id_priv->id.qp);
485 id_priv->id.qp = NULL;
486 mutex_unlock(&id_priv->qp_mutex);
478} 487}
479EXPORT_SYMBOL(rdma_destroy_qp); 488EXPORT_SYMBOL(rdma_destroy_qp);
480 489
481static int cma_modify_qp_rtr(struct rdma_cm_id *id) 490static int cma_modify_qp_rtr(struct rdma_id_private *id_priv)
482{ 491{
483 struct ib_qp_attr qp_attr; 492 struct ib_qp_attr qp_attr;
484 int qp_attr_mask, ret; 493 int qp_attr_mask, ret;
485 494
486 if (!id->qp) 495 mutex_lock(&id_priv->qp_mutex);
487 return 0; 496 if (!id_priv->id.qp) {
497 ret = 0;
498 goto out;
499 }
488 500
489 /* Need to update QP attributes from default values. */ 501 /* Need to update QP attributes from default values. */
490 qp_attr.qp_state = IB_QPS_INIT; 502 qp_attr.qp_state = IB_QPS_INIT;
491 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); 503 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
492 if (ret) 504 if (ret)
493 return ret; 505 goto out;
494 506
495 ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); 507 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
496 if (ret) 508 if (ret)
497 return ret; 509 goto out;
498 510
499 qp_attr.qp_state = IB_QPS_RTR; 511 qp_attr.qp_state = IB_QPS_RTR;
500 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); 512 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
501 if (ret) 513 if (ret)
502 return ret; 514 goto out;
503 515
504 return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); 516 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
517out:
518 mutex_unlock(&id_priv->qp_mutex);
519 return ret;
505} 520}
506 521
507static int cma_modify_qp_rts(struct rdma_cm_id *id) 522static int cma_modify_qp_rts(struct rdma_id_private *id_priv)
508{ 523{
509 struct ib_qp_attr qp_attr; 524 struct ib_qp_attr qp_attr;
510 int qp_attr_mask, ret; 525 int qp_attr_mask, ret;
511 526
512 if (!id->qp) 527 mutex_lock(&id_priv->qp_mutex);
513 return 0; 528 if (!id_priv->id.qp) {
529 ret = 0;
530 goto out;
531 }
514 532
515 qp_attr.qp_state = IB_QPS_RTS; 533 qp_attr.qp_state = IB_QPS_RTS;
516 ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask); 534 ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
517 if (ret) 535 if (ret)
518 return ret; 536 goto out;
519 537
520 return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask); 538 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
539out:
540 mutex_unlock(&id_priv->qp_mutex);
541 return ret;
521} 542}
522 543
523static int cma_modify_qp_err(struct rdma_cm_id *id) 544static int cma_modify_qp_err(struct rdma_id_private *id_priv)
524{ 545{
525 struct ib_qp_attr qp_attr; 546 struct ib_qp_attr qp_attr;
547 int ret;
526 548
527 if (!id->qp) 549 mutex_lock(&id_priv->qp_mutex);
528 return 0; 550 if (!id_priv->id.qp) {
551 ret = 0;
552 goto out;
553 }
529 554
530 qp_attr.qp_state = IB_QPS_ERR; 555 qp_attr.qp_state = IB_QPS_ERR;
531 return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE); 556 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, IB_QP_STATE);
557out:
558 mutex_unlock(&id_priv->qp_mutex);
559 return ret;
532} 560}
533 561
534static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv, 562static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
@@ -857,11 +885,11 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
857{ 885{
858 int ret; 886 int ret;
859 887
860 ret = cma_modify_qp_rtr(&id_priv->id); 888 ret = cma_modify_qp_rtr(id_priv);
861 if (ret) 889 if (ret)
862 goto reject; 890 goto reject;
863 891
864 ret = cma_modify_qp_rts(&id_priv->id); 892 ret = cma_modify_qp_rts(id_priv);
865 if (ret) 893 if (ret)
866 goto reject; 894 goto reject;
867 895
@@ -871,7 +899,7 @@ static int cma_rep_recv(struct rdma_id_private *id_priv)
871 899
872 return 0; 900 return 0;
873reject: 901reject:
874 cma_modify_qp_err(&id_priv->id); 902 cma_modify_qp_err(id_priv);
875 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED, 903 ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
876 NULL, 0, NULL, 0); 904 NULL, 0, NULL, 0);
877 return ret; 905 return ret;
@@ -947,7 +975,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
947 /* ignore event */ 975 /* ignore event */
948 goto out; 976 goto out;
949 case IB_CM_REJ_RECEIVED: 977 case IB_CM_REJ_RECEIVED:
950 cma_modify_qp_err(&id_priv->id); 978 cma_modify_qp_err(id_priv);
951 event.status = ib_event->param.rej_rcvd.reason; 979 event.status = ib_event->param.rej_rcvd.reason;
952 event.event = RDMA_CM_EVENT_REJECTED; 980 event.event = RDMA_CM_EVENT_REJECTED;
953 event.param.conn.private_data = ib_event->private_data; 981 event.param.conn.private_data = ib_event->private_data;
@@ -2264,7 +2292,7 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
2264 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr; 2292 sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2265 cm_id->remote_addr = *sin; 2293 cm_id->remote_addr = *sin;
2266 2294
2267 ret = cma_modify_qp_rtr(&id_priv->id); 2295 ret = cma_modify_qp_rtr(id_priv);
2268 if (ret) 2296 if (ret)
2269 goto out; 2297 goto out;
2270 2298
@@ -2331,7 +2359,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
2331 int qp_attr_mask, ret; 2359 int qp_attr_mask, ret;
2332 2360
2333 if (id_priv->id.qp) { 2361 if (id_priv->id.qp) {
2334 ret = cma_modify_qp_rtr(&id_priv->id); 2362 ret = cma_modify_qp_rtr(id_priv);
2335 if (ret) 2363 if (ret)
2336 goto out; 2364 goto out;
2337 2365
@@ -2370,7 +2398,7 @@ static int cma_accept_iw(struct rdma_id_private *id_priv,
2370 struct iw_cm_conn_param iw_param; 2398 struct iw_cm_conn_param iw_param;
2371 int ret; 2399 int ret;
2372 2400
2373 ret = cma_modify_qp_rtr(&id_priv->id); 2401 ret = cma_modify_qp_rtr(id_priv);
2374 if (ret) 2402 if (ret)
2375 return ret; 2403 return ret;
2376 2404
@@ -2442,7 +2470,7 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2442 2470
2443 return 0; 2471 return 0;
2444reject: 2472reject:
2445 cma_modify_qp_err(id); 2473 cma_modify_qp_err(id_priv);
2446 rdma_reject(id, NULL, 0); 2474 rdma_reject(id, NULL, 0);
2447 return ret; 2475 return ret;
2448} 2476}
@@ -2512,7 +2540,7 @@ int rdma_disconnect(struct rdma_cm_id *id)
2512 2540
2513 switch (rdma_node_get_transport(id->device->node_type)) { 2541 switch (rdma_node_get_transport(id->device->node_type)) {
2514 case RDMA_TRANSPORT_IB: 2542 case RDMA_TRANSPORT_IB:
2515 ret = cma_modify_qp_err(id); 2543 ret = cma_modify_qp_err(id_priv);
2516 if (ret) 2544 if (ret)
2517 goto out; 2545 goto out;
2518 /* Initiate or respond to a disconnect. */ 2546 /* Initiate or respond to a disconnect. */
@@ -2543,9 +2571,11 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2543 cma_disable_remove(id_priv, CMA_ADDR_RESOLVED)) 2571 cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
2544 return 0; 2572 return 0;
2545 2573
2574 mutex_lock(&id_priv->qp_mutex);
2546 if (!status && id_priv->id.qp) 2575 if (!status && id_priv->id.qp)
2547 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 2576 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2548 multicast->rec.mlid); 2577 multicast->rec.mlid);
2578 mutex_unlock(&id_priv->qp_mutex);
2549 2579
2550 memset(&event, 0, sizeof event); 2580 memset(&event, 0, sizeof event);
2551 event.status = status; 2581 event.status = status;