diff options
author | Bart Van Assche <bvanassche@acm.org> | 2014-10-21 12:00:35 -0400 |
---|---|---|
committer | Christoph Hellwig <hch@lst.de> | 2014-11-12 06:05:24 -0500 |
commit | 7dad6b2e440d810273946b0e7092a8fe043c3b8a (patch) | |
tree | cfb1127e6a552d864506126f6dfc6186d8b3357c /drivers/infiniband/ulp | |
parent | d92c0da71a35dfddccca7bfa932829504311359e (diff) |
IB/srp: Fix a race condition triggered by destroying a queue pair
At least LID reassignment can trigger a race condition in the SRP
initiator driver, namely the receive completion handler trying to
post a request on a QP during or after QP destruction and before
the CQs have been destroyed. Avoid this race by modifying a QP
into the error state and by waiting until all receive completions
have been processed before destroying a QP.
Reported-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Sagi Grimberg <sagig@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 59 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.h | 2 |
2 files changed, 52 insertions, 9 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index aac844a6eef6..57b5ff1bbcb6 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c | |||
@@ -453,6 +453,41 @@ static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target) | |||
453 | dev->max_pages_per_mr); | 453 | dev->max_pages_per_mr); |
454 | } | 454 | } |
455 | 455 | ||
456 | /** | ||
457 | * srp_destroy_qp() - destroy an RDMA queue pair | ||
458 | * @ch: SRP RDMA channel. | ||
459 | * | ||
460 | * Change a queue pair into the error state and wait until all receive | ||
461 | * completions have been processed before destroying it. This avoids that | ||
462 | * the receive completion handler can access the queue pair while it is | ||
463 | * being destroyed. | ||
464 | */ | ||
465 | static void srp_destroy_qp(struct srp_rdma_ch *ch) | ||
466 | { | ||
467 | struct srp_target_port *target = ch->target; | ||
468 | static struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR }; | ||
469 | static struct ib_recv_wr wr = { .wr_id = SRP_LAST_WR_ID }; | ||
470 | struct ib_recv_wr *bad_wr; | ||
471 | int ret; | ||
472 | |||
473 | /* Destroying a QP and reusing ch->done is only safe if not connected */ | ||
474 | WARN_ON_ONCE(target->connected); | ||
475 | |||
476 | ret = ib_modify_qp(ch->qp, &attr, IB_QP_STATE); | ||
477 | WARN_ONCE(ret, "ib_modify_qp() returned %d\n", ret); | ||
478 | if (ret) | ||
479 | goto out; | ||
480 | |||
481 | init_completion(&ch->done); | ||
482 | ret = ib_post_recv(ch->qp, &wr, &bad_wr); | ||
483 | WARN_ONCE(ret, "ib_post_recv() returned %d\n", ret); | ||
484 | if (ret == 0) | ||
485 | wait_for_completion(&ch->done); | ||
486 | |||
487 | out: | ||
488 | ib_destroy_qp(ch->qp); | ||
489 | } | ||
490 | |||
456 | static int srp_create_ch_ib(struct srp_rdma_ch *ch) | 491 | static int srp_create_ch_ib(struct srp_rdma_ch *ch) |
457 | { | 492 | { |
458 | struct srp_target_port *target = ch->target; | 493 | struct srp_target_port *target = ch->target; |
@@ -469,8 +504,9 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) | |||
469 | if (!init_attr) | 504 | if (!init_attr) |
470 | return -ENOMEM; | 505 | return -ENOMEM; |
471 | 506 | ||
507 | /* + 1 for SRP_LAST_WR_ID */ | ||
472 | recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch, | 508 | recv_cq = ib_create_cq(dev->dev, srp_recv_completion, NULL, ch, |
473 | target->queue_size, ch->comp_vector); | 509 | target->queue_size + 1, ch->comp_vector); |
474 | if (IS_ERR(recv_cq)) { | 510 | if (IS_ERR(recv_cq)) { |
475 | ret = PTR_ERR(recv_cq); | 511 | ret = PTR_ERR(recv_cq); |
476 | goto err; | 512 | goto err; |
@@ -487,7 +523,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) | |||
487 | 523 | ||
488 | init_attr->event_handler = srp_qp_event; | 524 | init_attr->event_handler = srp_qp_event; |
489 | init_attr->cap.max_send_wr = m * target->queue_size; | 525 | init_attr->cap.max_send_wr = m * target->queue_size; |
490 | init_attr->cap.max_recv_wr = target->queue_size; | 526 | init_attr->cap.max_recv_wr = target->queue_size + 1; |
491 | init_attr->cap.max_recv_sge = 1; | 527 | init_attr->cap.max_recv_sge = 1; |
492 | init_attr->cap.max_send_sge = 1; | 528 | init_attr->cap.max_send_sge = 1; |
493 | init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; | 529 | init_attr->sq_sig_type = IB_SIGNAL_REQ_WR; |
@@ -530,7 +566,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch) | |||
530 | } | 566 | } |
531 | 567 | ||
532 | if (ch->qp) | 568 | if (ch->qp) |
533 | ib_destroy_qp(ch->qp); | 569 | srp_destroy_qp(ch); |
534 | if (ch->recv_cq) | 570 | if (ch->recv_cq) |
535 | ib_destroy_cq(ch->recv_cq); | 571 | ib_destroy_cq(ch->recv_cq); |
536 | if (ch->send_cq) | 572 | if (ch->send_cq) |
@@ -586,7 +622,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, | |||
586 | if (ch->fmr_pool) | 622 | if (ch->fmr_pool) |
587 | ib_destroy_fmr_pool(ch->fmr_pool); | 623 | ib_destroy_fmr_pool(ch->fmr_pool); |
588 | } | 624 | } |
589 | ib_destroy_qp(ch->qp); | 625 | srp_destroy_qp(ch); |
590 | ib_destroy_cq(ch->send_cq); | 626 | ib_destroy_cq(ch->send_cq); |
591 | ib_destroy_cq(ch->recv_cq); | 627 | ib_destroy_cq(ch->recv_cq); |
592 | 628 | ||
@@ -1883,8 +1919,15 @@ static void srp_tl_err_work(struct work_struct *work) | |||
1883 | } | 1919 | } |
1884 | 1920 | ||
1885 | static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, | 1921 | static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status, |
1886 | bool send_err, struct srp_target_port *target) | 1922 | bool send_err, struct srp_rdma_ch *ch) |
1887 | { | 1923 | { |
1924 | struct srp_target_port *target = ch->target; | ||
1925 | |||
1926 | if (wr_id == SRP_LAST_WR_ID) { | ||
1927 | complete(&ch->done); | ||
1928 | return; | ||
1929 | } | ||
1930 | |||
1888 | if (target->connected && !target->qp_in_error) { | 1931 | if (target->connected && !target->qp_in_error) { |
1889 | if (wr_id & LOCAL_INV_WR_ID_MASK) { | 1932 | if (wr_id & LOCAL_INV_WR_ID_MASK) { |
1890 | shost_printk(KERN_ERR, target->scsi_host, PFX | 1933 | shost_printk(KERN_ERR, target->scsi_host, PFX |
@@ -1915,8 +1958,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *ch_ptr) | |||
1915 | if (likely(wc.status == IB_WC_SUCCESS)) { | 1958 | if (likely(wc.status == IB_WC_SUCCESS)) { |
1916 | srp_handle_recv(ch, &wc); | 1959 | srp_handle_recv(ch, &wc); |
1917 | } else { | 1960 | } else { |
1918 | srp_handle_qp_err(wc.wr_id, wc.status, false, | 1961 | srp_handle_qp_err(wc.wr_id, wc.status, false, ch); |
1919 | ch->target); | ||
1920 | } | 1962 | } |
1921 | } | 1963 | } |
1922 | } | 1964 | } |
@@ -1932,8 +1974,7 @@ static void srp_send_completion(struct ib_cq *cq, void *ch_ptr) | |||
1932 | iu = (struct srp_iu *) (uintptr_t) wc.wr_id; | 1974 | iu = (struct srp_iu *) (uintptr_t) wc.wr_id; |
1933 | list_add(&iu->list, &ch->free_tx); | 1975 | list_add(&iu->list, &ch->free_tx); |
1934 | } else { | 1976 | } else { |
1935 | srp_handle_qp_err(wc.wr_id, wc.status, true, | 1977 | srp_handle_qp_err(wc.wr_id, wc.status, true, ch); |
1936 | ch->target); | ||
1937 | } | 1978 | } |
1938 | } | 1979 | } |
1939 | } | 1980 | } |
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ca7c6f065434..a611556406ac 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h | |||
@@ -70,6 +70,8 @@ enum { | |||
70 | 70 | ||
71 | LOCAL_INV_WR_ID_MASK = 1, | 71 | LOCAL_INV_WR_ID_MASK = 1, |
72 | FAST_REG_WR_ID_MASK = 2, | 72 | FAST_REG_WR_ID_MASK = 2, |
73 | |||
74 | SRP_LAST_WR_ID = 0xfffffffcU, | ||
73 | }; | 75 | }; |
74 | 76 | ||
75 | enum srp_target_state { | 77 | enum srp_target_state { |