From c9b03c1ae55decc721310b79d8f50d44fbb37dc7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 3 Sep 2011 09:34:48 +0200 Subject: IB/srp: Increase block layer timeout Increase the block layer timeout for disks so that it is above the InfiniBand transport layer timeout. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 45 +++++++++++++++++++++++++++++++++++++ drivers/infiniband/ulp/srp/ib_srp.h | 2 ++ 2 files changed, 47 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 922d845f76b0..5aa70e96ec90 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1419,6 +1419,33 @@ err: return -ENOMEM; } +static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask) +{ + uint64_t T_tr_ns, max_compl_time_ms; + uint32_t rq_tmo_jiffies; + + /* + * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair, + * table 91), both the QP timeout and the retry count have to be set + * for RC QP's during the RTR to RTS transition. + */ + WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) != + (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)); + + /* + * Set target->rq_tmo_jiffies to one second more than the largest time + * it can take before an error completion is generated. See also + * C9-140..142 in the IBTA spec for more information about how to + * convert the QP Local ACK Timeout value to nanoseconds. + */ + T_tr_ns = 4096 * (1ULL << qp_attr->timeout); + max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns; + do_div(max_compl_time_ms, NSEC_PER_MSEC); + rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000); + + return rq_tmo_jiffies; +} + static void srp_cm_rep_handler(struct ib_cm_id *cm_id, struct srp_login_rsp *lrsp, struct srp_target_port *target) @@ -1478,6 +1505,8 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id, if (ret) goto error_free; + target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask); + ret = ib_modify_qp(target->qp, qp_attr, attr_mask); if (ret) goto error_free; @@ -1729,6 +1758,21 @@ static int srp_reset_host(struct scsi_cmnd *scmnd) return ret; } +static int srp_slave_configure(struct scsi_device *sdev) +{ + struct Scsi_Host *shost = sdev->host; + struct srp_target_port *target = host_to_target(shost); + struct request_queue *q = sdev->request_queue; + unsigned long timeout; + + if (sdev->type == TYPE_DISK) { + timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies); + blk_queue_rq_timeout(q, timeout); + } + + return 0; +} + static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1861,6 +1905,7 @@ static struct scsi_host_template srp_template = { .module = THIS_MODULE, .name = "InfiniBand SRP initiator", .proc_name = DRV_NAME, + .slave_configure = srp_slave_configure, .info = srp_target_info, .queuecommand = srp_queuecommand, .eh_abort_handler = srp_abort, diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 020caf0c3789..e3a6304ba87b 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -163,6 +163,8 @@ struct srp_target_port { struct ib_sa_query *path_query; int path_query_id; + u32 rq_tmo_jiffies; + struct ib_cm_id *cm_id; int max_ti_iu_len; -- cgit v1.2.2 From 09be70a238005cc33f2a52b0aeae52f117e81582 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 17 Mar 2012 17:18:54 +0000 Subject: IB/srp: Eliminate state SRP_TARGET_CONNECTING Block the SCSI host while reconnecting instead of representing the reconnection activity as a distinct SRP target state. This allows us to eliminate the target state SRP_TARGET_CONNECTING. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 25 ++++++++++++++----------- drivers/infiniband/ulp/srp/ib_srp.h | 1 - 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5aa70e96ec90..a2261995c550 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -646,13 +646,16 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re static int srp_reconnect_target(struct srp_target_port *target) { + struct Scsi_Host *shost = target->scsi_host; struct ib_qp_attr qp_attr; struct ib_wc wc; int i, ret; - if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING)) + if (target->state != SRP_TARGET_LIVE) return -EAGAIN; + scsi_target_block(&shost->shost_gendev); + srp_disconnect_target(target); /* * Now get a new local CM ID so that we avoid confusing the @@ -660,16 +663,16 @@ static int srp_reconnect_target(struct srp_target_port *target) */ ret = srp_new_cm_id(target); if (ret) - goto err; + goto unblock; qp_attr.qp_state = IB_QPS_RESET; ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); if (ret) - goto err; + goto unblock; ret = srp_init_qp(target, target->qp); if (ret) - goto err; + goto unblock; while (ib_poll_cq(target->recv_cq, 1, &wc) > 0) ; /* nothing */ @@ -688,11 +691,15 @@ static int srp_reconnect_target(struct srp_target_port *target) target->qp_in_error = 0; ret = srp_connect_target(target); + +unblock: + scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : + SDEV_TRANSPORT_OFFLINE); + if (ret) goto err; - if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE)) - ret = -EAGAIN; + shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); return ret; @@ -710,7 +717,7 @@ err: * the flush_scheduled_work() in srp_remove_one(). */ spin_lock_irq(&target->lock); - if (target->state == SRP_TARGET_CONNECTING) { + if (target->state == SRP_TARGET_LIVE) { target->state = SRP_TARGET_DEAD; INIT_WORK(&target->work, srp_remove_work); queue_work(ib_wq, &target->work); @@ -1311,9 +1318,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) unsigned long flags; int len; - if (target->state == SRP_TARGET_CONNECTING) - goto err; - if (target->state == SRP_TARGET_DEAD || target->state == SRP_TARGET_REMOVED) { scmnd->result = DID_BAD_TARGET << 16; @@ -1377,7 +1381,6 @@ err_iu: err_unlock: spin_unlock_irqrestore(&target->lock, flags); -err: return SCSI_MLQUEUE_HOST_BUSY; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index e3a6304ba87b..8b436cee46ad 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -80,7 +80,6 @@ enum { enum srp_target_state { SRP_TARGET_LIVE, - SRP_TARGET_CONNECTING, SRP_TARGET_DEAD, SRP_TARGET_REMOVED }; -- cgit v1.2.2 From f3718231203a165dbfd5c5bd0d9a6db522bb73e3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 19 Apr 2012 14:42:54 +0000 Subject: IB/srp: Keep processing commands during host removal Some SCSI upper layer drivers, e.g. sd, issue SCSI commands from inside scsi_remove_host() (see the sd_shutdown() call in sd_remove()). Make sure that these commands have a chance to reach the SCSI device. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index a2261995c550..9371d582d0dc 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1318,13 +1318,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) unsigned long flags; int len; - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) { - scmnd->result = DID_BAD_TARGET << 16; - scmnd->scsi_done(scmnd); - return 0; - } - spin_lock_irqsave(&target->lock, flags); iu = __srp_get_tx_iu(target, SRP_IU_CMD); if (!iu) -- cgit v1.2.2 From 224db157431299582a65fb9d4c9228363146f8a9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 24 Oct 2012 15:05:37 +0200 Subject: IB/srp: Simplify SCSI error handling Since scsi_remove_host() has been modified so that SCSI error handling functions will no longer be invoked after scsi_remove_host() returns, the test at the start of srp_send_tsk_mgmt() is now superfluous. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9371d582d0dc..baafee085e33 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1661,10 +1661,6 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target, struct srp_iu *iu; struct srp_tsk_mgmt *tsk_mgmt; - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -1; - init_completion(&target->tsk_mgmt_done); spin_lock_irq(&target->lock); -- cgit v1.2.2 From 948d1e889e5bd6ceffcc31db2beba39331087df3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sat, 3 Sep 2011 09:25:42 +0200 Subject: IB/srp: Introduce srp_handle_qp_err() Introduce the function srp_handle_qp_err(), change the type of qp_in_error from int into bool and move the initialization of that variable from srp_reconnect_target() to srp_connect_target(). Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 37 ++++++++++++++++++++----------------- drivers/infiniband/ulp/srp/ib_srp.h | 2 +- 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index baafee085e33..c28833070201 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -515,6 +515,8 @@ static int srp_connect_target(struct srp_target_port *target) int retries = 3; int ret; + target->qp_in_error = false; + ret = srp_lookup_path(target); if (ret) return ret; @@ -689,7 +691,6 @@ static int srp_reconnect_target(struct srp_target_port *target) for (i = 0; i < SRP_SQ_SIZE; ++i) list_add(&target->tx_ring[i]->list, &target->free_tx); - target->qp_in_error = 0; ret = srp_connect_target(target); unblock: @@ -1269,6 +1270,15 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc) PFX "Recv failed with error code %d\n", res); } +static void srp_handle_qp_err(enum ib_wc_status wc_status, + enum ib_wc_opcode wc_opcode, + struct srp_target_port *target) +{ + shost_printk(KERN_ERR, target->scsi_host, PFX "failed %s status %d\n", + wc_opcode & IB_WC_RECV ? "receive" : "send", wc_status); + target->qp_in_error = true; +} + static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) { struct srp_target_port *target = target_ptr; @@ -1276,15 +1286,12 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); while (ib_poll_cq(cq, 1, &wc) > 0) { - if (wc.status) { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed receive status %d\n", - wc.status); - target->qp_in_error = 1; + if (likely(wc.status == IB_WC_SUCCESS)) { + srp_handle_recv(target, &wc); + } else { + srp_handle_qp_err(wc.status, wc.opcode, target); break; } - - srp_handle_recv(target, &wc); } } @@ -1295,16 +1302,13 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) struct srp_iu *iu; while (ib_poll_cq(cq, 1, &wc) > 0) { - if (wc.status) { - shost_printk(KERN_ERR, target->scsi_host, - PFX "failed send status %d\n", - wc.status); - target->qp_in_error = 1; + if (likely(wc.status == IB_WC_SUCCESS)) { + iu = (struct srp_iu *) (uintptr_t) wc.wr_id; + list_add(&iu->list, &target->free_tx); + } else { + srp_handle_qp_err(wc.status, wc.opcode, target); break; } - - iu = (struct srp_iu *) (uintptr_t) wc.wr_id; - list_add(&iu->list, &target->free_tx); } } @@ -2269,7 +2273,6 @@ static ssize_t srp_create_target(struct device *dev, if (ret) goto err_free_ib; - target->qp_in_error = 0; ret = srp_connect_target(target); if (ret) { shost_printk(KERN_ERR, target->scsi_host, diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 8b436cee46ad..02dc3acb718c 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -179,7 +179,7 @@ struct srp_target_port { struct list_head list; struct completion done; int status; - int qp_in_error; + bool qp_in_error; struct completion tsk_mgmt_done; u8 tsk_mgmt_status; -- cgit v1.2.2 From 4f0af69799b0e7b9805c4d174169f5ed7bf315c1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Nov 2012 11:16:40 +0100 Subject: IB/srp: Process all error completions If the RDMA RC connection is closed, tell the SCSI mid-layer to terminate all pending commands instead of only the first. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index c28833070201..f0843ef28e14 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1274,8 +1274,12 @@ static void srp_handle_qp_err(enum ib_wc_status wc_status, enum ib_wc_opcode wc_opcode, struct srp_target_port *target) { - shost_printk(KERN_ERR, target->scsi_host, PFX "failed %s status %d\n", - wc_opcode & IB_WC_RECV ? "receive" : "send", wc_status); + if (!target->qp_in_error) { + shost_printk(KERN_ERR, target->scsi_host, + PFX "failed %s status %d\n", + wc_opcode & IB_WC_RECV ? "receive" : "send", + wc_status); + } target->qp_in_error = true; } @@ -1290,7 +1294,6 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) srp_handle_recv(target, &wc); } else { srp_handle_qp_err(wc.status, wc.opcode, target); - break; } } } @@ -1307,7 +1310,6 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) list_add(&iu->list, &target->free_tx); } else { srp_handle_qp_err(wc.status, wc.opcode, target); - break; } } } -- cgit v1.2.2 From 294c875a65269361defd7aeb97804ba99eb57cbf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 25 Dec 2011 12:18:12 +0000 Subject: IB/srp: Suppress superfluous error messages Keep track of the connection state. Only report QP errors while connected. Only invoke ib_send_cm_dreq() when connected so that invoking srp_disconnect_target() after having received a DREQ does not cause an error message to be printed. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 38 +++++++++++++++++++++++++++++-------- drivers/infiniband/ulp/srp/ib_srp.h | 1 + 2 files changed, 31 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index f0843ef28e14..3c64bf400c20 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -428,17 +428,34 @@ static int srp_send_req(struct srp_target_port *target) return status; } +static bool srp_change_conn_state(struct srp_target_port *target, + bool connected) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->connected != connected) { + target->connected = connected; + changed = true; + } + spin_unlock_irq(&target->lock); + + return changed; +} + static void srp_disconnect_target(struct srp_target_port *target) { - /* XXX should send SRP_I_LOGOUT request */ + if (srp_change_conn_state(target, false)) { + /* XXX should send SRP_I_LOGOUT request */ - init_completion(&target->done); - if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { - shost_printk(KERN_DEBUG, target->scsi_host, - PFX "Sending CM DREQ failed\n"); - return; + init_completion(&target->done); + if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { + shost_printk(KERN_DEBUG, target->scsi_host, + PFX "Sending CM DREQ failed\n"); + } else { + wait_for_completion(&target->done); + } } - wait_for_completion(&target->done); } static bool srp_change_state(struct srp_target_port *target, @@ -515,6 +532,8 @@ static int srp_connect_target(struct srp_target_port *target) int retries = 3; int ret; + WARN_ON_ONCE(target->connected); + target->qp_in_error = false; ret = srp_lookup_path(target); @@ -536,6 +555,7 @@ static int srp_connect_target(struct srp_target_port *target) */ switch (target->status) { case 0: + srp_change_conn_state(target, true); return 0; case SRP_PORT_REDIRECT: @@ -1274,7 +1294,7 @@ static void srp_handle_qp_err(enum ib_wc_status wc_status, enum ib_wc_opcode wc_opcode, struct srp_target_port *target) { - if (!target->qp_in_error) { + if (target->connected && !target->qp_in_error) { shost_printk(KERN_ERR, target->scsi_host, PFX "failed %s status %d\n", wc_opcode & IB_WC_RECV ? "receive" : "send", @@ -1630,6 +1650,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) case IB_CM_DREQ_RECEIVED: shost_printk(KERN_WARNING, target->scsi_host, PFX "DREQ received - connection closed\n"); + srp_change_conn_state(target, false); if (ib_send_cm_drep(cm_id, NULL, 0)) shost_printk(KERN_ERR, target->scsi_host, PFX "Sending CM DREP failed\n"); @@ -1942,6 +1963,7 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) spin_unlock(&host->target_lock); target->state = SRP_TARGET_LIVE; + target->connected = false; scsi_scan_target(&target->scsi_host->shost_gendev, 0, target->scsi_id, SCAN_WILD_CARD, 0); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 02dc3acb718c..ef95fa4ca3ef 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -163,6 +163,7 @@ struct srp_target_port { int path_query_id; u32 rq_tmo_jiffies; + bool connected; struct ib_cm_id *cm_id; -- cgit v1.2.2 From ee12d6a80cfcd08b862ed3c8e109442e466b0302 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Sun, 25 Dec 2011 19:41:07 +0000 Subject: IB/srp: Introduce the helper function srp_remove_target() Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3c64bf400c20..beb68786001e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -506,6 +506,17 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost) device_remove_file(&shost->shost_dev, *attr); } +static void srp_remove_target(struct srp_target_port *target) +{ + srp_del_scsi_host_attr(target->scsi_host); + srp_remove_host(target->scsi_host); + scsi_remove_host(target->scsi_host); + ib_destroy_cm_id(target->cm_id); + srp_free_target_ib(target); + srp_free_req_data(target); + scsi_host_put(target->scsi_host); +} + static void srp_remove_work(struct work_struct *work) { struct srp_target_port *target = @@ -518,13 +529,7 @@ static void srp_remove_work(struct work_struct *work) list_del(&target->list); spin_unlock(&target->srp_host->target_lock); - srp_del_scsi_host_attr(target->scsi_host); - srp_remove_host(target->scsi_host); - scsi_remove_host(target->scsi_host); - ib_destroy_cm_id(target->cm_id); - srp_free_target_ib(target); - srp_free_req_data(target); - scsi_host_put(target->scsi_host); + srp_remove_target(target); } static int srp_connect_target(struct srp_target_port *target) -- cgit v1.2.2 From ef6c49d87c3418c442a22e55e3ce2f91b163d69e Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Dec 2011 16:49:18 +0000 Subject: IB/srp: Eliminate state SRP_TARGET_DEAD Only queue removal work after having changed the target state into SRP_TARGET_REMOVED and not if that state was already equal to SRP_TARGET_REMOVED. That allows us to remove the state SRP_TARGET_DEAD. Add a call to srp_disconnect_target() in srp_remove_target() -- due to previous changes it is now safe to invoke that function even if the IB connection has already been disconnected. This change allows us to replace the target removal code in srp_remove_one() by an (indirect) call to srp_remove_target(). Rename srp_target_port.work into srp_target_port.remove_work to reflect its usage. Signed-off-by: Bart Van Assche Acked-by: David Dillow Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 83 ++++++++++++++----------------------- drivers/infiniband/ulp/srp/ib_srp.h | 5 +-- 2 files changed, 32 insertions(+), 56 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index beb68786001e..95590a38e88a 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -428,6 +428,23 @@ static int srp_send_req(struct srp_target_port *target) return status; } +static bool srp_queue_remove_work(struct srp_target_port *target) +{ + bool changed = false; + + spin_lock_irq(&target->lock); + if (target->state != SRP_TARGET_REMOVED) { + target->state = SRP_TARGET_REMOVED; + changed = true; + } + spin_unlock_irq(&target->lock); + + if (changed) + queue_work(system_long_wq, &target->remove_work); + + return changed; +} + static bool srp_change_conn_state(struct srp_target_port *target, bool connected) { @@ -458,21 +475,6 @@ static void srp_disconnect_target(struct srp_target_port *target) } } -static bool srp_change_state(struct srp_target_port *target, - enum srp_target_state old, - enum srp_target_state new) -{ - bool changed = false; - - spin_lock_irq(&target->lock); - if (target->state == old) { - target->state = new; - changed = true; - } - spin_unlock_irq(&target->lock); - return changed; -} - static void srp_free_req_data(struct srp_target_port *target) { struct ib_device *ibdev = target->srp_host->srp_dev->dev; @@ -508,9 +510,12 @@ static void srp_del_scsi_host_attr(struct Scsi_Host *shost) static void srp_remove_target(struct srp_target_port *target) { + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); + srp_del_scsi_host_attr(target->scsi_host); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); + srp_disconnect_target(target); ib_destroy_cm_id(target->cm_id); srp_free_target_ib(target); srp_free_req_data(target); @@ -520,10 +525,9 @@ static void srp_remove_target(struct srp_target_port *target) static void srp_remove_work(struct work_struct *work) { struct srp_target_port *target = - container_of(work, struct srp_target_port, work); + container_of(work, struct srp_target_port, remove_work); - if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED)) - return; + WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); spin_lock(&target->srp_host->target_lock); list_del(&target->list); @@ -738,17 +742,8 @@ err: * However, we have to defer the real removal because we * are in the context of the SCSI error handler now, which * will deadlock if we call scsi_remove_host(). - * - * Schedule our work inside the lock to avoid a race with - * the flush_scheduled_work() in srp_remove_one(). */ - spin_lock_irq(&target->lock); - if (target->state == SRP_TARGET_LIVE) { - target->state = SRP_TARGET_DEAD; - INIT_WORK(&target->work, srp_remove_work); - queue_work(ib_wq, &target->work); - } - spin_unlock_irq(&target->lock); + srp_queue_remove_work(target); return ret; } @@ -2258,6 +2253,7 @@ static ssize_t srp_create_target(struct device *dev, sizeof (struct srp_indirect_buf) + target->cmd_sg_cnt * sizeof (struct srp_direct_buf); + INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); INIT_LIST_HEAD(&target->free_tx); INIT_LIST_HEAD(&target->free_reqs); @@ -2491,8 +2487,7 @@ static void srp_remove_one(struct ib_device *device) { struct srp_device *srp_dev; struct srp_host *host, *tmp_host; - LIST_HEAD(target_list); - struct srp_target_port *target, *tmp_target; + struct srp_target_port *target; srp_dev = ib_get_client_data(device, &srp_client); @@ -2505,35 +2500,17 @@ static void srp_remove_one(struct ib_device *device) wait_for_completion(&host->released); /* - * Mark all target ports as removed, so we stop queueing - * commands and don't try to reconnect. + * Remove all target ports. */ spin_lock(&host->target_lock); - list_for_each_entry(target, &host->target_list, list) { - spin_lock_irq(&target->lock); - target->state = SRP_TARGET_REMOVED; - spin_unlock_irq(&target->lock); - } + list_for_each_entry(target, &host->target_list, list) + srp_queue_remove_work(target); spin_unlock(&host->target_lock); /* - * Wait for any reconnection tasks that may have - * started before we marked our target ports as - * removed, and any target port removal tasks. + * Wait for target port removal tasks. */ - flush_workqueue(ib_wq); - - list_for_each_entry_safe(target, tmp_target, - &host->target_list, list) { - srp_del_scsi_host_attr(target->scsi_host); - srp_remove_host(target->scsi_host); - scsi_remove_host(target->scsi_host); - srp_disconnect_target(target); - ib_destroy_cm_id(target->cm_id); - srp_free_target_ib(target); - srp_free_req_data(target); - scsi_host_put(target->scsi_host); - } + flush_workqueue(system_long_wq); kfree(host); } diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index ef95fa4ca3ef..de2d0b3c0bfe 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -80,8 +80,7 @@ enum { enum srp_target_state { SRP_TARGET_LIVE, - SRP_TARGET_DEAD, - SRP_TARGET_REMOVED + SRP_TARGET_REMOVED, }; enum srp_iu_type { @@ -175,7 +174,7 @@ struct srp_target_port { struct srp_iu *rx_ring[SRP_RQ_SIZE]; struct srp_request req_ring[SRP_CMD_SQ_SIZE]; - struct work_struct work; + struct work_struct remove_work; struct list_head list; struct completion done; -- cgit v1.2.2 From 73aa89ed9e2bebf0c3fff4504e6dff1421b5c819 Mon Sep 17 00:00:00 2001 From: Ishai Rabinovitz Date: Mon, 26 Nov 2012 11:44:53 +0100 Subject: IB/srp: destroy and recreate QP and CQs when reconnecting HW QP FATAL errors persist over a reset operation, but we can recover from that by recreating the QP and associated CQs for each connection. Creating a new QP/CQ also completely forecloses any possibility of getting stale completions or packets on the new connection. Signed-off-by: Ishai Rabinovitz Signed-off-by: Michael S. Tsirkin [ updated to current code from OFED, cleaned up commit message ] Signed-off-by: David Dillow Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 66 ++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 31 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 95590a38e88a..85771eba9c80 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -222,27 +222,29 @@ static int srp_new_cm_id(struct srp_target_port *target) static int srp_create_target_ib(struct srp_target_port *target) { struct ib_qp_init_attr *init_attr; + struct ib_cq *recv_cq, *send_cq; + struct ib_qp *qp; int ret; init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); if (!init_attr) return -ENOMEM; - target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); - if (IS_ERR(target->recv_cq)) { - ret = PTR_ERR(target->recv_cq); + recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, + srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); + if (IS_ERR(recv_cq)) { + ret = PTR_ERR(recv_cq); goto err; } - target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev, - srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); - if (IS_ERR(target->send_cq)) { - ret = PTR_ERR(target->send_cq); + send_cq = ib_create_cq(target->srp_host->srp_dev->dev, + srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); + if (IS_ERR(send_cq)) { + ret = PTR_ERR(send_cq); goto err_recv_cq; } - ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP); + ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); init_attr->event_handler = srp_qp_event; init_attr->cap.max_send_wr = SRP_SQ_SIZE; @@ -251,30 +253,41 @@ static int srp_create_target_ib(struct srp_target_port *target) init_attr->cap.max_send_sge = 1; init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->qp_type = IB_QPT_RC; - init_attr->send_cq = target->send_cq; - init_attr->recv_cq = target->recv_cq; + init_attr->send_cq = send_cq; + init_attr->recv_cq = recv_cq; - target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); - if (IS_ERR(target->qp)) { - ret = PTR_ERR(target->qp); + qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); goto err_send_cq; } - ret = srp_init_qp(target, target->qp); + ret = srp_init_qp(target, qp); if (ret) goto err_qp; + if (target->qp) + ib_destroy_qp(target->qp); + if (target->recv_cq) + ib_destroy_cq(target->recv_cq); + if (target->send_cq) + ib_destroy_cq(target->send_cq); + + target->qp = qp; + target->recv_cq = recv_cq; + target->send_cq = send_cq; + kfree(init_attr); return 0; err_qp: - ib_destroy_qp(target->qp); + ib_destroy_qp(qp); err_send_cq: - ib_destroy_cq(target->send_cq); + ib_destroy_cq(send_cq); err_recv_cq: - ib_destroy_cq(target->recv_cq); + ib_destroy_cq(recv_cq); err: kfree(init_attr); @@ -289,6 +302,9 @@ static void srp_free_target_ib(struct srp_target_port *target) ib_destroy_cq(target->send_cq); ib_destroy_cq(target->recv_cq); + target->qp = NULL; + target->send_cq = target->recv_cq = NULL; + for (i = 0; i < SRP_RQ_SIZE; ++i) srp_free_iu(target->srp_host, target->rx_ring[i]); for (i = 0; i < SRP_SQ_SIZE; ++i) @@ -678,8 +694,6 @@ static void srp_reset_req(struct srp_target_port *target, struct srp_request *re static int srp_reconnect_target(struct srp_target_port *target) { struct Scsi_Host *shost = target->scsi_host; - struct ib_qp_attr qp_attr; - struct ib_wc wc; int i, ret; if (target->state != SRP_TARGET_LIVE) @@ -696,20 +710,10 @@ static int srp_reconnect_target(struct srp_target_port *target) if (ret) goto unblock; - qp_attr.qp_state = IB_QPS_RESET; - ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE); - if (ret) - goto unblock; - - ret = srp_init_qp(target, target->qp); + ret = srp_create_target_ib(target); if (ret) goto unblock; - while (ib_poll_cq(target->recv_cq, 1, &wc) > 0) - ; /* nothing */ - while (ib_poll_cq(target->send_cq, 1, &wc) > 0) - ; /* nothing */ - for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { struct srp_request *req = &target->req_ring[i]; if (req->scmnd) -- cgit v1.2.2 From 55d93898a14528a5d4199ba572bea1f0bec5870e Mon Sep 17 00:00:00 2001 From: Vu Pham Date: Mon, 26 Nov 2012 11:57:47 +0100 Subject: IB/srp: send disconnect request without waiting for CM timewait exit Now that SRP recreates the CM ID, QP, and CQ for each connection, there is no need to wait for the timewait state to complete. Signed-off-by: Vu Pham Signed-off-by: David Dillow Signed-off-by: Bart Van Assche Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 85771eba9c80..84d9298ed989 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -481,12 +481,9 @@ static void srp_disconnect_target(struct srp_target_port *target) if (srp_change_conn_state(target, false)) { /* XXX should send SRP_I_LOGOUT request */ - init_completion(&target->done); if (ib_send_cm_dreq(target->cm_id, NULL, 0)) { shost_printk(KERN_DEBUG, target->scsi_host, PFX "Sending CM DREQ failed\n"); - } else { - wait_for_completion(&target->done); } } } @@ -1664,7 +1661,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) shost_printk(KERN_ERR, target->scsi_host, PFX "connection closed\n"); - comp = 1; target->status = 0; break; -- cgit v1.2.2 From dc1bdbd9b8a077018d82230bc378f1bcfd8adba8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 16 Sep 2011 20:41:13 +0200 Subject: IB/srp: Allow SRP disconnect through sysfs Make it possible to disconnect the IB RC connection used by the SRP protocol to communicate with a target. Have the SRP transport layer create a sysfs "delete" attribute for initiator drivers that support this functionality. Signed-off-by: Bart Van Assche Acked-by: David Dillow Cc: FUJITA Tomonori Cc: Robert Jennings Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/srp/ib_srp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 84d9298ed989..d5088ce78290 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -549,6 +549,13 @@ static void srp_remove_work(struct work_struct *work) srp_remove_target(target); } +static void srp_rport_delete(struct srp_rport *rport) +{ + struct srp_target_port *target = rport->lld_data; + + srp_queue_remove_work(target); +} + static int srp_connect_target(struct srp_target_port *target) { int retries = 3; @@ -1958,6 +1965,8 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target) return PTR_ERR(rport); } + rport->lld_data = target; + spin_lock(&host->target_lock); list_add_tail(&target->list, &host->target_list); spin_unlock(&host->target_lock); @@ -2524,6 +2533,7 @@ static void srp_remove_one(struct ib_device *device) } static struct srp_function_template ib_srp_transport_functions = { + .rport_delete = srp_rport_delete, }; static int __init srp_init_module(void) -- cgit v1.2.2