aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Bart Van Assche <bvanassche@acm.org> 2013-02-21 12:19:04 -0500
committer: Roland Dreier <roland@purestorage.com> 2013-02-25 12:31:14 -0500
commit c7c4e7ff8047e43c45628b85ac200582e9404c39 (patch)
tree 597202700b1feadb06339fbabdb0e84fbdf1876c /drivers
parent 3780d1f08856f692116bcf026e4acf1c521df1c7 (diff)
IB/srp: Avoid endless SCSI error handling loop
If a SCSI command times out it is passed to the SCSI error handler. The SCSI error handler will try to abort the commands that timed out. If aborting fails, a device reset will be attempted. If the device reset also fails a host reset will be attempted. If the host reset also fails the whole procedure will be repeated.

srp_abort() and srp_reset_device() fail for a QP in the error state. srp_reset_host() fails after host removal has started. Hence if the SCSI error handler gets invoked after host removal has started and with the QP in the error state an endless loop will be triggered.

Modify the SCSI error handling functions in ib_srp as follows:
- Abort SCSI commands properly even if the QP is in the error state.
- Make srp_reset_host() reset SCSI requests even after host removal has already started or if reconnecting fails.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dave@thedillows.org>
Cc: <stable@vger.kernel.org> # 3.8
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/infiniband/ulp/srp/ib_srp.c | 29
1 files changed, 15 insertions, 14 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 263325848462..8a7eb9f98a0c 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -700,23 +700,24 @@ static int srp_reconnect_target(struct srp_target_port *target)
700 struct Scsi_Host *shost = target->scsi_host; 700 struct Scsi_Host *shost = target->scsi_host;
701 int i, ret; 701 int i, ret;
702 702
703 if (target->state != SRP_TARGET_LIVE)
704 return -EAGAIN;
705
706 scsi_target_block(&shost->shost_gendev); 703 scsi_target_block(&shost->shost_gendev);
707 704
708 srp_disconnect_target(target); 705 srp_disconnect_target(target);
709 /* 706 /*
710 * Now get a new local CM ID so that we avoid confusing the 707 * Now get a new local CM ID so that we avoid confusing the target in
711 * target in case things are really fouled up. 708 * case things are really fouled up. Doing so also ensures that all CM
709 * callbacks will have finished before a new QP is allocated.
712 */ 710 */
713 ret = srp_new_cm_id(target); 711 ret = srp_new_cm_id(target);
714 if (ret) 712 /*
715 goto unblock; 713 * Whether or not creating a new CM ID succeeded, create a new
716 714 * QP. This guarantees that all completion callback function
717 ret = srp_create_target_ib(target); 715 * invocations have finished before request resetting starts.
718 if (ret) 716 */
719 goto unblock; 717 if (ret == 0)
718 ret = srp_create_target_ib(target);
719 else
720 srp_create_target_ib(target);
720 721
721 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 722 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
722 struct srp_request *req = &target->req_ring[i]; 723 struct srp_request *req = &target->req_ring[i];
@@ -728,9 +729,9 @@ static int srp_reconnect_target(struct srp_target_port *target)
728 for (i = 0; i < SRP_SQ_SIZE; ++i) 729 for (i = 0; i < SRP_SQ_SIZE; ++i)
729 list_add(&target->tx_ring[i]->list, &target->free_tx); 730 list_add(&target->tx_ring[i]->list, &target->free_tx);
730 731
731 ret = srp_connect_target(target); 732 if (ret == 0)
733 ret = srp_connect_target(target);
732 734
733unblock:
734 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : 735 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
735 SDEV_TRANSPORT_OFFLINE); 736 SDEV_TRANSPORT_OFFLINE);
736 737
@@ -1739,7 +1740,7 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1739 1740
1740 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1741 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1741 1742
1742 if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) 1743 if (!req || !srp_claim_req(target, req, scmnd))
1743 return FAILED; 1744 return FAILED;
1744 srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, 1745 srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1745 SRP_TSK_ABORT_TASK); 1746 SRP_TSK_ABORT_TASK);