diff options
author | Bart Van Assche <bvanassche@acm.org> | 2013-10-26 08:34:27 -0400 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2013-11-08 17:43:15 -0500 |
commit | ed9b2264fb393327a6c8a4229d8df55df596188e (patch) | |
tree | 585689ca11664e8048dddb59e3f19b7f2c8313e1 /drivers/infiniband/ulp | |
parent | 29c17324803c8a3bb5b2b69309e43571164cc4de (diff) |
IB/srp: Use SRP transport layer error recovery
Enable fast_io_fail_tmo and dev_loss_tmo functionality for the IB SRP
initiator. Add kernel module parameters for specifying the default
values of these parameters.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dillowda@ornl.gov>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 141 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.h | 1 |
2 files changed, 101 insertions, 41 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 6edab7855f5e..15b4d2ce4989 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c | |||
@@ -86,6 +86,27 @@ module_param(topspin_workarounds, int, 0444); | |||
86 | MODULE_PARM_DESC(topspin_workarounds, | 86 | MODULE_PARM_DESC(topspin_workarounds, |
87 | "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); | 87 | "Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); |
88 | 88 | ||
89 | static struct kernel_param_ops srp_tmo_ops; | ||
90 | |||
91 | static int srp_fast_io_fail_tmo = 15; | ||
92 | module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo, | ||
93 | S_IRUGO | S_IWUSR); | ||
94 | MODULE_PARM_DESC(fast_io_fail_tmo, | ||
95 | "Number of seconds between the observation of a transport" | ||
96 | " layer error and failing all I/O. \"off\" means that this" | ||
97 | " functionality is disabled."); | ||
98 | |||
99 | static int srp_dev_loss_tmo = 60; | ||
100 | module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, | ||
101 | S_IRUGO | S_IWUSR); | ||
102 | MODULE_PARM_DESC(dev_loss_tmo, | ||
103 | "Maximum number of seconds that the SRP transport should" | ||
104 | " insulate transport layer errors. After this time has been" | ||
105 | " exceeded the SCSI host is removed. Should be" | ||
106 | " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT) | ||
107 | " if fast_io_fail_tmo has not been set. \"off\" means that" | ||
108 | " this functionality is disabled."); | ||
109 | |||
89 | static void srp_add_one(struct ib_device *device); | 110 | static void srp_add_one(struct ib_device *device); |
90 | static void srp_remove_one(struct ib_device *device); | 111 | static void srp_remove_one(struct ib_device *device); |
91 | static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); | 112 | static void srp_recv_completion(struct ib_cq *cq, void *target_ptr); |
@@ -102,6 +123,44 @@ static struct ib_client srp_client = { | |||
102 | 123 | ||
103 | static struct ib_sa_client srp_sa_client; | 124 | static struct ib_sa_client srp_sa_client; |
104 | 125 | ||
126 | static int srp_tmo_get(char *buffer, const struct kernel_param *kp) | ||
127 | { | ||
128 | int tmo = *(int *)kp->arg; | ||
129 | |||
130 | if (tmo >= 0) | ||
131 | return sprintf(buffer, "%d", tmo); | ||
132 | else | ||
133 | return sprintf(buffer, "off"); | ||
134 | } | ||
135 | |||
136 | static int srp_tmo_set(const char *val, const struct kernel_param *kp) | ||
137 | { | ||
138 | int tmo, res; | ||
139 | |||
140 | if (strncmp(val, "off", 3) != 0) { | ||
141 | res = kstrtoint(val, 0, &tmo); | ||
142 | if (res) | ||
143 | goto out; | ||
144 | } else { | ||
145 | tmo = -1; | ||
146 | } | ||
147 | if (kp->arg == &srp_fast_io_fail_tmo) | ||
148 | res = srp_tmo_valid(tmo, srp_dev_loss_tmo); | ||
149 | else | ||
150 | res = srp_tmo_valid(srp_fast_io_fail_tmo, tmo); | ||
151 | if (res) | ||
152 | goto out; | ||
153 | *(int *)kp->arg = tmo; | ||
154 | |||
155 | out: | ||
156 | return res; | ||
157 | } | ||
158 | |||
159 | static struct kernel_param_ops srp_tmo_ops = { | ||
160 | .get = srp_tmo_get, | ||
161 | .set = srp_tmo_set, | ||
162 | }; | ||
163 | |||
105 | static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) | 164 | static inline struct srp_target_port *host_to_target(struct Scsi_Host *host) |
106 | { | 165 | { |
107 | return (struct srp_target_port *) host->hostdata; | 166 | return (struct srp_target_port *) host->hostdata; |
@@ -688,23 +747,42 @@ static void srp_free_req(struct srp_target_port *target, | |||
688 | spin_unlock_irqrestore(&target->lock, flags); | 747 | spin_unlock_irqrestore(&target->lock, flags); |
689 | } | 748 | } |
690 | 749 | ||
691 | static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) | 750 | static void srp_finish_req(struct srp_target_port *target, |
751 | struct srp_request *req, int result) | ||
692 | { | 752 | { |
693 | struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); | 753 | struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); |
694 | 754 | ||
695 | if (scmnd) { | 755 | if (scmnd) { |
696 | srp_free_req(target, req, scmnd, 0); | 756 | srp_free_req(target, req, scmnd, 0); |
697 | scmnd->result = DID_RESET << 16; | 757 | scmnd->result = result; |
698 | scmnd->scsi_done(scmnd); | 758 | scmnd->scsi_done(scmnd); |
699 | } | 759 | } |
700 | } | 760 | } |
701 | 761 | ||
702 | static int srp_reconnect_target(struct srp_target_port *target) | 762 | static void srp_terminate_io(struct srp_rport *rport) |
703 | { | 763 | { |
704 | struct Scsi_Host *shost = target->scsi_host; | 764 | struct srp_target_port *target = rport->lld_data; |
705 | int i, ret; | 765 | int i; |
766 | |||
767 | for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { | ||
768 | struct srp_request *req = &target->req_ring[i]; | ||
769 | srp_finish_req(target, req, DID_TRANSPORT_FAILFAST << 16); | ||
770 | } | ||
771 | } | ||
706 | 772 | ||
707 | scsi_target_block(&shost->shost_gendev); | 773 | /* |
774 | * It is up to the caller to ensure that srp_rport_reconnect() calls are | ||
775 | * serialized and that no concurrent srp_queuecommand(), srp_abort(), | ||
776 | * srp_reset_device() or srp_reset_host() calls will occur while this function | ||
777 | * is in progress. One way to achieve that is to call srp_reconnect_rport() | ||
778 | * instead of this function, since srp_reconnect_rport() | ||
779 | * serializes calls of this function via rport->mutex and also blocks | ||
780 | * srp_queuecommand() calls before invoking this function. | ||
781 | */ | ||
782 | static int srp_rport_reconnect(struct srp_rport *rport) | ||
783 | { | ||
784 | struct srp_target_port *target = rport->lld_data; | ||
785 | int i, ret; | ||
708 | 786 | ||
709 | srp_disconnect_target(target); | 787 | srp_disconnect_target(target); |
710 | /* | 788 | /* |
@@ -725,8 +803,7 @@ static int srp_reconnect_target(struct srp_target_port *target) | |||
725 | 803 | ||
726 | for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { | 804 | for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { |
727 | struct srp_request *req = &target->req_ring[i]; | 805 | struct srp_request *req = &target->req_ring[i]; |
728 | if (req->scmnd) | 806 | srp_finish_req(target, req, DID_RESET << 16); |
729 | srp_reset_req(target, req); | ||
730 | } | 807 | } |
731 | 808 | ||
732 | INIT_LIST_HEAD(&target->free_tx); | 809 | INIT_LIST_HEAD(&target->free_tx); |
@@ -736,28 +813,9 @@ static int srp_reconnect_target(struct srp_target_port *target) | |||
736 | if (ret == 0) | 813 | if (ret == 0) |
737 | ret = srp_connect_target(target); | 814 | ret = srp_connect_target(target); |
738 | 815 | ||
739 | scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING : | 816 | if (ret == 0) |
740 | SDEV_TRANSPORT_OFFLINE); | 817 | shost_printk(KERN_INFO, target->scsi_host, |
741 | target->transport_offline = !!ret; | 818 | PFX "reconnect succeeded\n"); |
742 | |||
743 | if (ret) | ||
744 | goto err; | ||
745 | |||
746 | shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); | ||
747 | |||
748 | return ret; | ||
749 | |||
750 | err: | ||
751 | shost_printk(KERN_ERR, target->scsi_host, | ||
752 | PFX "reconnect failed (%d), removing target port.\n", ret); | ||
753 | |||
754 | /* | ||
755 | * We couldn't reconnect, so kill our target port off. | ||
756 | * However, we have to defer the real removal because we | ||
757 | * are in the context of the SCSI error handler now, which | ||
758 | * will deadlock if we call scsi_remove_host(). | ||
759 | */ | ||
760 | srp_queue_remove_work(target); | ||
761 | 819 | ||
762 | return ret; | 820 | return ret; |
763 | } | 821 | } |
@@ -1356,10 +1414,11 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd) | |||
1356 | struct srp_cmd *cmd; | 1414 | struct srp_cmd *cmd; |
1357 | struct ib_device *dev; | 1415 | struct ib_device *dev; |
1358 | unsigned long flags; | 1416 | unsigned long flags; |
1359 | int len; | 1417 | int len, result; |
1360 | 1418 | ||
1361 | if (unlikely(target->transport_offline)) { | 1419 | result = srp_chkready(target->rport); |
1362 | scmnd->result = DID_NO_CONNECT << 16; | 1420 | if (unlikely(result)) { |
1421 | scmnd->result = result; | ||
1363 | scmnd->scsi_done(scmnd); | 1422 | scmnd->scsi_done(scmnd); |
1364 | return 0; | 1423 | return 0; |
1365 | } | 1424 | } |
@@ -1757,7 +1816,7 @@ static int srp_abort(struct scsi_cmnd *scmnd) | |||
1757 | if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, | 1816 | if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun, |
1758 | SRP_TSK_ABORT_TASK) == 0) | 1817 | SRP_TSK_ABORT_TASK) == 0) |
1759 | ret = SUCCESS; | 1818 | ret = SUCCESS; |
1760 | else if (target->transport_offline) | 1819 | else if (target->rport->state == SRP_RPORT_LOST) |
1761 | ret = FAST_IO_FAIL; | 1820 | ret = FAST_IO_FAIL; |
1762 | else | 1821 | else |
1763 | ret = FAILED; | 1822 | ret = FAILED; |
@@ -1784,7 +1843,7 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) | |||
1784 | for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { | 1843 | for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { |
1785 | struct srp_request *req = &target->req_ring[i]; | 1844 | struct srp_request *req = &target->req_ring[i]; |
1786 | if (req->scmnd && req->scmnd->device == scmnd->device) | 1845 | if (req->scmnd && req->scmnd->device == scmnd->device) |
1787 | srp_reset_req(target, req); | 1846 | srp_finish_req(target, req, DID_RESET << 16); |
1788 | } | 1847 | } |
1789 | 1848 | ||
1790 | return SUCCESS; | 1849 | return SUCCESS; |
@@ -1793,14 +1852,10 @@ static int srp_reset_device(struct scsi_cmnd *scmnd) | |||
1793 | static int srp_reset_host(struct scsi_cmnd *scmnd) | 1852 | static int srp_reset_host(struct scsi_cmnd *scmnd) |
1794 | { | 1853 | { |
1795 | struct srp_target_port *target = host_to_target(scmnd->device->host); | 1854 | struct srp_target_port *target = host_to_target(scmnd->device->host); |
1796 | int ret = FAILED; | ||
1797 | 1855 | ||
1798 | shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); | 1856 | shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n"); |
1799 | 1857 | ||
1800 | if (!srp_reconnect_target(target)) | 1858 | return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED; |
1801 | ret = SUCCESS; | ||
1802 | |||
1803 | return ret; | ||
1804 | } | 1859 | } |
1805 | 1860 | ||
1806 | static int srp_slave_configure(struct scsi_device *sdev) | 1861 | static int srp_slave_configure(struct scsi_device *sdev) |
@@ -2637,7 +2692,13 @@ static void srp_remove_one(struct ib_device *device) | |||
2637 | } | 2692 | } |
2638 | 2693 | ||
2639 | static struct srp_function_template ib_srp_transport_functions = { | 2694 | static struct srp_function_template ib_srp_transport_functions = { |
2695 | .has_rport_state = true, | ||
2696 | .reset_timer_if_blocked = true, | ||
2697 | .fast_io_fail_tmo = &srp_fast_io_fail_tmo, | ||
2698 | .dev_loss_tmo = &srp_dev_loss_tmo, | ||
2699 | .reconnect = srp_rport_reconnect, | ||
2640 | .rport_delete = srp_rport_delete, | 2700 | .rport_delete = srp_rport_delete, |
2701 | .terminate_rport_io = srp_terminate_io, | ||
2641 | }; | 2702 | }; |
2642 | 2703 | ||
2643 | static int __init srp_init_module(void) | 2704 | static int __init srp_init_module(void) |
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 2a1768fcc57d..fd1817e48ad4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h | |||
@@ -140,7 +140,6 @@ struct srp_target_port { | |||
140 | unsigned int cmd_sg_cnt; | 140 | unsigned int cmd_sg_cnt; |
141 | unsigned int indirect_size; | 141 | unsigned int indirect_size; |
142 | bool allow_ext_sg; | 142 | bool allow_ext_sg; |
143 | bool transport_offline; | ||
144 | 143 | ||
145 | /* Everything above this point is used in the hot path of | 144 | /* Everything above this point is used in the hot path of |
146 | * command processing. Try to keep them packed into cachelines. | 145 | * command processing. Try to keep them packed into cachelines. |