author		Bart Van Assche <bvanassche@acm.org>	2013-10-26 08:37:17 -0400
committer	Roland Dreier <roland@purestorage.com>	2013-11-08 17:43:16 -0500
commit		a95cadb9dafef41a755b11680529c2b49e7f59bd (patch)
tree		32a9f6a4e7d45af4788446eef1eb99159307ecad /drivers/infiniband/ulp
parent		8c64e4531c3c3bedf11d723196270d4a7553db45 (diff)
IB/srp: Add periodic reconnect functionality
After a transport layer error has occurred, periodically try to reconnect
to the target until the dev_loss timer expires. Protect the
callback functions that can be invoked from inside the SCSI EH
against concurrent invocation with srp_reconnect_rport() via the
rport mutex. Change the default dev_loss_tmo from 60s to 600s
to give the reconnect mechanism a chance to kick in.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dillowda@ornl.gov>
Signed-off-by: Roland Dreier <roland@purestorage.com>
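
[Editor's note] In srp_tmo_set() below, writing any one of the three module
parameters passes all of (reconnect_delay, fast_io_fail_tmo, dev_loss_tmo) to
srp_tmo_valid() for joint validation. The following is a minimal userspace
sketch of the ordering constraint this implies, assuming srp_tmo_valid()
rejects a zero reconnect delay and a fast_io_fail_tmo larger than
dev_loss_tmo; the real helper lives in scsi_transport_srp and may check more.

#include <errno.h>
#include <stdio.h>

/*
 * Illustrative stand-in for srp_tmo_valid() (an assumption, not the
 * actual scsi_transport_srp implementation). Negative values mean the
 * corresponding timeout is "off".
 */
int srp_tmo_valid_sketch(int reconnect_delay, int fast_io_fail_tmo,
			 int dev_loss_tmo)
{
	if (reconnect_delay == 0)
		return -EINVAL;	/* periodic reconnect needs a nonzero period */
	if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
	    fast_io_fail_tmo > dev_loss_tmo)
		return -EINVAL;	/* fail I/O no later than device removal */
	return 0;
}

int main(void)
{
	/* This patch's defaults: delay 10s, fast_io_fail 15s, dev_loss 600s. */
	printf("%d\n", srp_tmo_valid_sketch(10, 15, 600));  /* 0 (valid) */
	printf("%d\n", srp_tmo_valid_sketch(10, 700, 600)); /* -EINVAL */
	return 0;
}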
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c	52
1 file changed, 46 insertions, 6 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 99c893d1c2ac..ebbe01bdd306 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -88,6 +88,11 @@ MODULE_PARM_DESC(topspin_workarounds,
 
 static struct kernel_param_ops srp_tmo_ops;
 
+static int srp_reconnect_delay = 10;
+module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
+		S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
+
 static int srp_fast_io_fail_tmo = 15;
 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
 		S_IRUGO | S_IWUSR);
@@ -96,7 +101,7 @@ MODULE_PARM_DESC(fast_io_fail_tmo,
 		 " layer error and failing all I/O. \"off\" means that this"
 		 " functionality is disabled.");
 
-static int srp_dev_loss_tmo = 60;
+static int srp_dev_loss_tmo = 600;
 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
 		S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(dev_loss_tmo,
@@ -144,10 +149,14 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp)
 	} else {
 		tmo = -1;
 	}
-	if (kp->arg == &srp_fast_io_fail_tmo)
-		res = srp_tmo_valid(-1, tmo, srp_dev_loss_tmo);
+	if (kp->arg == &srp_reconnect_delay)
+		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
+				    srp_dev_loss_tmo);
+	else if (kp->arg == &srp_fast_io_fail_tmo)
+		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
 	else
-		res = srp_tmo_valid(-1, srp_fast_io_fail_tmo, tmo);
+		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
+				    tmo);
 	if (res)
 		goto out;
 	*(int *)kp->arg = tmo;
@@ -1426,18 +1435,29 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
 	struct srp_target_port *target = host_to_target(shost);
+	struct srp_rport *rport = target->rport;
 	struct srp_request *req;
 	struct srp_iu *iu;
 	struct srp_cmd *cmd;
 	struct ib_device *dev;
 	unsigned long flags;
 	int len, result;
+	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
+
+	/*
+	 * The SCSI EH thread is the only context from which srp_queuecommand()
+	 * can get invoked for blocked devices (SDEV_BLOCK /
+	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
+	 * locking the rport mutex if invoked from inside the SCSI EH.
+	 */
+	if (in_scsi_eh)
+		mutex_lock(&rport->mutex);
 
 	result = srp_chkready(target->rport);
 	if (unlikely(result)) {
 		scmnd->result = result;
 		scmnd->scsi_done(scmnd);
-		return 0;
+		goto unlock_rport;
 	}
 
 	spin_lock_irqsave(&target->lock, flags);
@@ -1482,6 +1502,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 		goto err_unmap;
 	}
 
+unlock_rport:
+	if (in_scsi_eh)
+		mutex_unlock(&rport->mutex);
+
 	return 0;
 
 err_unmap:
@@ -1496,6 +1520,9 @@ err_iu:
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
+	if (in_scsi_eh)
+		mutex_unlock(&rport->mutex);
+
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
@@ -1780,6 +1807,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srp_send_tsk_mgmt(struct srp_target_port *target,
 			     u64 req_tag, unsigned int lun, u8 func)
 {
+	struct srp_rport *rport = target->rport;
 	struct ib_device *dev = target->srp_host->srp_dev->dev;
 	struct srp_iu *iu;
 	struct srp_tsk_mgmt *tsk_mgmt;
@@ -1789,12 +1817,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 
 	init_completion(&target->tsk_mgmt_done);
 
+	/*
+	 * Lock the rport mutex to avoid that srp_create_target_ib() is
+	 * invoked while a task management function is being sent.
+	 */
+	mutex_lock(&rport->mutex);
 	spin_lock_irq(&target->lock);
 	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
 	spin_unlock_irq(&target->lock);
 
-	if (!iu)
+	if (!iu) {
+		mutex_unlock(&rport->mutex);
+
 		return -1;
+	}
 
 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
 				   DMA_TO_DEVICE);
@@ -1811,8 +1847,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 			  DMA_TO_DEVICE);
 	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
 		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
+		mutex_unlock(&rport->mutex);
+
 		return -1;
 	}
+	mutex_unlock(&rport->mutex);
 
 	if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
@@ -2713,6 +2752,7 @@ static void srp_remove_one(struct ib_device *device)
 static struct srp_function_template ib_srp_transport_functions = {
 	.has_rport_state	 = true,
 	.reset_timer_if_blocked	 = true,
+	.reconnect_delay	 = &srp_reconnect_delay,
 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
 	.reconnect		 = srp_rport_reconnect,
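
[Editor's note] The conditional rport-mutex pattern in srp_queuecommand()
above distills to the userspace pthread analogue below (hypothetical names,
not the kernel API): the regular I/O path runs in atomic context and must
stay off the mutex, while the SCSI EH thread, the only context that can
queue commands to blocked devices, serializes against reconnects by taking it.

#include <pthread.h>
#include <stdbool.h>

/* Hypothetical stand-in for the kernel's srp_rport; illustrative only. */
struct rport {
	pthread_mutex_t mutex;	/* serializes EH callbacks with reconnects */
};

/*
 * Conditional locking as in srp_queuecommand(): only the EH thread may
 * sleep on the mutex; the normal submission path skips it.
 */
int queue_command(struct rport *rport, bool in_scsi_eh)
{
	if (in_scsi_eh)
		pthread_mutex_lock(&rport->mutex);

	/* ... build and post the command; every exit path must drop the
	 * mutex again, mirroring the unlock_rport/err_unlock labels ... */

	if (in_scsi_eh)
		pthread_mutex_unlock(&rport->mutex);
	return 0;
}

/* The reconnect side holds the same mutex across the whole transition,
 * the way the commit message describes srp_reconnect_rport(). */
void reconnect_rport(struct rport *rport)
{
	pthread_mutex_lock(&rport->mutex);
	/* ... tear down and re-establish the RDMA connection ... */
	pthread_mutex_unlock(&rport->mutex);
}

int main(void)
{
	struct rport rport = { PTHREAD_MUTEX_INITIALIZER };

	queue_command(&rport, false);	/* fast path: no mutex taken */
	reconnect_rport(&rport);
	queue_command(&rport, true);	/* EH path: serialized */
	return 0;
}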