author	Bart Van Assche <bvanassche@acm.org>	2013-10-26 08:37:17 -0400
committer	Roland Dreier <roland@purestorage.com>	2013-11-08 17:43:16 -0500
commit	a95cadb9dafef41a755b11680529c2b49e7f59bd (patch)
tree	32a9f6a4e7d45af4788446eef1eb99159307ecad /drivers/infiniband/ulp
parent	8c64e4531c3c3bedf11d723196270d4a7553db45 (diff)
IB/srp: Add periodic reconnect functionality
After a transport layer error occurred, periodically try to reconnect to the
target until the dev_loss timer expires. Protect the callback functions that
can be invoked from inside the SCSI EH against concurrent invocation with
srp_reconnect_rport() via the rport mutex. Change the default dev_loss_tmo
from 60s to 600s to give the reconnect mechanism a chance to kick in.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Acked-by: David Dillow <dillowda@ornl.gov>
Signed-off-by: Roland Dreier <roland@purestorage.com>
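The reconnect loop itself lives on the transport-class side (scsi_transport_srp); this driver only supplies the tunables and takes the rport mutex in the paths changed below. As a rough sketch of the intended behavior, assuming a delayed-work item and srp_rport fields named as in this patch series (reconnect_work, failed_reconnects, reconnect_delay and dev_loss_tmo are assumptions here, not quoted code):

#include <linux/workqueue.h>
#include <scsi/scsi_transport_srp.h>

/*
 * Sketch only: retry the reconnect every reconnect_delay seconds until
 * it succeeds or the dev_loss budget is exhausted. Field and helper
 * names are assumptions; the real logic is in scsi_transport_srp.c.
 */
static void srp_reconnect_work_sketch(struct work_struct *work)
{
	struct srp_rport *rport = container_of(to_delayed_work(work),
					       struct srp_rport,
					       reconnect_work);

	if (srp_reconnect_rport(rport) != 0 &&
	    ++rport->failed_reconnects * rport->reconnect_delay <
	    rport->dev_loss_tmo)
		queue_delayed_work(system_long_wq, &rport->reconnect_work,
				   rport->reconnect_delay * HZ);
}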
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r--	drivers/infiniband/ulp/srp/ib_srp.c | 52
1 file changed, 46 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 99c893d1c2ac..ebbe01bdd306 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -88,6 +88,11 @@ MODULE_PARM_DESC(topspin_workarounds,
 
 static struct kernel_param_ops srp_tmo_ops;
 
+static int srp_reconnect_delay = 10;
+module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
+		S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
+
 static int srp_fast_io_fail_tmo = 15;
 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
 		S_IRUGO | S_IWUSR);
@@ -96,7 +101,7 @@ MODULE_PARM_DESC(fast_io_fail_tmo,
96 " layer error and failing all I/O. \"off\" means that this" 101 " layer error and failing all I/O. \"off\" means that this"
97 " functionality is disabled."); 102 " functionality is disabled.");
98 103
99static int srp_dev_loss_tmo = 60; 104static int srp_dev_loss_tmo = 600;
100module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo, 105module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
101 S_IRUGO | S_IWUSR); 106 S_IRUGO | S_IWUSR);
102MODULE_PARM_DESC(dev_loss_tmo, 107MODULE_PARM_DESC(dev_loss_tmo,
@@ -144,10 +149,14 @@ static int srp_tmo_set(const char *val, const struct kernel_param *kp)
 	} else {
 		tmo = -1;
 	}
-	if (kp->arg == &srp_fast_io_fail_tmo)
-		res = srp_tmo_valid(-1, tmo, srp_dev_loss_tmo);
+	if (kp->arg == &srp_reconnect_delay)
+		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
+				    srp_dev_loss_tmo);
+	else if (kp->arg == &srp_fast_io_fail_tmo)
+		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
 	else
-		res = srp_tmo_valid(-1, srp_fast_io_fail_tmo, tmo);
+		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
+				    tmo);
 	if (res)
 		goto out;
 	*(int *)kp->arg = tmo;
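The rewritten srp_tmo_set() now validates all three values together on every update. A hedged sketch of the invariant a three-argument srp_tmo_valid() plausibly enforces (the exact checks live in the SRP transport class and may differ):

#include <linux/errno.h>

/* Sketch of the assumed cross-parameter invariant; not the real body. */
static int srp_tmo_valid_sketch(int reconnect_delay, int fast_io_fail_tmo,
				int dev_loss_tmo)
{
	/* At least one recovery mechanism must remain enabled. */
	if (reconnect_delay < 0 && fast_io_fail_tmo < 0 && dev_loss_tmo < 0)
		return -EINVAL;
	/* A zero delay would turn the reconnect loop into a busy loop. */
	if (reconnect_delay == 0)
		return -EINVAL;
	/* I/O must be failed fast before the device is given up on. */
	if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
	    fast_io_fail_tmo >= dev_loss_tmo)
		return -EINVAL;
	return 0;
}

This also motivates the default change above: with reconnect_delay = 10, a 600s dev_loss_tmo leaves room for dozens of reconnect attempts before the rport is given up on, instead of the six that a 60s budget would allow.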
@@ -1426,18 +1435,29 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 {
 	struct srp_target_port *target = host_to_target(shost);
+	struct srp_rport *rport = target->rport;
 	struct srp_request *req;
 	struct srp_iu *iu;
 	struct srp_cmd *cmd;
 	struct ib_device *dev;
 	unsigned long flags;
 	int len, result;
+	const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
+
+	/*
+	 * The SCSI EH thread is the only context from which srp_queuecommand()
+	 * can get invoked for blocked devices (SDEV_BLOCK /
+	 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
+	 * locking the rport mutex if invoked from inside the SCSI EH.
+	 */
+	if (in_scsi_eh)
+		mutex_lock(&rport->mutex);
 
 	result = srp_chkready(target->rport);
 	if (unlikely(result)) {
 		scmnd->result = result;
 		scmnd->scsi_done(scmnd);
-		return 0;
+		goto unlock_rport;
 	}
 
 	spin_lock_irqsave(&target->lock, flags);
@@ -1482,6 +1502,10 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
 		goto err_unmap;
 	}
 
+unlock_rport:
+	if (in_scsi_eh)
+		mutex_unlock(&rport->mutex);
+
 	return 0;
 
 err_unmap:
@@ -1496,6 +1520,9 @@ err_iu:
 err_unlock:
 	spin_unlock_irqrestore(&target->lock, flags);
 
+	if (in_scsi_eh)
+		mutex_unlock(&rport->mutex);
+
 	return SCSI_MLQUEUE_HOST_BUSY;
 }
 
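The early-return path above relies on srp_chkready() translating the rport state into a SCSI result. Its contract, as assumed by this hunk (the actual inline lives in include/scsi/scsi_transport_srp.h and may differ in detail):

/* Sketch of the assumed srp_chkready() behavior; not quoted code. */
static inline int srp_chkready_sketch(struct srp_rport *rport)
{
	switch (rport->state) {
	case SRP_RPORT_RUNNING:
	case SRP_RPORT_BLOCKED:
	default:
		return 0;			/* queue the command normally */
	case SRP_RPORT_FAIL_FAST:
		return DID_TRANSPORT_FAILFAST << 16;	/* fast_io_fail fired */
	case SRP_RPORT_LOST:
		return DID_NO_CONNECT << 16;		/* dev_loss fired */
	}
}

A non-zero result completes the command immediately through scsi_done() instead of posting it to the failed transport.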
@@ -1780,6 +1807,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
 static int srp_send_tsk_mgmt(struct srp_target_port *target,
 			     u64 req_tag, unsigned int lun, u8 func)
 {
+	struct srp_rport *rport = target->rport;
 	struct ib_device *dev = target->srp_host->srp_dev->dev;
 	struct srp_iu *iu;
 	struct srp_tsk_mgmt *tsk_mgmt;
@@ -1789,12 +1817,20 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 
 	init_completion(&target->tsk_mgmt_done);
 
+	/*
+	 * Lock the rport mutex to avoid that srp_create_target_ib() is
+	 * invoked while a task management function is being sent.
+	 */
+	mutex_lock(&rport->mutex);
 	spin_lock_irq(&target->lock);
 	iu = __srp_get_tx_iu(target, SRP_IU_TSK_MGMT);
 	spin_unlock_irq(&target->lock);
 
-	if (!iu)
+	if (!iu) {
+		mutex_unlock(&rport->mutex);
+
 		return -1;
+	}
 
 	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
 				   DMA_TO_DEVICE);
@@ -1811,8 +1847,11 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
 			      DMA_TO_DEVICE);
 	if (srp_post_send(target, iu, sizeof *tsk_mgmt)) {
 		srp_put_tx_iu(target, iu, SRP_IU_TSK_MGMT);
+		mutex_unlock(&rport->mutex);
+
 		return -1;
 	}
+	mutex_unlock(&rport->mutex);
 
 	if (!wait_for_completion_timeout(&target->tsk_mgmt_done,
 					 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
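These mutex_lock()/mutex_unlock() pairs only make sense together with the other side of the serialization: srp_reconnect_rport() in the transport class holds the same mutex across the driver's reconnect callback. A condensed sketch, with helper names and error handling being assumptions:

/* Sketch of the transport-side counterpart; heavily condensed. */
int srp_reconnect_rport_sketch(struct srp_rport *rport)
{
	struct Scsi_Host *shost = rport_to_shost(rport);
	struct srp_internal *i = to_srp_internal(shost->transportt);
	int res;

	res = mutex_lock_interruptible(&rport->mutex);
	if (res)
		return res;
	scsi_target_block(&shost->shost_gendev);	/* park new I/O */
	res = rport->state != SRP_RPORT_LOST ?
		i->f->reconnect(rport) : -ENODEV;	/* srp_rport_reconnect() */
	mutex_unlock(&rport->mutex);
	return res;
}

Because ->reconnect() may tear down and recreate the QP via srp_create_target_ib(), holding rport->mutex in srp_queuecommand() and srp_send_tsk_mgmt() is what keeps a posted work request from racing with that teardown.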
@@ -2713,6 +2752,7 @@ static void srp_remove_one(struct ib_device *device)
 static struct srp_function_template ib_srp_transport_functions = {
 	.has_rport_state	 = true,
 	.reset_timer_if_blocked	 = true,
+	.reconnect_delay	 = &srp_reconnect_delay,
 	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
 	.dev_loss_tmo		 = &srp_dev_loss_tmo,
 	.reconnect		 = srp_rport_reconnect,
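The new .reconnect_delay member is picked up when the template is registered with the SRP transport class at module load, which ib_srp already does; roughly:

/* Sketch of how the template is consumed (error handling trimmed). */
static struct scsi_transport_template *ib_srp_transport_template;

static int __init srp_init_module_sketch(void)
{
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		return -ENOMEM;
	/* ... register the IB client, create class devices, etc. ... */
	return 0;
}

Since reconnect_delay, fast_io_fail_tmo and dev_loss_tmo are passed by pointer, sysfs writes that go through srp_tmo_set() presumably take effect for subsequently created rports without reloading the module.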