aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJames Smart <James.Smart@Emulex.Com>2007-04-27 11:53:17 -0400
committerJames Bottomley <jejb@mulgrave.il.steeleye.com>2007-05-06 10:33:20 -0400
commit92740b24ce6ddac6534fae985aab602548692186 (patch)
tree9bee34a7786a0c0b796770b89688cfae70b86ec0
parentac09c349080008fdd54a15616a1b14771772d867 (diff)
[SCSI] fc_transport: make all rports wait dev_loss_tmo before removing them
Per the comment in the change - it's not always prudent to immediately remove the rport upon first notice of a disconnect. Make all rports wait dev_loss_tmo before being deleted (and each could have a separate dev_loss_tmo value). The original post was: http://marc.info/?l=linux-scsi&m=117392196006703&w=2 The repost contains the following changes: - Bug fix in fc_starget_delete(). Dev_loss_tmo_callbk() was called prior to tearing down the target. The callback is to be the last thing called, as it tells the LLDD that the rport is completely finished and can be torn down. Rework so that terminate_rport_io() is called to terminate the outstanding io. Isolated work so it is simply "starget" work. - Fix holes in original patch. There were code paths that did not expect the dev_loss_tmo timer to be running for the non-fcp rports. - Bug Fix: the transport wasn't protecting against a LLDD calling fc_remote_port_delete() back-to-back. Thus, the dev_loss_tmo timer could be restarted such that it fires after the rport had been deleted. Validate rport state before starting the timer. Signed-off-by: James Smart <James.Smart@emulex.com> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
-rw-r--r--drivers/scsi/scsi_transport_fc.c158
1 files changed, 97 insertions, 61 deletions
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 14c4f065b2b8..b4d1ece46f78 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1718,31 +1718,12 @@ fc_starget_delete(struct work_struct *work)
1718 struct fc_rport *rport = 1718 struct fc_rport *rport =
1719 container_of(work, struct fc_rport, stgt_delete_work); 1719 container_of(work, struct fc_rport, stgt_delete_work);
1720 struct Scsi_Host *shost = rport_to_shost(rport); 1720 struct Scsi_Host *shost = rport_to_shost(rport);
1721 unsigned long flags;
1722 struct fc_internal *i = to_fc_internal(shost->transportt); 1721 struct fc_internal *i = to_fc_internal(shost->transportt);
1723 1722
1724 /* 1723 /* Involve the LLDD if possible to terminate all io on the rport. */
1725 * Involve the LLDD if possible. All io on the rport is to 1724 if (i->f->terminate_rport_io)
1726 * be terminated, either as part of the dev_loss_tmo callback
1727 * processing, or via the terminate_rport_io function.
1728 */
1729 if (i->f->dev_loss_tmo_callbk)
1730 i->f->dev_loss_tmo_callbk(rport);
1731 else if (i->f->terminate_rport_io)
1732 i->f->terminate_rport_io(rport); 1725 i->f->terminate_rport_io(rport);
1733 1726
1734 spin_lock_irqsave(shost->host_lock, flags);
1735 if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
1736 spin_unlock_irqrestore(shost->host_lock, flags);
1737 if (!cancel_delayed_work(&rport->fail_io_work))
1738 fc_flush_devloss(shost);
1739 if (!cancel_delayed_work(&rport->dev_loss_work))
1740 fc_flush_devloss(shost);
1741 spin_lock_irqsave(shost->host_lock, flags);
1742 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
1743 }
1744 spin_unlock_irqrestore(shost->host_lock, flags);
1745
1746 scsi_remove_target(&rport->dev); 1727 scsi_remove_target(&rport->dev);
1747} 1728}
1748 1729
@@ -1760,6 +1741,7 @@ fc_rport_final_delete(struct work_struct *work)
1760 struct device *dev = &rport->dev; 1741 struct device *dev = &rport->dev;
1761 struct Scsi_Host *shost = rport_to_shost(rport); 1742 struct Scsi_Host *shost = rport_to_shost(rport);
1762 struct fc_internal *i = to_fc_internal(shost->transportt); 1743 struct fc_internal *i = to_fc_internal(shost->transportt);
1744 unsigned long flags;
1763 1745
1764 /* 1746 /*
1765 * if a scan is pending, flush the SCSI Host work_q so that 1747 * if a scan is pending, flush the SCSI Host work_q so that
@@ -1768,13 +1750,37 @@ fc_rport_final_delete(struct work_struct *work)
1768 if (rport->flags & FC_RPORT_SCAN_PENDING) 1750 if (rport->flags & FC_RPORT_SCAN_PENDING)
1769 scsi_flush_work(shost); 1751 scsi_flush_work(shost);
1770 1752
1753 /* involve the LLDD to terminate all pending i/o */
1754 if (i->f->terminate_rport_io)
1755 i->f->terminate_rport_io(rport);
1756
1757 /*
1758 * Cancel any outstanding timers. These should really exist
1759 * only when rmmod'ing the LLDD and we're asking for
1760 * immediate termination of the rports
1761 */
1762 spin_lock_irqsave(shost->host_lock, flags);
1763 if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
1764 spin_unlock_irqrestore(shost->host_lock, flags);
1765 if (!cancel_delayed_work(&rport->fail_io_work))
1766 fc_flush_devloss(shost);
1767 if (!cancel_delayed_work(&rport->dev_loss_work))
1768 fc_flush_devloss(shost);
1769 spin_lock_irqsave(shost->host_lock, flags);
1770 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
1771 }
1772 spin_unlock_irqrestore(shost->host_lock, flags);
1773
1771 /* Delete SCSI target and sdevs */ 1774 /* Delete SCSI target and sdevs */
1772 if (rport->scsi_target_id != -1) 1775 if (rport->scsi_target_id != -1)
1773 fc_starget_delete(&rport->stgt_delete_work); 1776 fc_starget_delete(&rport->stgt_delete_work);
1774 else if (i->f->dev_loss_tmo_callbk) 1777
1778 /*
1779 * Notify the driver that the rport is now dead. The LLDD will
1780 * also guarantee that any communication to the rport is terminated
1781 */
1782 if (i->f->dev_loss_tmo_callbk)
1775 i->f->dev_loss_tmo_callbk(rport); 1783 i->f->dev_loss_tmo_callbk(rport);
1776 else if (i->f->terminate_rport_io)
1777 i->f->terminate_rport_io(rport);
1778 1784
1779 transport_remove_device(dev); 1785 transport_remove_device(dev);
1780 device_del(dev); 1786 device_del(dev);
@@ -1963,8 +1969,6 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
1963 } 1969 }
1964 1970
1965 if (match) { 1971 if (match) {
1966 struct delayed_work *work =
1967 &rport->dev_loss_work;
1968 1972
1969 memcpy(&rport->node_name, &ids->node_name, 1973 memcpy(&rport->node_name, &ids->node_name,
1970 sizeof(rport->node_name)); 1974 sizeof(rport->node_name));
@@ -1982,46 +1986,61 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
1982 fci->f->dd_fcrport_size); 1986 fci->f->dd_fcrport_size);
1983 1987
1984 /* 1988 /*
1985 * If we were blocked, we were a target. 1989 * If we were not a target, cancel the
1986 * If no longer a target, we leave the timer 1990 * io terminate and rport timers, and
1987 * running in case the port changes roles 1991 * we're done.
1988 * prior to the timer expiring. If the timer 1992 *
1989 * fires, the target will be torn down. 1993 * If we were a target, but our new role
1994 * doesn't indicate a target, leave the
1995 * timers running expecting the role to
1996 * change as the target fully logs in. If
1997 * it doesn't, the target will be torn down.
1998 *
1999 * If we were a target, and our role shows
2000 * we're still a target, cancel the timers
2001 * and kick off a scan.
1990 */ 2002 */
1991 if (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET))
1992 return rport;
1993 2003
1994 /* restart the target */ 2004 /* was a target, not in roles */
2005 if ((rport->scsi_target_id != -1) &&
2006 (!(ids->roles & FC_RPORT_ROLE_FCP_TARGET)))
2007 return rport;
1995 2008
1996 /* 2009 /*
1997 * Stop the target timers first. Take no action 2010 * Stop the fail io and dev_loss timers.
1998 * on the del_timer failure as the state 2011 * If they flush, the port_state will
1999 * machine state change will validate the 2012 * be checked and will NOOP the function.
2000 * transaction.
2001 */ 2013 */
2002 if (!cancel_delayed_work(&rport->fail_io_work)) 2014 if (!cancel_delayed_work(&rport->fail_io_work))
2003 fc_flush_devloss(shost); 2015 fc_flush_devloss(shost);
2004 if (!cancel_delayed_work(work)) 2016 if (!cancel_delayed_work(&rport->dev_loss_work))
2005 fc_flush_devloss(shost); 2017 fc_flush_devloss(shost);
2006 2018
2007 spin_lock_irqsave(shost->host_lock, flags); 2019 spin_lock_irqsave(shost->host_lock, flags);
2008 2020
2009 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING; 2021 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
2010 2022
2011 /* initiate a scan of the target */ 2023 /* if target, initiate a scan */
2012 rport->flags |= FC_RPORT_SCAN_PENDING; 2024 if (rport->scsi_target_id != -1) {
2013 scsi_queue_work(shost, &rport->scan_work); 2025 rport->flags |= FC_RPORT_SCAN_PENDING;
2014 2026 scsi_queue_work(shost,
2015 spin_unlock_irqrestore(shost->host_lock, flags); 2027 &rport->scan_work);
2016 2028 spin_unlock_irqrestore(shost->host_lock,
2017 scsi_target_unblock(&rport->dev); 2029 flags);
2030 scsi_target_unblock(&rport->dev);
2031 } else
2032 spin_unlock_irqrestore(shost->host_lock,
2033 flags);
2018 2034
2019 return rport; 2035 return rport;
2020 } 2036 }
2021 } 2037 }
2022 } 2038 }
2023 2039
2024 /* Search the bindings array */ 2040 /*
2041 * Search the bindings array
2042 * Note: if never a FCP target, you won't be on this list
2043 */
2025 if (fc_host->tgtid_bind_type != FC_TGTID_BIND_NONE) { 2044 if (fc_host->tgtid_bind_type != FC_TGTID_BIND_NONE) {
2026 2045
2027 /* search for a matching consistent binding */ 2046 /* search for a matching consistent binding */
@@ -2158,15 +2177,24 @@ fc_remote_port_delete(struct fc_rport *rport)
2158 2177
2159 spin_lock_irqsave(shost->host_lock, flags); 2178 spin_lock_irqsave(shost->host_lock, flags);
2160 2179
2161 /* If no scsi target id mapping, delete it */ 2180 if (rport->port_state != FC_PORTSTATE_ONLINE) {
2162 if (rport->scsi_target_id == -1) {
2163 list_del(&rport->peers);
2164 rport->port_state = FC_PORTSTATE_DELETED;
2165 fc_queue_work(shost, &rport->rport_delete_work);
2166 spin_unlock_irqrestore(shost->host_lock, flags); 2181 spin_unlock_irqrestore(shost->host_lock, flags);
2167 return; 2182 return;
2168 } 2183 }
2169 2184
2185 /*
2186 * In the past, if this was not an FCP-Target, we would
2187 * unconditionally just jump to deleting the rport.
2188 * However, rports can be used as node containers by the LLDD,
2189 * and it's not appropriate to just terminate the rport at the
2190 * first sign of a loss in connectivity. The LLDD may want to
2191 * send ELS traffic to re-validate the login. If the rport is
2192 * immediately deleted, it makes it inappropriate for a node
2193 * container.
2194 * So... we now unconditionally wait dev_loss_tmo before
2195 * destroying an rport.
2196 */
2197
2170 rport->port_state = FC_PORTSTATE_BLOCKED; 2198 rport->port_state = FC_PORTSTATE_BLOCKED;
2171 2199
2172 rport->flags |= FC_RPORT_DEVLOSS_PENDING; 2200 rport->flags |= FC_RPORT_DEVLOSS_PENDING;
@@ -2263,11 +2291,11 @@ fc_remote_port_rolechg(struct fc_rport *rport, u32 roles)
2263EXPORT_SYMBOL(fc_remote_port_rolechg); 2291EXPORT_SYMBOL(fc_remote_port_rolechg);
2264 2292
2265/** 2293/**
2266 * fc_timeout_deleted_rport - Timeout handler for a deleted remote port that 2294 * fc_timeout_deleted_rport - Timeout handler for a deleted remote port,
2267 * was a SCSI target (thus was blocked), and failed 2295 * which we blocked, and has now failed to return
2268 * to return in the alloted time. 2296 * in the allotted time.
2269 * 2297 *
2270 * @work: rport target that failed to reappear in the alloted time. 2298 * @work: rport target that failed to reappear in the allotted time.
2271 **/ 2299 **/
2272static void 2300static void
2273fc_timeout_deleted_rport(struct work_struct *work) 2301fc_timeout_deleted_rport(struct work_struct *work)
@@ -2283,10 +2311,12 @@ fc_timeout_deleted_rport(struct work_struct *work)
2283 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING; 2311 rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
2284 2312
2285 /* 2313 /*
2286 * If the port is ONLINE, then it came back. Validate it's still an 2314 * If the port is ONLINE, then it came back. If it was a SCSI
2287 * FCP target. If not, tear down the scsi_target on it. 2315 * target, validate it still is. If not, tear down the
2316 * scsi_target on it.
2288 */ 2317 */
2289 if ((rport->port_state == FC_PORTSTATE_ONLINE) && 2318 if ((rport->port_state == FC_PORTSTATE_ONLINE) &&
2319 (rport->scsi_target_id != -1) &&
2290 !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) { 2320 !(rport->roles & FC_RPORT_ROLE_FCP_TARGET)) {
2291 dev_printk(KERN_ERR, &rport->dev, 2321 dev_printk(KERN_ERR, &rport->dev,
2292 "blocked FC remote port time out: no longer" 2322 "blocked FC remote port time out: no longer"
@@ -2297,18 +2327,24 @@ fc_timeout_deleted_rport(struct work_struct *work)
2297 return; 2327 return;
2298 } 2328 }
2299 2329
2330 /* NOOP state - we're flushing workq's */
2300 if (rport->port_state != FC_PORTSTATE_BLOCKED) { 2331 if (rport->port_state != FC_PORTSTATE_BLOCKED) {
2301 spin_unlock_irqrestore(shost->host_lock, flags); 2332 spin_unlock_irqrestore(shost->host_lock, flags);
2302 dev_printk(KERN_ERR, &rport->dev, 2333 dev_printk(KERN_ERR, &rport->dev,
2303 "blocked FC remote port time out: leaving target alone\n"); 2334 "blocked FC remote port time out: leaving"
2335 " rport%s alone\n",
2336 (rport->scsi_target_id != -1) ? " and starget" : "");
2304 return; 2337 return;
2305 } 2338 }
2306 2339
2307 if (fc_host->tgtid_bind_type == FC_TGTID_BIND_NONE) { 2340 if ((fc_host->tgtid_bind_type == FC_TGTID_BIND_NONE) ||
2341 (rport->scsi_target_id == -1)) {
2308 list_del(&rport->peers); 2342 list_del(&rport->peers);
2309 rport->port_state = FC_PORTSTATE_DELETED; 2343 rport->port_state = FC_PORTSTATE_DELETED;
2310 dev_printk(KERN_ERR, &rport->dev, 2344 dev_printk(KERN_ERR, &rport->dev,
2311 "blocked FC remote port time out: removing target\n"); 2345 "blocked FC remote port time out: removing"
2346 " rport%s\n",
2347 (rport->scsi_target_id != -1) ? " and starget" : "");
2312 fc_queue_work(shost, &rport->rport_delete_work); 2348 fc_queue_work(shost, &rport->rport_delete_work);
2313 spin_unlock_irqrestore(shost->host_lock, flags); 2349 spin_unlock_irqrestore(shost->host_lock, flags);
2314 return; 2350 return;