aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi
diff options
context:
space:
mode:
authorHiral Patel <hiralpat@cisco.com>2013-02-12 20:01:00 -0500
committerJames Bottomley <JBottomley@Parallels.com>2013-02-22 12:30:19 -0500
commita0bf1ca27b644c1c4b1f0ea2d81f99471b2549e8 (patch)
treec18497413ffc28dd7c56a1c8c56113d9e078fac7 /drivers/scsi
parentcfe16d5da88896fc78d008c96d639cf9c90850a0 (diff)
[SCSI] fnic: fnic driver may hit BUG_ON on device reset
The issue was observed when a LUN reset is issued through IOCTL or the sg_reset utility. The fnic driver issues LUN RESET to firmware. On successful completion of the device reset, the driver cleans up all the pending IOs that were issued prior to the device reset. These pending IOs are expected to be in ABTS_PENDING state. This works fine when the device reset operation results from the midlayer, but not when the device reset is triggered from the IOCTL path, as the pending IOs are then not in ABTS_PENDING state. The execution path hits a panic if a pending IO is not in ABTS_PENDING state. Changes: The fix replaces the BUG_ON check in fnic_clean_pending_aborts() with marking pending IOs as ABTS_PENDING if they were not in ABTS_PENDING state, and skips them if they were already in ABTS_PENDING state. An extra check is added to validate the abort status of the commands after a delay of 2 * E_D_TOV using a helper function. The helper function returns 1 if it finds any pending IO in ABTS_PENDING state belonging to the LUN on which the device reset was issued, else 0. With this, the device reset operation returns success only if the helper function returns 0; otherwise it returns failure. Other changes: - Removed code in fnic_clean_pending_aborts() that returns failure if it finds io_req NULL; instead of returning failure, added code to continue with the next IO - Added device reset flags for debugging in fnic_terminate_rport_io, fnic_rport_exch_reset, and fnic_clean_pending_aborts Signed-off-by: Narsimhulu Musini <nmusini@cisco.com> Signed-off-by: Hiral Patel <hiralpat@cisco.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi')
-rw-r--r--drivers/scsi/fnic/fnic.h2
-rw-r--r--drivers/scsi/fnic/fnic_scsi.c121
2 files changed, 116 insertions, 7 deletions
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index 63b35c8e40bd..b8e6644ad237 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -303,6 +303,8 @@ const char *fnic_state_to_str(unsigned int state);
303void fnic_log_q_error(struct fnic *fnic); 303void fnic_log_q_error(struct fnic *fnic);
304void fnic_handle_link_event(struct fnic *fnic); 304void fnic_handle_link_event(struct fnic *fnic);
305 305
306int fnic_is_abts_pending(struct fnic *, struct scsi_cmnd *);
307
306static inline int 308static inline int
307fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags) 309fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags)
308{ 310{
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 2f46509f5b5a..64830814da0d 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -1271,7 +1271,8 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
1271 spin_unlock_irqrestore(io_lock, flags); 1271 spin_unlock_irqrestore(io_lock, flags);
1272 } else { 1272 } else {
1273 spin_lock_irqsave(io_lock, flags); 1273 spin_lock_irqsave(io_lock, flags);
1274 CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; 1274 if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
1275 CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
1275 spin_unlock_irqrestore(io_lock, flags); 1276 spin_unlock_irqrestore(io_lock, flags);
1276 } 1277 }
1277 } 1278 }
@@ -1379,7 +1380,8 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
1379 spin_unlock_irqrestore(io_lock, flags); 1380 spin_unlock_irqrestore(io_lock, flags);
1380 } else { 1381 } else {
1381 spin_lock_irqsave(io_lock, flags); 1382 spin_lock_irqsave(io_lock, flags);
1382 CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED; 1383 if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
1384 CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
1383 spin_unlock_irqrestore(io_lock, flags); 1385 spin_unlock_irqrestore(io_lock, flags);
1384 } 1386 }
1385 } 1387 }
@@ -1592,7 +1594,7 @@ lr_io_req_end:
1592static int fnic_clean_pending_aborts(struct fnic *fnic, 1594static int fnic_clean_pending_aborts(struct fnic *fnic,
1593 struct scsi_cmnd *lr_sc) 1595 struct scsi_cmnd *lr_sc)
1594{ 1596{
1595 int tag; 1597 int tag, abt_tag;
1596 struct fnic_io_req *io_req; 1598 struct fnic_io_req *io_req;
1597 spinlock_t *io_lock; 1599 spinlock_t *io_lock;
1598 unsigned long flags; 1600 unsigned long flags;
@@ -1601,6 +1603,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1601 struct scsi_lun fc_lun; 1603 struct scsi_lun fc_lun;
1602 struct scsi_device *lun_dev = lr_sc->device; 1604 struct scsi_device *lun_dev = lr_sc->device;
1603 DECLARE_COMPLETION_ONSTACK(tm_done); 1605 DECLARE_COMPLETION_ONSTACK(tm_done);
1606 enum fnic_ioreq_state old_ioreq_state;
1604 1607
1605 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { 1608 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1606 sc = scsi_host_find_tag(fnic->lport->host, tag); 1609 sc = scsi_host_find_tag(fnic->lport->host, tag);
@@ -1629,7 +1632,41 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1629 "Found IO in %s on lun\n", 1632 "Found IO in %s on lun\n",
1630 fnic_ioreq_state_to_str(CMD_STATE(sc))); 1633 fnic_ioreq_state_to_str(CMD_STATE(sc)));
1631 1634
1632 BUG_ON(CMD_STATE(sc) != FNIC_IOREQ_ABTS_PENDING); 1635 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
1636 spin_unlock_irqrestore(io_lock, flags);
1637 continue;
1638 }
1639 if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
1640 (!(CMD_FLAGS(sc) & FNIC_DEV_RST_PENDING))) {
1641 FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
1642 "%s dev rst not pending sc 0x%p\n", __func__,
1643 sc);
1644 spin_unlock_irqrestore(io_lock, flags);
1645 continue;
1646 }
1647 old_ioreq_state = CMD_STATE(sc);
1648 /*
1649 * Any pending IO issued prior to reset is expected to be
1650 * in abts pending state, if not we need to set
1651 * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
1652 * When IO is completed, the IO will be handed over and
1653 * handled in this function.
1654 */
1655 CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
1656
1657 if (io_req->abts_done)
1658 shost_printk(KERN_ERR, fnic->lport->host,
1659 "%s: io_req->abts_done is set state is %s\n",
1660 __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
1661
1662 BUG_ON(io_req->abts_done);
1663
1664 abt_tag = tag;
1665 if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
1666 abt_tag |= FNIC_TAG_DEV_RST;
1667 FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
1668 "%s: dev rst sc 0x%p\n", __func__, sc);
1669 }
1633 1670
1634 CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE; 1671 CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
1635 io_req->abts_done = &tm_done; 1672 io_req->abts_done = &tm_done;
@@ -1638,16 +1675,23 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1638 /* Now queue the abort command to firmware */ 1675 /* Now queue the abort command to firmware */
1639 int_to_scsilun(sc->device->lun, &fc_lun); 1676 int_to_scsilun(sc->device->lun, &fc_lun);
1640 1677
1641 if (fnic_queue_abort_io_req(fnic, tag, 1678 if (fnic_queue_abort_io_req(fnic, abt_tag,
1642 FCPIO_ITMF_ABT_TASK_TERM, 1679 FCPIO_ITMF_ABT_TASK_TERM,
1643 fc_lun.scsi_lun, io_req)) { 1680 fc_lun.scsi_lun, io_req)) {
1644 spin_lock_irqsave(io_lock, flags); 1681 spin_lock_irqsave(io_lock, flags);
1645 io_req = (struct fnic_io_req *)CMD_SP(sc); 1682 io_req = (struct fnic_io_req *)CMD_SP(sc);
1646 if (io_req) 1683 if (io_req)
1647 io_req->abts_done = NULL; 1684 io_req->abts_done = NULL;
1685 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
1686 CMD_STATE(sc) = old_ioreq_state;
1648 spin_unlock_irqrestore(io_lock, flags); 1687 spin_unlock_irqrestore(io_lock, flags);
1649 ret = 1; 1688 ret = 1;
1650 goto clean_pending_aborts_end; 1689 goto clean_pending_aborts_end;
1690 } else {
1691 spin_lock_irqsave(io_lock, flags);
1692 if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
1693 CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
1694 spin_unlock_irqrestore(io_lock, flags);
1651 } 1695 }
1652 1696
1653 wait_for_completion_timeout(&tm_done, 1697 wait_for_completion_timeout(&tm_done,
@@ -1659,8 +1703,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1659 io_req = (struct fnic_io_req *)CMD_SP(sc); 1703 io_req = (struct fnic_io_req *)CMD_SP(sc);
1660 if (!io_req) { 1704 if (!io_req) {
1661 spin_unlock_irqrestore(io_lock, flags); 1705 spin_unlock_irqrestore(io_lock, flags);
1662 ret = 1; 1706 continue;
1663 goto clean_pending_aborts_end;
1664 } 1707 }
1665 1708
1666 io_req->abts_done = NULL; 1709 io_req->abts_done = NULL;
@@ -1678,6 +1721,12 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1678 mempool_free(io_req, fnic->io_req_pool); 1721 mempool_free(io_req, fnic->io_req_pool);
1679 } 1722 }
1680 1723
1724 schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov));
1725
1726 /* walk again to check, if IOs are still pending in fw */
1727 if (fnic_is_abts_pending(fnic, lr_sc))
1728 ret = FAILED;
1729
1681clean_pending_aborts_end: 1730clean_pending_aborts_end:
1682 return ret; 1731 return ret;
1683} 1732}
@@ -2142,3 +2191,61 @@ call_fc_exch_mgr_reset:
2142 fc_exch_mgr_reset(lp, sid, did); 2191 fc_exch_mgr_reset(lp, sid, did);
2143 2192
2144} 2193}
2194
2195/*
2196 * fnic_is_abts_pending() is a helper function that
2197 * walks through tag map to check if there is any IOs pending,if there is one,
2198 * then it returns 1 (true), otherwise 0 (false)
2199 * if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
2200 * otherwise, it checks for all IOs.
2201 */
2202int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
2203{
2204 int tag;
2205 struct fnic_io_req *io_req;
2206 spinlock_t *io_lock;
2207 unsigned long flags;
2208 int ret = 0;
2209 struct scsi_cmnd *sc;
2210 struct scsi_device *lun_dev = NULL;
2211
2212 if (lr_sc)
2213 lun_dev = lr_sc->device;
2214
2215 /* walk again to check, if IOs are still pending in fw */
2216 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
2217 sc = scsi_host_find_tag(fnic->lport->host, tag);
2218 /*
2219 * ignore this lun reset cmd or cmds that do not belong to
2220 * this lun
2221 */
2222 if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc)))
2223 continue;
2224
2225 io_lock = fnic_io_lock_hash(fnic, sc);
2226 spin_lock_irqsave(io_lock, flags);
2227
2228 io_req = (struct fnic_io_req *)CMD_SP(sc);
2229
2230 if (!io_req || sc->device != lun_dev) {
2231 spin_unlock_irqrestore(io_lock, flags);
2232 continue;
2233 }
2234
2235 /*
2236 * Found IO that is still pending with firmware and
2237 * belongs to the LUN that we are resetting
2238 */
2239 FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
2240 "Found IO in %s on lun\n",
2241 fnic_ioreq_state_to_str(CMD_STATE(sc)));
2242
2243 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
2244 spin_unlock_irqrestore(io_lock, flags);
2245 ret = 1;
2246 continue;
2247 }
2248 }
2249
2250 return ret;
2251}