aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorSesidhar Beddel <sebaddel@cisco.com>2013-09-09 16:31:49 -0400
committerJames Bottomley <JBottomley@Parallels.com>2013-09-11 18:54:08 -0400
commit1259c5dc752474f74ef3da451dadeafce1d48b55 (patch)
treeba0ac325d904daf55d3e6467aa15e5e94572bc78
parent318c7c4325ad2c4e969a0b79008192ce8c054463 (diff)
[SCSI] fnic: Hitting BUG_ON(io_req->abts_done) in fnic_rport_exch_reset
Hitting BUG_ON(io_req->abts_done) in fnic_rport_exch_reset in case of timing issue and also to some extent locking issue where abts and terminate is happening around same timing. The code changes are intended to update CMD_STATE(sc) and io_req->abts_done together. Signed-off-by: Sesidhar Beddel <sebaddel@cisco.com> Signed-off-by: Hiral Patel <hiralpat@cisco.com> Signed-off-by: James Bottomley <JBottomley@Parallels.com>
-rw-r--r--drivers/scsi/fnic/fnic_scsi.c70
1 files changed, 42 insertions, 28 deletions
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 4ff0a332f298..fb8413aa005f 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -111,6 +111,12 @@ static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
111 return &fnic->io_req_lock[hash]; 111 return &fnic->io_req_lock[hash];
112} 112}
113 113
114static inline spinlock_t *fnic_io_lock_tag(struct fnic *fnic,
115 int tag)
116{
117 return &fnic->io_req_lock[tag & (FNIC_IO_LOCKS - 1)];
118}
119
114/* 120/*
115 * Unmap the data buffer and sense buffer for an io_req, 121 * Unmap the data buffer and sense buffer for an io_req,
116 * also unmap and free the device-private scatter/gather list. 122 * also unmap and free the device-private scatter/gather list.
@@ -956,9 +962,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
956 spin_unlock_irqrestore(io_lock, flags); 962 spin_unlock_irqrestore(io_lock, flags);
957 return; 963 return;
958 } 964 }
959 CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
960 CMD_ABTS_STATUS(sc) = hdr_status; 965 CMD_ABTS_STATUS(sc) = hdr_status;
961
962 CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; 966 CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
963 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host, 967 FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
964 "abts cmpl recd. id %d status %s\n", 968 "abts cmpl recd. id %d status %s\n",
@@ -1116,7 +1120,7 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
1116 1120
1117static void fnic_cleanup_io(struct fnic *fnic, int exclude_id) 1121static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
1118{ 1122{
1119 unsigned int i; 1123 int i;
1120 struct fnic_io_req *io_req; 1124 struct fnic_io_req *io_req;
1121 unsigned long flags = 0; 1125 unsigned long flags = 0;
1122 struct scsi_cmnd *sc; 1126 struct scsi_cmnd *sc;
@@ -1127,12 +1131,14 @@ static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
1127 if (i == exclude_id) 1131 if (i == exclude_id)
1128 continue; 1132 continue;
1129 1133
1134 io_lock = fnic_io_lock_tag(fnic, i);
1135 spin_lock_irqsave(io_lock, flags);
1130 sc = scsi_host_find_tag(fnic->lport->host, i); 1136 sc = scsi_host_find_tag(fnic->lport->host, i);
1131 if (!sc) 1137 if (!sc) {
1138 spin_unlock_irqrestore(io_lock, flags);
1132 continue; 1139 continue;
1140 }
1133 1141
1134 io_lock = fnic_io_lock_hash(fnic, sc);
1135 spin_lock_irqsave(io_lock, flags);
1136 io_req = (struct fnic_io_req *)CMD_SP(sc); 1142 io_req = (struct fnic_io_req *)CMD_SP(sc);
1137 if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) && 1143 if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
1138 !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) { 1144 !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
@@ -1310,12 +1316,13 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
1310 1316
1311 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { 1317 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1312 abt_tag = tag; 1318 abt_tag = tag;
1319 io_lock = fnic_io_lock_tag(fnic, tag);
1320 spin_lock_irqsave(io_lock, flags);
1313 sc = scsi_host_find_tag(fnic->lport->host, tag); 1321 sc = scsi_host_find_tag(fnic->lport->host, tag);
1314 if (!sc) 1322 if (!sc) {
1323 spin_unlock_irqrestore(io_lock, flags);
1315 continue; 1324 continue;
1316 1325 }
1317 io_lock = fnic_io_lock_hash(fnic, sc);
1318 spin_lock_irqsave(io_lock, flags);
1319 1326
1320 io_req = (struct fnic_io_req *)CMD_SP(sc); 1327 io_req = (struct fnic_io_req *)CMD_SP(sc);
1321 1328
@@ -1426,16 +1433,19 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
1426 1433
1427 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { 1434 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1428 abt_tag = tag; 1435 abt_tag = tag;
1436 io_lock = fnic_io_lock_tag(fnic, tag);
1437 spin_lock_irqsave(io_lock, flags);
1429 sc = scsi_host_find_tag(fnic->lport->host, tag); 1438 sc = scsi_host_find_tag(fnic->lport->host, tag);
1430 if (!sc) 1439 if (!sc) {
1440 spin_unlock_irqrestore(io_lock, flags);
1431 continue; 1441 continue;
1442 }
1432 1443
1433 cmd_rport = starget_to_rport(scsi_target(sc->device)); 1444 cmd_rport = starget_to_rport(scsi_target(sc->device));
1434 if (rport != cmd_rport) 1445 if (rport != cmd_rport) {
1446 spin_unlock_irqrestore(io_lock, flags);
1435 continue; 1447 continue;
1436 1448 }
1437 io_lock = fnic_io_lock_hash(fnic, sc);
1438 spin_lock_irqsave(io_lock, flags);
1439 1449
1440 io_req = (struct fnic_io_req *)CMD_SP(sc); 1450 io_req = (struct fnic_io_req *)CMD_SP(sc);
1441 1451
@@ -1648,13 +1658,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
1648 io_req->abts_done = NULL; 1658 io_req->abts_done = NULL;
1649 1659
1650 /* fw did not complete abort, timed out */ 1660 /* fw did not complete abort, timed out */
1651 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { 1661 if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
1652 spin_unlock_irqrestore(io_lock, flags); 1662 spin_unlock_irqrestore(io_lock, flags);
1653 CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT; 1663 CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT;
1654 ret = FAILED; 1664 ret = FAILED;
1655 goto fnic_abort_cmd_end; 1665 goto fnic_abort_cmd_end;
1656 } 1666 }
1657 1667
1668 CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
1669
1658 /* 1670 /*
1659 * firmware completed the abort, check the status, 1671 * firmware completed the abort, check the status,
1660 * free the io_req irrespective of failure or success 1672 * free the io_req irrespective of failure or success
@@ -1753,16 +1765,17 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1753 enum fnic_ioreq_state old_ioreq_state; 1765 enum fnic_ioreq_state old_ioreq_state;
1754 1766
1755 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) { 1767 for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
1768 io_lock = fnic_io_lock_tag(fnic, tag);
1769 spin_lock_irqsave(io_lock, flags);
1756 sc = scsi_host_find_tag(fnic->lport->host, tag); 1770 sc = scsi_host_find_tag(fnic->lport->host, tag);
1757 /* 1771 /*
1758 * ignore this lun reset cmd or cmds that do not belong to 1772 * ignore this lun reset cmd or cmds that do not belong to
1759 * this lun 1773 * this lun
1760 */ 1774 */
1761 if (!sc || sc == lr_sc || sc->device != lun_dev) 1775 if (!sc || sc == lr_sc || sc->device != lun_dev) {
1776 spin_unlock_irqrestore(io_lock, flags);
1762 continue; 1777 continue;
1763 1778 }
1764 io_lock = fnic_io_lock_hash(fnic, sc);
1765 spin_lock_irqsave(io_lock, flags);
1766 1779
1767 io_req = (struct fnic_io_req *)CMD_SP(sc); 1780 io_req = (struct fnic_io_req *)CMD_SP(sc);
1768 1781
@@ -1791,6 +1804,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1791 spin_unlock_irqrestore(io_lock, flags); 1804 spin_unlock_irqrestore(io_lock, flags);
1792 continue; 1805 continue;
1793 } 1806 }
1807
1808 if (io_req->abts_done)
1809 shost_printk(KERN_ERR, fnic->lport->host,
1810 "%s: io_req->abts_done is set state is %s\n",
1811 __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
1794 old_ioreq_state = CMD_STATE(sc); 1812 old_ioreq_state = CMD_STATE(sc);
1795 /* 1813 /*
1796 * Any pending IO issued prior to reset is expected to be 1814 * Any pending IO issued prior to reset is expected to be
@@ -1801,11 +1819,6 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1801 */ 1819 */
1802 CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING; 1820 CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
1803 1821
1804 if (io_req->abts_done)
1805 shost_printk(KERN_ERR, fnic->lport->host,
1806 "%s: io_req->abts_done is set state is %s\n",
1807 __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
1808
1809 BUG_ON(io_req->abts_done); 1822 BUG_ON(io_req->abts_done);
1810 1823
1811 abt_tag = tag; 1824 abt_tag = tag;
@@ -1858,12 +1871,13 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
1858 io_req->abts_done = NULL; 1871 io_req->abts_done = NULL;
1859 1872
1860 /* if abort is still pending with fw, fail */ 1873 /* if abort is still pending with fw, fail */
1861 if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) { 1874 if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
1862 spin_unlock_irqrestore(io_lock, flags); 1875 spin_unlock_irqrestore(io_lock, flags);
1863 CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE; 1876 CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
1864 ret = 1; 1877 ret = 1;
1865 goto clean_pending_aborts_end; 1878 goto clean_pending_aborts_end;
1866 } 1879 }
1880 CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
1867 CMD_SP(sc) = NULL; 1881 CMD_SP(sc) = NULL;
1868 spin_unlock_irqrestore(io_lock, flags); 1882 spin_unlock_irqrestore(io_lock, flags);
1869 1883
@@ -2061,8 +2075,8 @@ int fnic_device_reset(struct scsi_cmnd *sc)
2061 spin_unlock_irqrestore(io_lock, flags); 2075 spin_unlock_irqrestore(io_lock, flags);
2062 int_to_scsilun(sc->device->lun, &fc_lun); 2076 int_to_scsilun(sc->device->lun, &fc_lun);
2063 /* 2077 /*
2064 * Issue abort and terminate on the device reset request. 2078 * Issue abort and terminate on device reset request.
2065 * If q'ing of the abort fails, retry issue it after a delay. 2079 * If q'ing of terminate fails, retry it after a delay.
2066 */ 2080 */
2067 while (1) { 2081 while (1) {
2068 spin_lock_irqsave(io_lock, flags); 2082 spin_lock_irqsave(io_lock, flags);