aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuinn Tran <quinn.tran@cavium.com>2018-08-02 16:16:54 -0400
committerMartin K. Petersen <martin.petersen@oracle.com>2018-08-02 16:56:18 -0400
commitf6145e86d21fccd59bbb87bc1e1eb076658fd79a (patch)
tree41edd1622ea0d380cdeabe672aff4ee00618b26e
parentf6602f3befbb9979cdb031e32211358dd008d05e (diff)
scsi: qla2xxx: Fix race between switch cmd completion and timeout
Fix a race condition between switch command completion and the timeout timer. When the timer pops, it triggers a free of the command; on IOCB completion, the stale sp pointer was then reused. Instead, an abort will be sent to FW to nudge the command out of FW, where the normal completion will take place. RIP: 0010:qla2x00_chk_ms_status+0xf3/0x1b0 [qla2xxx] Call Trace: <IRQ> qla24xx_els_ct_entry.isra.15+0x1d4/0x2b0 [qla2xxx] qla24xx_msix_rsp_q+0x39/0xf0 [qla2xxx] qla24xx_process_response_queue+0xbc/0x2b0 [qla2xxx] qla24xx_msix_rsp_q+0x8a/0xf0 [qla2xxx] __handle_irq_event_percpu+0xa0/0x1f0 Signed-off-by: Quinn Tran <quinn.tran@cavium.com> Signed-off-by: Himanshu Madhani <himanshu.madhani@cavium.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_gbl.h2
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c75
3 files changed, 58 insertions, 20 deletions
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 40bcf938cf4f..0fc563572fad 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -313,6 +313,7 @@ struct srb_cmd {
313#define SRB_CRC_CTX_DMA_VALID BIT_2 /* DIF: context DMA valid */ 313#define SRB_CRC_CTX_DMA_VALID BIT_2 /* DIF: context DMA valid */
314#define SRB_CRC_PROT_DMA_VALID BIT_4 /* DIF: prot DMA valid */ 314#define SRB_CRC_PROT_DMA_VALID BIT_4 /* DIF: prot DMA valid */
315#define SRB_CRC_CTX_DSD_VALID BIT_5 /* DIF: dsd_list valid */ 315#define SRB_CRC_CTX_DSD_VALID BIT_5 /* DIF: dsd_list valid */
316#define SRB_WAKEUP_ON_COMP BIT_6
316 317
317/* To identify if a srb is of T10-CRC type. @sp => srb_t pointer */ 318/* To identify if a srb is of T10-CRC type. @sp => srb_t pointer */
318#define IS_PROT_IO(sp) (sp->flags & SRB_CRC_CTX_DSD_VALID) 319#define IS_PROT_IO(sp) (sp->flags & SRB_CRC_CTX_DSD_VALID)
diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
index 00fbd49a9a7a..6f2a37220a55 100644
--- a/drivers/scsi/qla2xxx/qla_gbl.h
+++ b/drivers/scsi/qla2xxx/qla_gbl.h
@@ -213,7 +213,7 @@ extern int qla24xx_post_upd_fcport_work(struct scsi_qla_host *, fc_port_t *);
213void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *, 213void qla2x00_handle_login_done_event(struct scsi_qla_host *, fc_port_t *,
214 uint16_t *); 214 uint16_t *);
215int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *); 215int qla24xx_post_gnl_work(struct scsi_qla_host *, fc_port_t *);
216int qla24xx_async_abort_cmd(srb_t *); 216int qla24xx_async_abort_cmd(srb_t *, bool);
217int qla24xx_post_relogin_work(struct scsi_qla_host *vha); 217int qla24xx_post_relogin_work(struct scsi_qla_host *vha);
218 218
219/* 219/*
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9d1a8b2c41a9..75538383de5b 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -50,16 +50,15 @@ qla2x00_sp_timeout(struct timer_list *t)
50{ 50{
51 srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer); 51 srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer);
52 struct srb_iocb *iocb; 52 struct srb_iocb *iocb;
53 scsi_qla_host_t *vha = sp->vha;
54 struct req_que *req; 53 struct req_que *req;
55 unsigned long flags; 54 unsigned long flags;
56 55
57 spin_lock_irqsave(&vha->hw->hardware_lock, flags); 56 spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags);
58 req = vha->hw->req_q_map[0]; 57 req = sp->qpair->req;
59 req->outstanding_cmds[sp->handle] = NULL; 58 req->outstanding_cmds[sp->handle] = NULL;
60 iocb = &sp->u.iocb_cmd; 59 iocb = &sp->u.iocb_cmd;
60 spin_unlock_irqrestore(sp->qpair->qp_lock_ptr, flags);
61 iocb->timeout(sp); 61 iocb->timeout(sp);
62 spin_unlock_irqrestore(&vha->hw->hardware_lock, flags);
63} 62}
64 63
65void 64void
@@ -100,6 +99,8 @@ qla2x00_async_iocb_timeout(void *data)
100 srb_t *sp = data; 99 srb_t *sp = data;
101 fc_port_t *fcport = sp->fcport; 100 fc_port_t *fcport = sp->fcport;
102 struct srb_iocb *lio = &sp->u.iocb_cmd; 101 struct srb_iocb *lio = &sp->u.iocb_cmd;
102 int rc, h;
103 unsigned long flags;
103 104
104 if (fcport) { 105 if (fcport) {
105 ql_dbg(ql_dbg_disc, fcport->vha, 0x2071, 106 ql_dbg(ql_dbg_disc, fcport->vha, 0x2071,
@@ -114,11 +115,26 @@ qla2x00_async_iocb_timeout(void *data)
114 115
115 switch (sp->type) { 116 switch (sp->type) {
116 case SRB_LOGIN_CMD: 117 case SRB_LOGIN_CMD:
117 /* Retry as needed. */ 118 rc = qla24xx_async_abort_cmd(sp, false);
118 lio->u.logio.data[0] = MBS_COMMAND_ERROR; 119 if (rc) {
119 lio->u.logio.data[1] = lio->u.logio.flags & SRB_LOGIN_RETRIED ? 120 /* Retry as needed. */
120 QLA_LOGIO_LOGIN_RETRIED : 0; 121 lio->u.logio.data[0] = MBS_COMMAND_ERROR;
121 sp->done(sp, QLA_FUNCTION_TIMEOUT); 122 lio->u.logio.data[1] =
123 lio->u.logio.flags & SRB_LOGIN_RETRIED ?
124 QLA_LOGIO_LOGIN_RETRIED : 0;
125 spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags);
126 for (h = 1; h < sp->qpair->req->num_outstanding_cmds;
127 h++) {
128 if (sp->qpair->req->outstanding_cmds[h] ==
129 sp) {
130 sp->qpair->req->outstanding_cmds[h] =
131 NULL;
132 break;
133 }
134 }
135 spin_unlock_irqrestore(sp->qpair->qp_lock_ptr, flags);
136 sp->done(sp, QLA_FUNCTION_TIMEOUT);
137 }
122 break; 138 break;
123 case SRB_LOGOUT_CMD: 139 case SRB_LOGOUT_CMD:
124 case SRB_CT_PTHRU_CMD: 140 case SRB_CT_PTHRU_CMD:
@@ -127,7 +143,21 @@ qla2x00_async_iocb_timeout(void *data)
127 case SRB_NACK_PRLI: 143 case SRB_NACK_PRLI:
128 case SRB_NACK_LOGO: 144 case SRB_NACK_LOGO:
129 case SRB_CTRL_VP: 145 case SRB_CTRL_VP:
130 sp->done(sp, QLA_FUNCTION_TIMEOUT); 146 rc = qla24xx_async_abort_cmd(sp, false);
147 if (rc) {
148 spin_lock_irqsave(sp->qpair->qp_lock_ptr, flags);
149 for (h = 1; h < sp->qpair->req->num_outstanding_cmds;
150 h++) {
151 if (sp->qpair->req->outstanding_cmds[h] ==
152 sp) {
153 sp->qpair->req->outstanding_cmds[h] =
154 NULL;
155 break;
156 }
157 }
158 spin_unlock_irqrestore(sp->qpair->qp_lock_ptr, flags);
159 sp->done(sp, QLA_FUNCTION_TIMEOUT);
160 }
131 break; 161 break;
132 } 162 }
133} 163}
@@ -1594,7 +1624,7 @@ qla24xx_abort_iocb_timeout(void *data)
1594 struct srb_iocb *abt = &sp->u.iocb_cmd; 1624 struct srb_iocb *abt = &sp->u.iocb_cmd;
1595 1625
1596 abt->u.abt.comp_status = CS_TIMEOUT; 1626 abt->u.abt.comp_status = CS_TIMEOUT;
1597 complete(&abt->u.abt.comp); 1627 sp->done(sp, QLA_FUNCTION_TIMEOUT);
1598} 1628}
1599 1629
1600static void 1630static void
@@ -1603,12 +1633,16 @@ qla24xx_abort_sp_done(void *ptr, int res)
1603 srb_t *sp = ptr; 1633 srb_t *sp = ptr;
1604 struct srb_iocb *abt = &sp->u.iocb_cmd; 1634 struct srb_iocb *abt = &sp->u.iocb_cmd;
1605 1635
1606 if (del_timer(&sp->u.iocb_cmd.timer)) 1636 if (del_timer(&sp->u.iocb_cmd.timer)) {
1607 complete(&abt->u.abt.comp); 1637 if (sp->flags & SRB_WAKEUP_ON_COMP)
1638 complete(&abt->u.abt.comp);
1639 else
1640 sp->free(sp);
1641 }
1608} 1642}
1609 1643
1610int 1644int
1611qla24xx_async_abort_cmd(srb_t *cmd_sp) 1645qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
1612{ 1646{
1613 scsi_qla_host_t *vha = cmd_sp->vha; 1647 scsi_qla_host_t *vha = cmd_sp->vha;
1614 fc_port_t *fcport = cmd_sp->fcport; 1648 fc_port_t *fcport = cmd_sp->fcport;
@@ -1623,6 +1657,8 @@ qla24xx_async_abort_cmd(srb_t *cmd_sp)
1623 abt_iocb = &sp->u.iocb_cmd; 1657 abt_iocb = &sp->u.iocb_cmd;
1624 sp->type = SRB_ABT_CMD; 1658 sp->type = SRB_ABT_CMD;
1625 sp->name = "abort"; 1659 sp->name = "abort";
1660 if (wait)
1661 sp->flags = SRB_WAKEUP_ON_COMP;
1626 1662
1627 abt_iocb->timeout = qla24xx_abort_iocb_timeout; 1663 abt_iocb->timeout = qla24xx_abort_iocb_timeout;
1628 init_completion(&abt_iocb->u.abt.comp); 1664 init_completion(&abt_iocb->u.abt.comp);
@@ -1646,10 +1682,11 @@ qla24xx_async_abort_cmd(srb_t *cmd_sp)
1646 "Abort command issued - hdl=%x, target_id=%x\n", 1682 "Abort command issued - hdl=%x, target_id=%x\n",
1647 cmd_sp->handle, fcport->tgt_id); 1683 cmd_sp->handle, fcport->tgt_id);
1648 1684
1649 wait_for_completion(&abt_iocb->u.abt.comp); 1685 if (wait) {
1650 1686 wait_for_completion(&abt_iocb->u.abt.comp);
1651 rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ? 1687 rval = abt_iocb->u.abt.comp_status == CS_COMPLETE ?
1652 QLA_SUCCESS : QLA_FUNCTION_FAILED; 1688 QLA_SUCCESS : QLA_FUNCTION_FAILED;
1689 }
1653 1690
1654done_free_sp: 1691done_free_sp:
1655 sp->free(sp); 1692 sp->free(sp);
@@ -1685,7 +1722,7 @@ qla24xx_async_abort_command(srb_t *sp)
1685 return qlafx00_fx_disc(vha, &vha->hw->mr.fcport, 1722 return qlafx00_fx_disc(vha, &vha->hw->mr.fcport,
1686 FXDISC_ABORT_IOCTL); 1723 FXDISC_ABORT_IOCTL);
1687 1724
1688 return qla24xx_async_abort_cmd(sp); 1725 return qla24xx_async_abort_cmd(sp, true);
1689} 1726}
1690 1727
1691static void 1728static void