aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi
diff options
context:
space:
mode:
author Santosh Vernekar <santosh.vernekar@qlogic.com> 2010-05-28 18:08:25 -0400
committer James Bottomley <James.Bottomley@suse.de> 2010-07-27 13:01:25 -0400
commit cdbb0a4f31c486e4f6fb6e673a892f4f5205f91c (patch)
tree ef386912ef74cb07ae79b522db0a31fc7ce14b03 /drivers/scsi
parent 3f3b6f98cb33043cba04f45a2f2c43b8303c120c (diff)
[SCSI] qla2xxx: Handle outstanding mbx cmds on hung f/w scenarios.
Outstanding mailbox commands have no way to recover when the f/w hangs, and we time out waiting for the mbx response. This in turn affects the recovery process as follows: - We might already be in dpc while waiting for the mbx to complete, so recovery for that pci function will never get invoked. The reset timeout (10 sec) is far less than the mbx timeout (30 sec). - Other mbx cmds will get stuck due to serial mbx access. The solution is to identify the fw-hung scenario and have outstanding mbx commands exit early instead of waiting for a response. Other mbx commands waiting for access will also exit early if fw-hung is still applicable. Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi')
-rw-r--r-- drivers/scsi/qla2xxx/qla_def.h 1
-rw-r--r-- drivers/scsi/qla2xxx/qla_init.c 3
-rw-r--r-- drivers/scsi/qla2xxx/qla_mbx.c 78
-rw-r--r-- drivers/scsi/qla2xxx/qla_nx.c 20
4 files changed, 78 insertions, 24 deletions
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index f0e792a82dde..2bb187e23db0 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2413,6 +2413,7 @@ struct qla_hw_data {
2413 uint32_t cpu_affinity_enabled :1; 2413 uint32_t cpu_affinity_enabled :1;
2414 uint32_t disable_msix_handshake :1; 2414 uint32_t disable_msix_handshake :1;
2415 uint32_t fcp_prio_enabled :1; 2415 uint32_t fcp_prio_enabled :1;
2416 uint32_t fw_hung :1;
2416 } flags; 2417 } flags;
2417 2418
2418 /* This spinlock is used to protect "io transactions", you must 2419 /* This spinlock is used to protect "io transactions", you must
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 9d969b596b14..4c6caccc6ad5 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -1972,7 +1972,8 @@ qla2x00_fw_ready(scsi_qla_host_t *vha)
1972 } 1972 }
1973 } else { 1973 } else {
1974 /* Mailbox cmd failed. Timeout on min_wait. */ 1974 /* Mailbox cmd failed. Timeout on min_wait. */
1975 if (time_after_eq(jiffies, mtime)) 1975 if (time_after_eq(jiffies, mtime) ||
1976 (IS_QLA82XX(ha) && ha->flags.fw_hung))
1976 break; 1977 break;
1977 } 1978 }
1978 1979
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 10f4815aec77..2f39e3093939 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -37,7 +37,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
37 device_reg_t __iomem *reg; 37 device_reg_t __iomem *reg;
38 uint8_t abort_active; 38 uint8_t abort_active;
39 uint8_t io_lock_on; 39 uint8_t io_lock_on;
40 uint16_t command; 40 uint16_t command = 0;
41 uint16_t *iptr; 41 uint16_t *iptr;
42 uint16_t __iomem *optr; 42 uint16_t __iomem *optr;
43 uint32_t cnt; 43 uint32_t cnt;
@@ -83,6 +83,13 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
83 return QLA_FUNCTION_TIMEOUT; 83 return QLA_FUNCTION_TIMEOUT;
84 } 84 }
85 85
86 if (IS_QLA82XX(ha) && ha->flags.fw_hung) {
87 /* Setting Link-Down error */
88 mcp->mb[0] = MBS_LINK_DOWN_ERROR;
89 rval = QLA_FUNCTION_FAILED;
90 goto premature_exit;
91 }
92
86 ha->flags.mbox_busy = 1; 93 ha->flags.mbox_busy = 1;
87 /* Save mailbox command for debug */ 94 /* Save mailbox command for debug */
88 ha->mcp = mcp; 95 ha->mcp = mcp;
@@ -151,7 +158,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
151 DEBUG2_3_11(printk(KERN_INFO 158 DEBUG2_3_11(printk(KERN_INFO
152 "%s(%ld): Pending Mailbox timeout. " 159 "%s(%ld): Pending Mailbox timeout. "
153 "Exiting.\n", __func__, base_vha->host_no)); 160 "Exiting.\n", __func__, base_vha->host_no));
154 return QLA_FUNCTION_TIMEOUT; 161 rval = QLA_FUNCTION_TIMEOUT;
162 goto premature_exit;
155 } 163 }
156 WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING); 164 WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING);
157 } else if (IS_FWI2_CAPABLE(ha)) 165 } else if (IS_FWI2_CAPABLE(ha))
@@ -176,7 +184,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
176 DEBUG2_3_11(printk(KERN_INFO 184 DEBUG2_3_11(printk(KERN_INFO
177 "%s(%ld): Pending Mailbox timeout. " 185 "%s(%ld): Pending Mailbox timeout. "
178 "Exiting.\n", __func__, base_vha->host_no)); 186 "Exiting.\n", __func__, base_vha->host_no));
179 return QLA_FUNCTION_TIMEOUT; 187 rval = QLA_FUNCTION_TIMEOUT;
188 goto premature_exit;
180 } 189 }
181 WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING); 190 WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING);
182 } else if (IS_FWI2_CAPABLE(ha)) 191 } else if (IS_FWI2_CAPABLE(ha))
@@ -214,6 +223,15 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
214 ha->flags.mbox_int = 0; 223 ha->flags.mbox_int = 0;
215 clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); 224 clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags);
216 225
226 if (IS_QLA82XX(ha) && ha->flags.fw_hung) {
227 ha->flags.mbox_busy = 0;
228 /* Setting Link-Down error */
229 mcp->mb[0] = MBS_LINK_DOWN_ERROR;
230 ha->mcp = NULL;
231 rval = QLA_FUNCTION_FAILED;
232 goto premature_exit;
233 }
234
217 if (ha->mailbox_out[0] != MBS_COMMAND_COMPLETE) 235 if (ha->mailbox_out[0] != MBS_COMMAND_COMPLETE)
218 rval = QLA_FUNCTION_FAILED; 236 rval = QLA_FUNCTION_FAILED;
219 237
@@ -279,35 +297,51 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
279 DEBUG2_3_11(printk("%s(%ld): timeout schedule " 297 DEBUG2_3_11(printk("%s(%ld): timeout schedule "
280 "isp_abort_needed.\n", __func__, 298 "isp_abort_needed.\n", __func__,
281 base_vha->host_no)); 299 base_vha->host_no));
282 qla_printk(KERN_WARNING, ha, 300
283 "Mailbox command timeout occurred. Scheduling ISP " 301 if (!test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) &&
284 "abort. eeh_busy: 0x%x\n", ha->flags.eeh_busy); 302 !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
285 set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); 303 !test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
286 qla2xxx_wake_dpc(vha); 304
305 qla_printk(KERN_WARNING, ha,
306 "Mailbox command timeout occured. "
307 "Scheduling ISP " "abort. eeh_busy: 0x%x\n",
308 ha->flags.eeh_busy);
309 set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
310 qla2xxx_wake_dpc(vha);
311 }
287 } else if (!abort_active) { 312 } else if (!abort_active) {
288 /* call abort directly since we are in the DPC thread */ 313 /* call abort directly since we are in the DPC thread */
289 DEBUG(printk("%s(%ld): timeout calling abort_isp\n", 314 DEBUG(printk("%s(%ld): timeout calling abort_isp\n",
290 __func__, base_vha->host_no)); 315 __func__, base_vha->host_no));
291 DEBUG2_3_11(printk("%s(%ld): timeout calling " 316 DEBUG2_3_11(printk("%s(%ld): timeout calling "
292 "abort_isp\n", __func__, base_vha->host_no)); 317 "abort_isp\n", __func__, base_vha->host_no));
293 qla_printk(KERN_WARNING, ha, 318
294 "Mailbox command timeout occurred. Issuing ISP " 319 if (!test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) &&
295 "abort.\n"); 320 !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) &&
296 321 !test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) {
297 set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); 322
298 clear_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); 323 qla_printk(KERN_WARNING, ha,
299 if (ha->isp_ops->abort_isp(base_vha)) { 324 "Mailbox command timeout occured. "
300 /* Failed. retry later. */ 325 "Issuing ISP abort.\n");
301 set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); 326
327 set_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags);
328 clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
329 if (ha->isp_ops->abort_isp(vha)) {
330 /* Failed. retry later. */
331 set_bit(ISP_ABORT_NEEDED,
332 &vha->dpc_flags);
333 }
334 clear_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags);
335 DEBUG(printk("%s(%ld): finished abort_isp\n",
336 __func__, vha->host_no));
337 DEBUG2_3_11(printk(
338 "%s(%ld): finished abort_isp\n",
339 __func__, vha->host_no));
302 } 340 }
303 clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags);
304 DEBUG(printk("%s(%ld): finished abort_isp\n", __func__,
305 base_vha->host_no));
306 DEBUG2_3_11(printk("%s(%ld): finished abort_isp\n",
307 __func__, base_vha->host_no));
308 } 341 }
309 } 342 }
310 343
344premature_exit:
311 /* Allow next mbx cmd to come in. */ 345 /* Allow next mbx cmd to come in. */
312 complete(&ha->mbx_cmd_comp); 346 complete(&ha->mbx_cmd_comp);
313 347
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c
index 1a9a7343dffa..512ba8a4ac57 100644
--- a/drivers/scsi/qla2xxx/qla_nx.c
+++ b/drivers/scsi/qla2xxx/qla_nx.c
@@ -3543,6 +3543,14 @@ qla82xx_check_fw_alive(scsi_qla_host_t *vha)
3543 set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); 3543 set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
3544 } 3544 }
3545 qla2xxx_wake_dpc(vha); 3545 qla2xxx_wake_dpc(vha);
3546 if (ha->flags.mbox_busy) {
3547 ha->flags.fw_hung = 1;
3548 ha->flags.mbox_int = 1;
3549 DEBUG2(qla_printk(KERN_ERR, ha,
3550 "Due to fw hung, doing premature "
3551 "completion of mbx command\n"));
3552 complete(&ha->mbx_intr_comp);
3553 }
3546 } 3554 }
3547 } 3555 }
3548 vha->fw_heartbeat_counter = fw_heartbeat_counter; 3556 vha->fw_heartbeat_counter = fw_heartbeat_counter;
@@ -3646,6 +3654,14 @@ void qla82xx_watchdog(scsi_qla_host_t *vha)
3646 "%s(): Adapter reset needed!\n", __func__); 3654 "%s(): Adapter reset needed!\n", __func__);
3647 set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); 3655 set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags);
3648 qla2xxx_wake_dpc(vha); 3656 qla2xxx_wake_dpc(vha);
3657 if (ha->flags.mbox_busy) {
3658 ha->flags.fw_hung = 1;
3659 ha->flags.mbox_int = 1;
3660 DEBUG2(qla_printk(KERN_ERR, ha,
3661 "Need reset, doing premature "
3662 "completion of mbx command\n"));
3663 complete(&ha->mbx_intr_comp);
3664 }
3649 } else { 3665 } else {
3650 qla82xx_check_fw_alive(vha); 3666 qla82xx_check_fw_alive(vha);
3651 } 3667 }
@@ -3701,8 +3717,10 @@ qla82xx_abort_isp(scsi_qla_host_t *vha)
3701 qla82xx_clear_rst_ready(ha); 3717 qla82xx_clear_rst_ready(ha);
3702 qla82xx_idc_unlock(ha); 3718 qla82xx_idc_unlock(ha);
3703 3719
3704 if (rval == QLA_SUCCESS) 3720 if (rval == QLA_SUCCESS) {
3721 ha->flags.fw_hung = 0;
3705 qla82xx_restart_isp(vha); 3722 qla82xx_restart_isp(vha);
3723 }
3706 3724
3707 if (rval) { 3725 if (rval) {
3708 vha->flags.online = 1; 3726 vha->flags.online = 1;