diff options
author | Santosh Vernekar <santosh.vernekar@qlogic.com> | 2010-05-28 18:08:25 -0400 |
---|---|---|
committer | James Bottomley <James.Bottomley@suse.de> | 2010-07-27 13:01:25 -0400 |
commit | cdbb0a4f31c486e4f6fb6e673a892f4f5205f91c (patch) | |
tree | ef386912ef74cb07ae79b522db0a31fc7ce14b03 /drivers/scsi/qla2xxx | |
parent | 3f3b6f98cb33043cba04f45a2f2c43b8303c120c (diff) |
[SCSI] qla2xxx: Handle outstanding mbx cmds on hung f/w scenarios.
Outstanding mailbox commands have no way to recover when the f/w is hung, so we
time out waiting for the mbx response. This in turn affects the recovery process
as follows:
- We might already be in dpc while waiting for mbx to complete, so recovery for
that pci function will never get invoked. Reset Timeout (10 sec) is far less
than mbx timeout (30 sec).
- Other mbx cmds will get stuck due to serial mbx access.
The solution is to identify the fw-hung scenario and make outstanding mbx
commands exit early instead of waiting for a response.
Other mbx commands waiting for access will also exit early if the fw-hung
condition still applies.
Signed-off-by: Giridhar Malavali <giridhar.malavali@qlogic.com>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/qla2xxx')
-rw-r--r-- | drivers/scsi/qla2xxx/qla_def.h | 1 | ||||
-rw-r--r-- | drivers/scsi/qla2xxx/qla_init.c | 3 | ||||
-rw-r--r-- | drivers/scsi/qla2xxx/qla_mbx.c | 78 | ||||
-rw-r--r-- | drivers/scsi/qla2xxx/qla_nx.c | 20 |
4 files changed, 78 insertions, 24 deletions
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index f0e792a82dde..2bb187e23db0 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h | |||
@@ -2413,6 +2413,7 @@ struct qla_hw_data { | |||
2413 | uint32_t cpu_affinity_enabled :1; | 2413 | uint32_t cpu_affinity_enabled :1; |
2414 | uint32_t disable_msix_handshake :1; | 2414 | uint32_t disable_msix_handshake :1; |
2415 | uint32_t fcp_prio_enabled :1; | 2415 | uint32_t fcp_prio_enabled :1; |
2416 | uint32_t fw_hung :1; | ||
2416 | } flags; | 2417 | } flags; |
2417 | 2418 | ||
2418 | /* This spinlock is used to protect "io transactions", you must | 2419 | /* This spinlock is used to protect "io transactions", you must |
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 9d969b596b14..4c6caccc6ad5 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c | |||
@@ -1972,7 +1972,8 @@ qla2x00_fw_ready(scsi_qla_host_t *vha) | |||
1972 | } | 1972 | } |
1973 | } else { | 1973 | } else { |
1974 | /* Mailbox cmd failed. Timeout on min_wait. */ | 1974 | /* Mailbox cmd failed. Timeout on min_wait. */ |
1975 | if (time_after_eq(jiffies, mtime)) | 1975 | if (time_after_eq(jiffies, mtime) || |
1976 | (IS_QLA82XX(ha) && ha->flags.fw_hung)) | ||
1976 | break; | 1977 | break; |
1977 | } | 1978 | } |
1978 | 1979 | ||
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 10f4815aec77..2f39e3093939 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c | |||
@@ -37,7 +37,7 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) | |||
37 | device_reg_t __iomem *reg; | 37 | device_reg_t __iomem *reg; |
38 | uint8_t abort_active; | 38 | uint8_t abort_active; |
39 | uint8_t io_lock_on; | 39 | uint8_t io_lock_on; |
40 | uint16_t command; | 40 | uint16_t command = 0; |
41 | uint16_t *iptr; | 41 | uint16_t *iptr; |
42 | uint16_t __iomem *optr; | 42 | uint16_t __iomem *optr; |
43 | uint32_t cnt; | 43 | uint32_t cnt; |
@@ -83,6 +83,13 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) | |||
83 | return QLA_FUNCTION_TIMEOUT; | 83 | return QLA_FUNCTION_TIMEOUT; |
84 | } | 84 | } |
85 | 85 | ||
86 | if (IS_QLA82XX(ha) && ha->flags.fw_hung) { | ||
87 | /* Setting Link-Down error */ | ||
88 | mcp->mb[0] = MBS_LINK_DOWN_ERROR; | ||
89 | rval = QLA_FUNCTION_FAILED; | ||
90 | goto premature_exit; | ||
91 | } | ||
92 | |||
86 | ha->flags.mbox_busy = 1; | 93 | ha->flags.mbox_busy = 1; |
87 | /* Save mailbox command for debug */ | 94 | /* Save mailbox command for debug */ |
88 | ha->mcp = mcp; | 95 | ha->mcp = mcp; |
@@ -151,7 +158,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) | |||
151 | DEBUG2_3_11(printk(KERN_INFO | 158 | DEBUG2_3_11(printk(KERN_INFO |
152 | "%s(%ld): Pending Mailbox timeout. " | 159 | "%s(%ld): Pending Mailbox timeout. " |
153 | "Exiting.\n", __func__, base_vha->host_no)); | 160 | "Exiting.\n", __func__, base_vha->host_no)); |
154 | return QLA_FUNCTION_TIMEOUT; | 161 | rval = QLA_FUNCTION_TIMEOUT; |
162 | goto premature_exit; | ||
155 | } | 163 | } |
156 | WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING); | 164 | WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING); |
157 | } else if (IS_FWI2_CAPABLE(ha)) | 165 | } else if (IS_FWI2_CAPABLE(ha)) |
@@ -176,7 +184,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) | |||
176 | DEBUG2_3_11(printk(KERN_INFO | 184 | DEBUG2_3_11(printk(KERN_INFO |
177 | "%s(%ld): Pending Mailbox timeout. " | 185 | "%s(%ld): Pending Mailbox timeout. " |
178 | "Exiting.\n", __func__, base_vha->host_no)); | 186 | "Exiting.\n", __func__, base_vha->host_no)); |
179 | return QLA_FUNCTION_TIMEOUT; | 187 | rval = QLA_FUNCTION_TIMEOUT; |
188 | goto premature_exit; | ||
180 | } | 189 | } |
181 | WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING); | 190 | WRT_REG_DWORD(&reg->isp82.hint, HINT_MBX_INT_PENDING); |
182 | } else if (IS_FWI2_CAPABLE(ha)) | 191 | } else if (IS_FWI2_CAPABLE(ha)) |
@@ -214,6 +223,15 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) | |||
214 | ha->flags.mbox_int = 0; | 223 | ha->flags.mbox_int = 0; |
215 | clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); | 224 | clear_bit(MBX_INTERRUPT, &ha->mbx_cmd_flags); |
216 | 225 | ||
226 | if (IS_QLA82XX(ha) && ha->flags.fw_hung) { | ||
227 | ha->flags.mbox_busy = 0; | ||
228 | /* Setting Link-Down error */ | ||
229 | mcp->mb[0] = MBS_LINK_DOWN_ERROR; | ||
230 | ha->mcp = NULL; | ||
231 | rval = QLA_FUNCTION_FAILED; | ||
232 | goto premature_exit; | ||
233 | } | ||
234 | |||
217 | if (ha->mailbox_out[0] != MBS_COMMAND_COMPLETE) | 235 | if (ha->mailbox_out[0] != MBS_COMMAND_COMPLETE) |
218 | rval = QLA_FUNCTION_FAILED; | 236 | rval = QLA_FUNCTION_FAILED; |
219 | 237 | ||
@@ -279,35 +297,51 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) | |||
279 | DEBUG2_3_11(printk("%s(%ld): timeout schedule " | 297 | DEBUG2_3_11(printk("%s(%ld): timeout schedule " |
280 | "isp_abort_needed.\n", __func__, | 298 | "isp_abort_needed.\n", __func__, |
281 | base_vha->host_no)); | 299 | base_vha->host_no)); |
282 | qla_printk(KERN_WARNING, ha, | 300 | |
283 | "Mailbox command timeout occurred. Scheduling ISP " | 301 | if (!test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) && |
284 | "abort. eeh_busy: 0x%x\n", ha->flags.eeh_busy); | 302 | !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) && |
285 | set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); | 303 | !test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { |
286 | qla2xxx_wake_dpc(vha); | 304 | |
305 | qla_printk(KERN_WARNING, ha, | ||
306 | "Mailbox command timeout occured. " | ||
307 | "Scheduling ISP " "abort. eeh_busy: 0x%x\n", | ||
308 | ha->flags.eeh_busy); | ||
309 | set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); | ||
310 | qla2xxx_wake_dpc(vha); | ||
311 | } | ||
287 | } else if (!abort_active) { | 312 | } else if (!abort_active) { |
288 | /* call abort directly since we are in the DPC thread */ | 313 | /* call abort directly since we are in the DPC thread */ |
289 | DEBUG(printk("%s(%ld): timeout calling abort_isp\n", | 314 | DEBUG(printk("%s(%ld): timeout calling abort_isp\n", |
290 | __func__, base_vha->host_no)); | 315 | __func__, base_vha->host_no)); |
291 | DEBUG2_3_11(printk("%s(%ld): timeout calling " | 316 | DEBUG2_3_11(printk("%s(%ld): timeout calling " |
292 | "abort_isp\n", __func__, base_vha->host_no)); | 317 | "abort_isp\n", __func__, base_vha->host_no)); |
293 | qla_printk(KERN_WARNING, ha, | 318 | |
294 | "Mailbox command timeout occurred. Issuing ISP " | 319 | if (!test_bit(ISP_ABORT_NEEDED, &vha->dpc_flags) && |
295 | "abort.\n"); | 320 | !test_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags) && |
296 | 321 | !test_bit(ISP_ABORT_RETRY, &vha->dpc_flags)) { | |
297 | set_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); | 322 | |
298 | clear_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); | 323 | qla_printk(KERN_WARNING, ha, |
299 | if (ha->isp_ops->abort_isp(base_vha)) { | 324 | "Mailbox command timeout occured. " |
300 | /* Failed. retry later. */ | 325 | "Issuing ISP abort.\n"); |
301 | set_bit(ISP_ABORT_NEEDED, &base_vha->dpc_flags); | 326 | |
327 | set_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags); | ||
328 | clear_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); | ||
329 | if (ha->isp_ops->abort_isp(vha)) { | ||
330 | /* Failed. retry later. */ | ||
331 | set_bit(ISP_ABORT_NEEDED, | ||
332 | &vha->dpc_flags); | ||
333 | } | ||
334 | clear_bit(ABORT_ISP_ACTIVE, &vha->dpc_flags); | ||
335 | DEBUG(printk("%s(%ld): finished abort_isp\n", | ||
336 | __func__, vha->host_no)); | ||
337 | DEBUG2_3_11(printk( | ||
338 | "%s(%ld): finished abort_isp\n", | ||
339 | __func__, vha->host_no)); | ||
302 | } | 340 | } |
303 | clear_bit(ABORT_ISP_ACTIVE, &base_vha->dpc_flags); | ||
304 | DEBUG(printk("%s(%ld): finished abort_isp\n", __func__, | ||
305 | base_vha->host_no)); | ||
306 | DEBUG2_3_11(printk("%s(%ld): finished abort_isp\n", | ||
307 | __func__, base_vha->host_no)); | ||
308 | } | 341 | } |
309 | } | 342 | } |
310 | 343 | ||
344 | premature_exit: | ||
311 | /* Allow next mbx cmd to come in. */ | 345 | /* Allow next mbx cmd to come in. */ |
312 | complete(&ha->mbx_cmd_comp); | 346 | complete(&ha->mbx_cmd_comp); |
313 | 347 | ||
diff --git a/drivers/scsi/qla2xxx/qla_nx.c b/drivers/scsi/qla2xxx/qla_nx.c index 1a9a7343dffa..512ba8a4ac57 100644 --- a/drivers/scsi/qla2xxx/qla_nx.c +++ b/drivers/scsi/qla2xxx/qla_nx.c | |||
@@ -3543,6 +3543,14 @@ qla82xx_check_fw_alive(scsi_qla_host_t *vha) | |||
3543 | set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); | 3543 | set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); |
3544 | } | 3544 | } |
3545 | qla2xxx_wake_dpc(vha); | 3545 | qla2xxx_wake_dpc(vha); |
3546 | if (ha->flags.mbox_busy) { | ||
3547 | ha->flags.fw_hung = 1; | ||
3548 | ha->flags.mbox_int = 1; | ||
3549 | DEBUG2(qla_printk(KERN_ERR, ha, | ||
3550 | "Due to fw hung, doing premature " | ||
3551 | "completion of mbx command\n")); | ||
3552 | complete(&ha->mbx_intr_comp); | ||
3553 | } | ||
3546 | } | 3554 | } |
3547 | } | 3555 | } |
3548 | vha->fw_heartbeat_counter = fw_heartbeat_counter; | 3556 | vha->fw_heartbeat_counter = fw_heartbeat_counter; |
@@ -3646,6 +3654,14 @@ void qla82xx_watchdog(scsi_qla_host_t *vha) | |||
3646 | "%s(): Adapter reset needed!\n", __func__); | 3654 | "%s(): Adapter reset needed!\n", __func__); |
3647 | set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); | 3655 | set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); |
3648 | qla2xxx_wake_dpc(vha); | 3656 | qla2xxx_wake_dpc(vha); |
3657 | if (ha->flags.mbox_busy) { | ||
3658 | ha->flags.fw_hung = 1; | ||
3659 | ha->flags.mbox_int = 1; | ||
3660 | DEBUG2(qla_printk(KERN_ERR, ha, | ||
3661 | "Need reset, doing premature " | ||
3662 | "completion of mbx command\n")); | ||
3663 | complete(&ha->mbx_intr_comp); | ||
3664 | } | ||
3649 | } else { | 3665 | } else { |
3650 | qla82xx_check_fw_alive(vha); | 3666 | qla82xx_check_fw_alive(vha); |
3651 | } | 3667 | } |
@@ -3701,8 +3717,10 @@ qla82xx_abort_isp(scsi_qla_host_t *vha) | |||
3701 | qla82xx_clear_rst_ready(ha); | 3717 | qla82xx_clear_rst_ready(ha); |
3702 | qla82xx_idc_unlock(ha); | 3718 | qla82xx_idc_unlock(ha); |
3703 | 3719 | ||
3704 | if (rval == QLA_SUCCESS) | 3720 | if (rval == QLA_SUCCESS) { |
3721 | ha->flags.fw_hung = 0; | ||
3705 | qla82xx_restart_isp(vha); | 3722 | qla82xx_restart_isp(vha); |
3723 | } | ||
3706 | 3724 | ||
3707 | if (rval) { | 3725 | if (rval) { |
3708 | vha->flags.online = 1; | 3726 | vha->flags.online = 1; |