diff options
author | Shyam Sunder <shyam.sunder@qlogic.com> | 2011-12-02 01:42:13 -0500 |
---|---|---|
committer | James Bottomley <JBottomley@Parallels.com> | 2011-12-15 01:57:42 -0500 |
commit | 9ee91a38b1e224b0ee4ddc9a69d41f732d776310 (patch) | |
tree | 2a86b0ddd75c76090a7a448794cc0a7ac9e5f194 /drivers/scsi/qla4xxx | |
parent | 0a24361566d750065849f63521097f8373a0ac6b (diff) |
[SCSI] qla4xxx: check for FW alive before calling chip_reset
Check whether the firmware is alive and, if the firmware is hung,
prematurely complete outstanding mbox commands before doing chip_reset.
Signed-off-by: Shyam Sunder <shyam.sunder@qlogic.com>
Reviewed-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Nilesh Javali <nilesh.javali@qlogic.com>
Signed-off-by: Vikas Chaudhary <vikas.chaudhary@qlogic.com>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
Diffstat (limited to 'drivers/scsi/qla4xxx')
-rw-r--r-- | drivers/scsi/qla4xxx/ql4_def.h | 1 | ||||
-rw-r--r-- | drivers/scsi/qla4xxx/ql4_os.c | 75 |
2 files changed, 52 insertions, 24 deletions
diff --git a/drivers/scsi/qla4xxx/ql4_def.h b/drivers/scsi/qla4xxx/ql4_def.h index fd5edc6e166..22a3ff02e48 100644 --- a/drivers/scsi/qla4xxx/ql4_def.h +++ b/drivers/scsi/qla4xxx/ql4_def.h | |||
@@ -177,6 +177,7 @@ | |||
177 | #define LOGIN_TOV 12 | 177 | #define LOGIN_TOV 12 |
178 | 178 | ||
179 | #define MAX_RESET_HA_RETRIES 2 | 179 | #define MAX_RESET_HA_RETRIES 2 |
180 | #define FW_ALIVE_WAIT_TOV 3 | ||
180 | 181 | ||
181 | #define CMD_SP(Cmnd) ((Cmnd)->SCp.ptr) | 182 | #define CMD_SP(Cmnd) ((Cmnd)->SCp.ptr) |
182 | 183 | ||
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 0c9ee937809..70778d5227a 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c | |||
@@ -1975,9 +1975,10 @@ mem_alloc_error_exit: | |||
1975 | * | 1975 | * |
1976 | * Context: Interrupt | 1976 | * Context: Interrupt |
1977 | **/ | 1977 | **/ |
1978 | static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) | 1978 | static int qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) |
1979 | { | 1979 | { |
1980 | uint32_t fw_heartbeat_counter, halt_status; | 1980 | uint32_t fw_heartbeat_counter; |
1981 | int status = QLA_SUCCESS; | ||
1981 | 1982 | ||
1982 | fw_heartbeat_counter = qla4_8xxx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER); | 1983 | fw_heartbeat_counter = qla4_8xxx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER); |
1983 | /* If PEG_ALIVE_COUNTER is 0xffffffff, AER/EEH is in progress, ignore */ | 1984 | /* If PEG_ALIVE_COUNTER is 0xffffffff, AER/EEH is in progress, ignore */ |
@@ -1985,7 +1986,7 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) | |||
1985 | DEBUG2(printk(KERN_WARNING "scsi%ld: %s: Device in frozen " | 1986 | DEBUG2(printk(KERN_WARNING "scsi%ld: %s: Device in frozen " |
1986 | "state, QLA82XX_PEG_ALIVE_COUNTER is 0xffffffff\n", | 1987 | "state, QLA82XX_PEG_ALIVE_COUNTER is 0xffffffff\n", |
1987 | ha->host_no, __func__)); | 1988 | ha->host_no, __func__)); |
1988 | return; | 1989 | return status; |
1989 | } | 1990 | } |
1990 | 1991 | ||
1991 | if (ha->fw_heartbeat_counter == fw_heartbeat_counter) { | 1992 | if (ha->fw_heartbeat_counter == fw_heartbeat_counter) { |
@@ -1993,8 +1994,6 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) | |||
1993 | /* FW not alive after 2 seconds */ | 1994 | /* FW not alive after 2 seconds */ |
1994 | if (ha->seconds_since_last_heartbeat == 2) { | 1995 | if (ha->seconds_since_last_heartbeat == 2) { |
1995 | ha->seconds_since_last_heartbeat = 0; | 1996 | ha->seconds_since_last_heartbeat = 0; |
1996 | halt_status = qla4_8xxx_rd_32(ha, | ||
1997 | QLA82XX_PEG_HALT_STATUS1); | ||
1998 | 1997 | ||
1999 | ql4_printk(KERN_INFO, ha, | 1998 | ql4_printk(KERN_INFO, ha, |
2000 | "scsi(%ld): %s, Dumping hw/fw registers:\n " | 1999 | "scsi(%ld): %s, Dumping hw/fw registers:\n " |
@@ -2002,7 +2001,9 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) | |||
2002 | " 0x%x,\n PEG_NET_0_PC: 0x%x, PEG_NET_1_PC:" | 2001 | " 0x%x,\n PEG_NET_0_PC: 0x%x, PEG_NET_1_PC:" |
2003 | " 0x%x,\n PEG_NET_2_PC: 0x%x, PEG_NET_3_PC:" | 2002 | " 0x%x,\n PEG_NET_2_PC: 0x%x, PEG_NET_3_PC:" |
2004 | " 0x%x,\n PEG_NET_4_PC: 0x%x\n", | 2003 | " 0x%x,\n PEG_NET_4_PC: 0x%x\n", |
2005 | ha->host_no, __func__, halt_status, | 2004 | ha->host_no, __func__, |
2005 | qla4_8xxx_rd_32(ha, | ||
2006 | QLA82XX_PEG_HALT_STATUS1), | ||
2006 | qla4_8xxx_rd_32(ha, | 2007 | qla4_8xxx_rd_32(ha, |
2007 | QLA82XX_PEG_HALT_STATUS2), | 2008 | QLA82XX_PEG_HALT_STATUS2), |
2008 | qla4_8xxx_rd_32(ha, QLA82XX_CRB_PEG_NET_0 + | 2009 | qla4_8xxx_rd_32(ha, QLA82XX_CRB_PEG_NET_0 + |
@@ -2015,24 +2016,13 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) | |||
2015 | 0x3c), | 2016 | 0x3c), |
2016 | qla4_8xxx_rd_32(ha, QLA82XX_CRB_PEG_NET_4 + | 2017 | qla4_8xxx_rd_32(ha, QLA82XX_CRB_PEG_NET_4 + |
2017 | 0x3c)); | 2018 | 0x3c)); |
2018 | 2019 | status = QLA_ERROR; | |
2019 | /* Since we cannot change dev_state in interrupt | ||
2020 | * context, set appropriate DPC flag then wakeup | ||
2021 | * DPC */ | ||
2022 | if (halt_status & HALT_STATUS_UNRECOVERABLE) | ||
2023 | set_bit(DPC_HA_UNRECOVERABLE, &ha->dpc_flags); | ||
2024 | else { | ||
2025 | printk("scsi%ld: %s: detect abort needed!\n", | ||
2026 | ha->host_no, __func__); | ||
2027 | set_bit(DPC_RESET_HA, &ha->dpc_flags); | ||
2028 | } | ||
2029 | qla4xxx_wake_dpc(ha); | ||
2030 | qla4xxx_mailbox_premature_completion(ha); | ||
2031 | } | 2020 | } |
2032 | } else | 2021 | } else |
2033 | ha->seconds_since_last_heartbeat = 0; | 2022 | ha->seconds_since_last_heartbeat = 0; |
2034 | 2023 | ||
2035 | ha->fw_heartbeat_counter = fw_heartbeat_counter; | 2024 | ha->fw_heartbeat_counter = fw_heartbeat_counter; |
2025 | return status; | ||
2036 | } | 2026 | } |
2037 | 2027 | ||
2038 | /** | 2028 | /** |
@@ -2043,14 +2033,13 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha) | |||
2043 | **/ | 2033 | **/ |
2044 | void qla4_8xxx_watchdog(struct scsi_qla_host *ha) | 2034 | void qla4_8xxx_watchdog(struct scsi_qla_host *ha) |
2045 | { | 2035 | { |
2046 | uint32_t dev_state; | 2036 | uint32_t dev_state, halt_status; |
2047 | |||
2048 | dev_state = qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE); | ||
2049 | 2037 | ||
2050 | /* don't poll if reset is going on */ | 2038 | /* don't poll if reset is going on */ |
2051 | if (!(test_bit(DPC_RESET_ACTIVE, &ha->dpc_flags) || | 2039 | if (!(test_bit(DPC_RESET_ACTIVE, &ha->dpc_flags) || |
2052 | test_bit(DPC_RESET_HA, &ha->dpc_flags) || | 2040 | test_bit(DPC_RESET_HA, &ha->dpc_flags) || |
2053 | test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags))) { | 2041 | test_bit(DPC_RETRY_RESET_HA, &ha->dpc_flags))) { |
2042 | dev_state = qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE); | ||
2054 | if (dev_state == QLA82XX_DEV_NEED_RESET && | 2043 | if (dev_state == QLA82XX_DEV_NEED_RESET && |
2055 | !test_bit(DPC_RESET_HA, &ha->dpc_flags)) { | 2044 | !test_bit(DPC_RESET_HA, &ha->dpc_flags)) { |
2056 | if (!ql4xdontresethba) { | 2045 | if (!ql4xdontresethba) { |
@@ -2058,7 +2047,6 @@ void qla4_8xxx_watchdog(struct scsi_qla_host *ha) | |||
2058 | "NEED RESET!\n", __func__); | 2047 | "NEED RESET!\n", __func__); |
2059 | set_bit(DPC_RESET_HA, &ha->dpc_flags); | 2048 | set_bit(DPC_RESET_HA, &ha->dpc_flags); |
2060 | qla4xxx_wake_dpc(ha); | 2049 | qla4xxx_wake_dpc(ha); |
2061 | qla4xxx_mailbox_premature_completion(ha); | ||
2062 | } | 2050 | } |
2063 | } else if (dev_state == QLA82XX_DEV_NEED_QUIESCENT && | 2051 | } else if (dev_state == QLA82XX_DEV_NEED_QUIESCENT && |
2064 | !test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags)) { | 2052 | !test_bit(DPC_HA_NEED_QUIESCENT, &ha->dpc_flags)) { |
@@ -2068,7 +2056,24 @@ void qla4_8xxx_watchdog(struct scsi_qla_host *ha) | |||
2068 | qla4xxx_wake_dpc(ha); | 2056 | qla4xxx_wake_dpc(ha); |
2069 | } else { | 2057 | } else { |
2070 | /* Check firmware health */ | 2058 | /* Check firmware health */ |
2071 | qla4_8xxx_check_fw_alive(ha); | 2059 | if (qla4_8xxx_check_fw_alive(ha)) { |
2060 | halt_status = qla4_8xxx_rd_32(ha, | ||
2061 | QLA82XX_PEG_HALT_STATUS1); | ||
2062 | |||
2063 | /* Since we cannot change dev_state in interrupt | ||
2064 | * context, set appropriate DPC flag then wakeup | ||
2065 | * DPC */ | ||
2066 | if (halt_status & HALT_STATUS_UNRECOVERABLE) | ||
2067 | set_bit(DPC_HA_UNRECOVERABLE, | ||
2068 | &ha->dpc_flags); | ||
2069 | else { | ||
2070 | ql4_printk(KERN_INFO, ha, "%s: detect " | ||
2071 | "abort needed!\n", __func__); | ||
2072 | set_bit(DPC_RESET_HA, &ha->dpc_flags); | ||
2073 | } | ||
2074 | qla4xxx_mailbox_premature_completion(ha); | ||
2075 | qla4xxx_wake_dpc(ha); | ||
2076 | } | ||
2072 | } | 2077 | } |
2073 | } | 2078 | } |
2074 | } | 2079 | } |
@@ -2424,6 +2429,7 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) | |||
2424 | int status = QLA_ERROR; | 2429 | int status = QLA_ERROR; |
2425 | uint8_t reset_chip = 0; | 2430 | uint8_t reset_chip = 0; |
2426 | uint32_t dev_state; | 2431 | uint32_t dev_state; |
2432 | unsigned long wait; | ||
2427 | 2433 | ||
2428 | /* Stall incoming I/O until we are done */ | 2434 | /* Stall incoming I/O until we are done */ |
2429 | scsi_block_requests(ha->host); | 2435 | scsi_block_requests(ha->host); |
@@ -2474,8 +2480,29 @@ static int qla4xxx_recover_adapter(struct scsi_qla_host *ha) | |||
2474 | * or if stop_firmware fails for ISP-82xx. | 2480 | * or if stop_firmware fails for ISP-82xx. |
2475 | * This is the default case for ISP-4xxx */ | 2481 | * This is the default case for ISP-4xxx */ |
2476 | if (!is_qla8022(ha) || reset_chip) { | 2482 | if (!is_qla8022(ha) || reset_chip) { |
2483 | if (!is_qla8022(ha)) | ||
2484 | goto chip_reset; | ||
2485 | |||
2486 | /* Check if 82XX firmware is alive or not | ||
2487 | * We may have arrived here from NEED_RESET | ||
2488 | * detection only */ | ||
2489 | if (test_bit(AF_FW_RECOVERY, &ha->flags)) | ||
2490 | goto chip_reset; | ||
2491 | |||
2492 | wait = jiffies + (FW_ALIVE_WAIT_TOV * HZ); | ||
2493 | while (time_before(jiffies, wait)) { | ||
2494 | if (qla4_8xxx_check_fw_alive(ha)) { | ||
2495 | qla4xxx_mailbox_premature_completion(ha); | ||
2496 | break; | ||
2497 | } | ||
2498 | |||
2499 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
2500 | schedule_timeout(HZ); | ||
2501 | } | ||
2502 | |||
2477 | if (!test_bit(AF_FW_RECOVERY, &ha->flags)) | 2503 | if (!test_bit(AF_FW_RECOVERY, &ha->flags)) |
2478 | qla4xxx_cmd_wait(ha); | 2504 | qla4xxx_cmd_wait(ha); |
2505 | chip_reset: | ||
2479 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); | 2506 | qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS); |
2480 | qla4xxx_abort_active_cmds(ha, DID_RESET << 16); | 2507 | qla4xxx_abort_active_cmds(ha, DID_RESET << 16); |
2481 | DEBUG2(ql4_printk(KERN_INFO, ha, | 2508 | DEBUG2(ql4_printk(KERN_INFO, ha, |