aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi/qla4xxx/ql4_os.c
diff options
context:
space:
mode:
authorLalit Chandivade <lalit.chandivade@qlogic.com>2010-07-30 05:08:47 -0400
committerJames Bottomley <James.Bottomley@suse.de>2010-08-06 13:00:40 -0400
commit2232be0d5707cd331b92027c0fd7ea5e843c2121 (patch)
treeb391f987edc2ede2924bf2963beab4bc9f2eff26 /drivers/scsi/qla4xxx/ql4_os.c
parent21033639699d883668f6937b03e7b710771ad37e (diff)
[SCSI] qla4xxx: Added AER support for ISP82xx
Added support for PCI error handling Signed-off-by: Lalit Chandivade <lalit.chandivade@qlogic.com> Signed-off-by: Vikas Chaudhary <vikas.chaudhary@qlogic.com> Signed-off-by: Poornima Vonti <poornima.vonti@qlogic.com> Signed-off-by: Ravi Anand <ravi.anand@qlogic.com> Reviewed-by: Mike Christie <michaelc@cs.wisc.edu> Signed-off-by: James Bottomley <James.Bottomley@suse.de>
Diffstat (limited to 'drivers/scsi/qla4xxx/ql4_os.c')
-rw-r--r--drivers/scsi/qla4xxx/ql4_os.c301
1 files changed, 301 insertions, 0 deletions
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 2bb362c6e634..370d40ff1529 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -475,6 +475,14 @@ static int qla4xxx_queuecommand(struct scsi_cmnd *cmd,
475 struct srb *srb; 475 struct srb *srb;
476 int rval; 476 int rval;
477 477
478 if (test_bit(AF_EEH_BUSY, &ha->flags)) {
479 if (test_bit(AF_PCI_CHANNEL_IO_PERM_FAILURE, &ha->flags))
480 cmd->result = DID_NO_CONNECT << 16;
481 else
482 cmd->result = DID_REQUEUE << 16;
483 goto qc_fail_command;
484 }
485
478 if (!sess) { 486 if (!sess) {
479 cmd->result = DID_IMM_RETRY << 16; 487 cmd->result = DID_IMM_RETRY << 16;
480 goto qc_fail_command; 488 goto qc_fail_command;
@@ -655,6 +663,13 @@ static void qla4_8xxx_check_fw_alive(struct scsi_qla_host *ha)
655 uint32_t fw_heartbeat_counter, halt_status; 663 uint32_t fw_heartbeat_counter, halt_status;
656 664
657 fw_heartbeat_counter = qla4_8xxx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER); 665 fw_heartbeat_counter = qla4_8xxx_rd_32(ha, QLA82XX_PEG_ALIVE_COUNTER);
666 /* If PEG_ALIVE_COUNTER is 0xffffffff, AER/EEH is in progress, ignore */
667 if (fw_heartbeat_counter == 0xffffffff) {
668 DEBUG2(printk(KERN_WARNING "scsi%ld: %s: Device in frozen "
669 "state, QLA82XX_PEG_ALIVE_COUNTER is 0xffffffff\n",
670 ha->host_no, __func__));
671 return;
672 }
658 673
659 if (ha->fw_heartbeat_counter == fw_heartbeat_counter) { 674 if (ha->fw_heartbeat_counter == fw_heartbeat_counter) {
660 ha->seconds_since_last_heartbeat++; 675 ha->seconds_since_last_heartbeat++;
@@ -723,6 +738,19 @@ static void qla4xxx_timer(struct scsi_qla_host *ha)
723{ 738{
724 struct ddb_entry *ddb_entry, *dtemp; 739 struct ddb_entry *ddb_entry, *dtemp;
725 int start_dpc = 0; 740 int start_dpc = 0;
741 uint16_t w;
742
743 /* If we are in the middle of AER/EEH processing
744 * skip any processing and reschedule the timer
745 */
746 if (test_bit(AF_EEH_BUSY, &ha->flags)) {
747 mod_timer(&ha->timer, jiffies + HZ);
748 return;
749 }
750
751 /* Hardware read to trigger an EEH error during mailbox waits. */
752 if (!pci_channel_offline(ha->pdev))
753 pci_read_config_word(ha->pdev, PCI_VENDOR_ID, &w);
726 754
727 if (test_bit(AF_HBA_GOING_AWAY, &ha->flags)) { 755 if (test_bit(AF_HBA_GOING_AWAY, &ha->flags)) {
728 DEBUG2(ql4_printk(KERN_INFO, ha, "%s exited. HBA GOING AWAY\n", 756 DEBUG2(ql4_printk(KERN_INFO, ha, "%s exited. HBA GOING AWAY\n",
@@ -1213,6 +1241,12 @@ static void qla4xxx_do_dpc(struct work_struct *work)
1213 if (!test_bit(AF_INIT_DONE, &ha->flags)) 1241 if (!test_bit(AF_INIT_DONE, &ha->flags))
1214 goto do_dpc_exit; 1242 goto do_dpc_exit;
1215 1243
1244 if (test_bit(AF_EEH_BUSY, &ha->flags)) {
1245 DEBUG2(printk(KERN_INFO "scsi%ld: %s: flags = %lx\n",
1246 ha->host_no, __func__, ha->flags));
1247 goto do_dpc_exit;
1248 }
1249
1216 /* HBA is in the process of being permanently disabled. 1250 /* HBA is in the process of being permanently disabled.
1217 * Don't process anything */ 1251 * Don't process anything */
1218 if (test_bit(AF_HBA_GOING_AWAY, &ha->flags)) 1252 if (test_bit(AF_HBA_GOING_AWAY, &ha->flags))
@@ -1618,6 +1652,8 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
1618 ha->host = host; 1652 ha->host = host;
1619 ha->host_no = host->host_no; 1653 ha->host_no = host->host_no;
1620 1654
1655 pci_enable_pcie_error_reporting(pdev);
1656
1621 /* Setup Runtime configurable options */ 1657 /* Setup Runtime configurable options */
1622 if (is_qla8022(ha)) { 1658 if (is_qla8022(ha)) {
1623 ha->isp_ops = &qla4_8xxx_isp_ops; 1659 ha->isp_ops = &qla4_8xxx_isp_ops;
@@ -1636,6 +1672,10 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
1636 ha->isp_ops = &qla4xxx_isp_ops; 1672 ha->isp_ops = &qla4xxx_isp_ops;
1637 } 1673 }
1638 1674
1675 /* Set EEH reset type to fundamental if required by hba */
1676 if (is_qla8022(ha))
1677 pdev->needs_freset = 1;
1678
1639 /* Configure PCI I/O space. */ 1679 /* Configure PCI I/O space. */
1640 ret = ha->isp_ops->iospace_config(ha); 1680 ret = ha->isp_ops->iospace_config(ha);
1641 if (ret) 1681 if (ret)
@@ -1732,6 +1772,7 @@ static int __devinit qla4xxx_probe_adapter(struct pci_dev *pdev,
1732 } 1772 }
1733 } 1773 }
1734 1774
1775 pci_save_state(ha->pdev);
1735 ha->isp_ops->enable_intrs(ha); 1776 ha->isp_ops->enable_intrs(ha);
1736 1777
1737 /* Start timer thread. */ 1778 /* Start timer thread. */
@@ -1758,6 +1799,7 @@ probe_failed:
1758 qla4xxx_free_adapter(ha); 1799 qla4xxx_free_adapter(ha);
1759 1800
1760probe_failed_ioconfig: 1801probe_failed_ioconfig:
1802 pci_disable_pcie_error_reporting(pdev);
1761 scsi_host_put(ha->host); 1803 scsi_host_put(ha->host);
1762 1804
1763probe_disable_device: 1805probe_disable_device:
@@ -1787,6 +1829,7 @@ static void __devexit qla4xxx_remove_adapter(struct pci_dev *pdev)
1787 1829
1788 scsi_host_put(ha->host); 1830 scsi_host_put(ha->host);
1789 1831
1832 pci_disable_pcie_error_reporting(pdev);
1790 pci_disable_device(pdev); 1833 pci_disable_device(pdev);
1791 pci_set_drvdata(pdev, NULL); 1834 pci_set_drvdata(pdev, NULL);
1792} 1835}
@@ -1883,6 +1926,17 @@ static int qla4xxx_eh_wait_on_command(struct scsi_qla_host *ha,
1883 int done = 0; 1926 int done = 0;
1884 struct srb *rp; 1927 struct srb *rp;
1885 uint32_t max_wait_time = EH_WAIT_CMD_TOV; 1928 uint32_t max_wait_time = EH_WAIT_CMD_TOV;
1929 int ret = SUCCESS;
1930
1931 /* Dont wait on command if PCI error is being handled
1932 * by PCI AER driver
1933 */
1934 if (unlikely(pci_channel_offline(ha->pdev)) ||
1935 (test_bit(AF_EEH_BUSY, &ha->flags))) {
1936 ql4_printk(KERN_WARNING, ha, "scsi%ld: Return from %s\n",
1937 ha->host_no, __func__);
1938 return ret;
1939 }
1886 1940
1887 do { 1941 do {
1888 /* Checking to see if its returned to OS */ 1942 /* Checking to see if its returned to OS */
@@ -2178,6 +2232,252 @@ static int qla4xxx_eh_host_reset(struct scsi_cmnd *cmd)
2178 return return_status; 2232 return return_status;
2179} 2233}
2180 2234
2235/* PCI AER driver recovers from all correctable errors w/o
2236 * driver intervention. For uncorrectable errors PCI AER
2237 * driver calls the following device driver's callbacks
2238 *
2239 * - Fatal Errors - link_reset
2240 * - Non-Fatal Errors - driver's pci_error_detected() which
2241 * returns CAN_RECOVER, NEED_RESET or DISCONNECT.
2242 *
2243 * PCI AER driver calls
2244 * CAN_RECOVER - driver's pci_mmio_enabled(), mmio_enabled
2245 * returns RECOVERED or NEED_RESET if fw_hung
2246 * NEED_RESET - driver's slot_reset()
2247 * DISCONNECT - device is dead & cannot recover
2248 * RECOVERED - driver's pci_resume()
2249 */
2250static pci_ers_result_t
2251qla4xxx_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
2252{
2253 struct scsi_qla_host *ha = pci_get_drvdata(pdev);
2254
2255 ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: error detected:state %x\n",
2256 ha->host_no, __func__, state);
2257
2258 if (!is_aer_supported(ha))
2259 return PCI_ERS_RESULT_NONE;
2260
2261 switch (state) {
2262 case pci_channel_io_normal:
2263 clear_bit(AF_EEH_BUSY, &ha->flags);
2264 return PCI_ERS_RESULT_CAN_RECOVER;
2265 case pci_channel_io_frozen:
2266 set_bit(AF_EEH_BUSY, &ha->flags);
2267 qla4xxx_mailbox_premature_completion(ha);
2268 qla4xxx_free_irqs(ha);
2269 pci_disable_device(pdev);
2270 return PCI_ERS_RESULT_NEED_RESET;
2271 case pci_channel_io_perm_failure:
2272 set_bit(AF_EEH_BUSY, &ha->flags);
2273 set_bit(AF_PCI_CHANNEL_IO_PERM_FAILURE, &ha->flags);
2274 qla4xxx_abort_active_cmds(ha, DID_NO_CONNECT << 16);
2275 return PCI_ERS_RESULT_DISCONNECT;
2276 }
2277 return PCI_ERS_RESULT_NEED_RESET;
2278}
2279
2280/**
2281 * qla4xxx_pci_mmio_enabled() gets called if
2282 * qla4xxx_pci_error_detected() returns PCI_ERS_RESULT_CAN_RECOVER
2283 * and read/write to the device still works.
2284 **/
2285static pci_ers_result_t
2286qla4xxx_pci_mmio_enabled(struct pci_dev *pdev)
2287{
2288 struct scsi_qla_host *ha = pci_get_drvdata(pdev);
2289
2290 if (!is_aer_supported(ha))
2291 return PCI_ERS_RESULT_NONE;
2292
2293 if (test_bit(AF_FW_RECOVERY, &ha->flags)) {
2294 ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: firmware hang -- "
2295 "mmio_enabled\n", ha->host_no, __func__);
2296 return PCI_ERS_RESULT_NEED_RESET;
2297 } else
2298 return PCI_ERS_RESULT_RECOVERED;
2299}
2300
2301uint32_t qla4_8xxx_error_recovery(struct scsi_qla_host *ha)
2302{
2303 uint32_t rval = QLA_ERROR;
2304 int fn;
2305 struct pci_dev *other_pdev = NULL;
2306
2307 ql4_printk(KERN_WARNING, ha, "scsi%ld: In %s\n", ha->host_no, __func__);
2308
2309 set_bit(DPC_RESET_ACTIVE, &ha->dpc_flags);
2310
2311 if (test_bit(AF_ONLINE, &ha->flags)) {
2312 clear_bit(AF_ONLINE, &ha->flags);
2313 qla4xxx_mark_all_devices_missing(ha);
2314 qla4xxx_process_aen(ha, FLUSH_DDB_CHANGED_AENS);
2315 qla4xxx_abort_active_cmds(ha, DID_RESET << 16);
2316 }
2317
2318 fn = PCI_FUNC(ha->pdev->devfn);
2319 while (fn > 0) {
2320 fn--;
2321 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: Finding PCI device at "
2322 "func %x\n", ha->host_no, __func__, fn);
2323 /* Get the pci device given the domain, bus,
2324 * slot/function number */
2325 other_pdev =
2326 pci_get_domain_bus_and_slot(pci_domain_nr(ha->pdev->bus),
2327 ha->pdev->bus->number, PCI_DEVFN(PCI_SLOT(ha->pdev->devfn),
2328 fn));
2329
2330 if (!other_pdev)
2331 continue;
2332
2333 if (atomic_read(&other_pdev->enable_cnt)) {
2334 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: Found PCI "
2335 "func in enabled state%x\n", ha->host_no,
2336 __func__, fn);
2337 pci_dev_put(other_pdev);
2338 break;
2339 }
2340 pci_dev_put(other_pdev);
2341 }
2342
2343 /* The first function on the card, the reset owner will
2344 * start & initialize the firmware. The other functions
2345 * on the card will reset the firmware context
2346 */
2347 if (!fn) {
2348 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn being reset "
2349 "0x%x is the owner\n", ha->host_no, __func__,
2350 ha->pdev->devfn);
2351
2352 qla4_8xxx_idc_lock(ha);
2353 qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
2354 QLA82XX_DEV_COLD);
2355
2356 qla4_8xxx_wr_32(ha, QLA82XX_CRB_DRV_IDC_VERSION,
2357 QLA82XX_IDC_VERSION);
2358
2359 qla4_8xxx_idc_unlock(ha);
2360 clear_bit(AF_FW_RECOVERY, &ha->flags);
2361 rval = qla4xxx_initialize_adapter(ha, PRESERVE_DDB_LIST);
2362 qla4_8xxx_idc_lock(ha);
2363
2364 if (rval != QLA_SUCCESS) {
2365 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: HW State: "
2366 "FAILED\n", ha->host_no, __func__);
2367 qla4_8xxx_clear_drv_active(ha);
2368 qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
2369 QLA82XX_DEV_FAILED);
2370 } else {
2371 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: HW State: "
2372 "READY\n", ha->host_no, __func__);
2373 qla4_8xxx_wr_32(ha, QLA82XX_CRB_DEV_STATE,
2374 QLA82XX_DEV_READY);
2375 /* Clear driver state register */
2376 qla4_8xxx_wr_32(ha, QLA82XX_CRB_DRV_STATE, 0);
2377 qla4_8xxx_set_drv_active(ha);
2378 ha->isp_ops->enable_intrs(ha);
2379 }
2380 qla4_8xxx_idc_unlock(ha);
2381 } else {
2382 ql4_printk(KERN_INFO, ha, "scsi%ld: %s: devfn 0x%x is not "
2383 "the reset owner\n", ha->host_no, __func__,
2384 ha->pdev->devfn);
2385 if ((qla4_8xxx_rd_32(ha, QLA82XX_CRB_DEV_STATE) ==
2386 QLA82XX_DEV_READY)) {
2387 clear_bit(AF_FW_RECOVERY, &ha->flags);
2388 rval = qla4xxx_initialize_adapter(ha,
2389 PRESERVE_DDB_LIST);
2390 if (rval == QLA_SUCCESS)
2391 ha->isp_ops->enable_intrs(ha);
2392 qla4_8xxx_idc_lock(ha);
2393 qla4_8xxx_set_drv_active(ha);
2394 qla4_8xxx_idc_unlock(ha);
2395 }
2396 }
2397 clear_bit(DPC_RESET_ACTIVE, &ha->dpc_flags);
2398 return rval;
2399}
2400
2401static pci_ers_result_t
2402qla4xxx_pci_slot_reset(struct pci_dev *pdev)
2403{
2404 pci_ers_result_t ret = PCI_ERS_RESULT_DISCONNECT;
2405 struct scsi_qla_host *ha = pci_get_drvdata(pdev);
2406 int rc;
2407
2408 ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: slot_reset\n",
2409 ha->host_no, __func__);
2410
2411 if (!is_aer_supported(ha))
2412 return PCI_ERS_RESULT_NONE;
2413
2414 /* Restore the saved state of PCIe device -
2415 * BAR registers, PCI Config space, PCIX, MSI,
2416 * IOV states
2417 */
2418 pci_restore_state(pdev);
2419
2420 /* pci_restore_state() clears the saved_state flag of the device
2421 * save restored state which resets saved_state flag
2422 */
2423 pci_save_state(pdev);
2424
2425 /* Initialize device or resume if in suspended state */
2426 rc = pci_enable_device(pdev);
2427 if (rc) {
2428 ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: Cant re-enable "
2429 "device after reset\n", ha->host_no, __func__);
2430 goto exit_slot_reset;
2431 }
2432
2433 ret = qla4xxx_request_irqs(ha);
2434 if (ret) {
2435 ql4_printk(KERN_WARNING, ha, "Failed to reserve interrupt %d"
2436 " already in use.\n", pdev->irq);
2437 goto exit_slot_reset;
2438 }
2439
2440 if (is_qla8022(ha)) {
2441 if (qla4_8xxx_error_recovery(ha) == QLA_SUCCESS) {
2442 ret = PCI_ERS_RESULT_RECOVERED;
2443 goto exit_slot_reset;
2444 } else
2445 goto exit_slot_reset;
2446 }
2447
2448exit_slot_reset:
2449 ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: Return=%x\n"
2450 "device after reset\n", ha->host_no, __func__, ret);
2451 return ret;
2452}
2453
2454static void
2455qla4xxx_pci_resume(struct pci_dev *pdev)
2456{
2457 struct scsi_qla_host *ha = pci_get_drvdata(pdev);
2458 int ret;
2459
2460 ql4_printk(KERN_WARNING, ha, "scsi%ld: %s: pci_resume\n",
2461 ha->host_no, __func__);
2462
2463 ret = qla4xxx_wait_for_hba_online(ha);
2464 if (ret != QLA_SUCCESS) {
2465 ql4_printk(KERN_ERR, ha, "scsi%ld: %s: the device failed to "
2466 "resume I/O from slot/link_reset\n", ha->host_no,
2467 __func__);
2468 }
2469
2470 pci_cleanup_aer_uncorrect_error_status(pdev);
2471 clear_bit(AF_EEH_BUSY, &ha->flags);
2472}
2473
2474static struct pci_error_handlers qla4xxx_err_handler = {
2475 .error_detected = qla4xxx_pci_error_detected,
2476 .mmio_enabled = qla4xxx_pci_mmio_enabled,
2477 .slot_reset = qla4xxx_pci_slot_reset,
2478 .resume = qla4xxx_pci_resume,
2479};
2480
2181static struct pci_device_id qla4xxx_pci_tbl[] = { 2481static struct pci_device_id qla4xxx_pci_tbl[] = {
2182 { 2482 {
2183 .vendor = PCI_VENDOR_ID_QLOGIC, 2483 .vendor = PCI_VENDOR_ID_QLOGIC,
@@ -2212,6 +2512,7 @@ static struct pci_driver qla4xxx_pci_driver = {
2212 .id_table = qla4xxx_pci_tbl, 2512 .id_table = qla4xxx_pci_tbl,
2213 .probe = qla4xxx_probe_adapter, 2513 .probe = qla4xxx_probe_adapter,
2214 .remove = qla4xxx_remove_adapter, 2514 .remove = qla4xxx_remove_adapter,
2515 .err_handler = &qla4xxx_err_handler,
2215}; 2516};
2216 2517
2217static int __init qla4xxx_module_init(void) 2518static int __init qla4xxx_module_init(void)