diff options
author | Linas Vepstas <linas@austin.ibm.com> | 2007-10-05 15:55:04 -0400 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.localdomain> | 2007-10-23 15:10:20 -0400 |
commit | d68cd75992f95d6977956fb227f02e6d532f3d26 (patch) | |
tree | eb28d218817bb9c893220191ad9a7a90ef474861 /drivers/scsi/sym53c8xx_2/sym_glue.c | |
parent | 2ba65367720d871f9d955ca3ef96358999182765 (diff) |
[SCSI] sym53c8xx: PCI Error Recovery support
This patch adds the PCI error recovery callbacks to the Symbios SCSI device
driver. It includes support for First Failure Data Capture.
Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Assorted changes to initial patches, including returning IRQ_NONE from the
interrupt handler if the device is offline and re-using the eh_done completion
in the scsi error handler.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'drivers/scsi/sym53c8xx_2/sym_glue.c')
-rw-r--r-- | drivers/scsi/sym53c8xx_2/sym_glue.c | 179 |
1 files changed, 176 insertions, 3 deletions
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 4de0692f5c4f..67a577db5d18 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c | |||
@@ -134,7 +134,7 @@ static struct scsi_transport_template *sym2_transport_template = NULL; | |||
134 | * Driver private area in the SCSI command structure. | 134 | * Driver private area in the SCSI command structure. |
135 | */ | 135 | */ |
136 | struct sym_ucmd { /* Override the SCSI pointer structure */ | 136 | struct sym_ucmd { /* Override the SCSI pointer structure */ |
137 | struct completion *eh_done; /* For error handling */ | 137 | struct completion *eh_done; /* SCSI error handling */ |
138 | }; | 138 | }; |
139 | 139 | ||
140 | #define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp)) | 140 | #define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp)) |
@@ -556,6 +556,10 @@ static irqreturn_t sym53c8xx_intr(int irq, void *dev_id) | |||
556 | { | 556 | { |
557 | struct sym_hcb *np = dev_id; | 557 | struct sym_hcb *np = dev_id; |
558 | 558 | ||
559 | /* Avoid spinloop trying to handle interrupts on frozen device */ | ||
560 | if (pci_channel_offline(np->s.device)) | ||
561 | return IRQ_NONE; | ||
562 | |||
559 | if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); | 563 | if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); |
560 | 564 | ||
561 | spin_lock(np->s.host->host_lock); | 565 | spin_lock(np->s.host->host_lock); |
@@ -598,6 +602,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd) | |||
598 | struct sym_hcb *np = SYM_SOFTC_PTR(cmd); | 602 | struct sym_hcb *np = SYM_SOFTC_PTR(cmd); |
599 | struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd); | 603 | struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd); |
600 | struct Scsi_Host *host = cmd->device->host; | 604 | struct Scsi_Host *host = cmd->device->host; |
605 | struct pci_dev *pdev = np->s.device; | ||
601 | SYM_QUEHEAD *qp; | 606 | SYM_QUEHEAD *qp; |
602 | int cmd_queued = 0; | 607 | int cmd_queued = 0; |
603 | int sts = -1; | 608 | int sts = -1; |
@@ -605,6 +610,38 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd) | |||
605 | 610 | ||
606 | dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname); | 611 | dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname); |
607 | 612 | ||
613 | /* We may be in an error condition because the PCI bus | ||
614 | * went down. In this case, we need to wait until the | ||
615 | * PCI bus is reset, the card is reset, and only then | ||
616 | * proceed with the scsi error recovery. There's no | ||
617 | * point in hurrying; take a leisurely wait. | ||
618 | */ | ||
619 | #define WAIT_FOR_PCI_RECOVERY 35 | ||
620 | if (pci_channel_offline(pdev)) { | ||
621 | struct host_data *hostdata = shost_priv(host); | ||
622 | struct completion *io_reset; | ||
623 | int finished_reset = 0; | ||
624 | init_completion(&eh_done); | ||
625 | spin_lock_irq(host->host_lock); | ||
626 | /* Make sure we didn't race */ | ||
627 | if (pci_channel_offline(pdev)) { | ||
628 | if (!hostdata->io_reset) | ||
629 | hostdata->io_reset = &eh_done; | ||
630 | io_reset = hostdata->io_reset; | ||
631 | } else { | ||
632 | io_reset = NULL; | ||
633 | } | ||
634 | |||
635 | if (!pci_channel_offline(pdev)) | ||
636 | finished_reset = 1; | ||
637 | spin_unlock_irq(host->host_lock); | ||
638 | if (!finished_reset) | ||
639 | finished_reset = wait_for_completion_timeout(io_reset, | ||
640 | WAIT_FOR_PCI_RECOVERY*HZ); | ||
641 | if (!finished_reset) | ||
642 | return SCSI_FAILED; | ||
643 | } | ||
644 | |||
608 | spin_lock_irq(host->host_lock); | 645 | spin_lock_irq(host->host_lock); |
609 | /* This one is queued in some place -> to wait for completion */ | 646 | /* This one is queued in some place -> to wait for completion */ |
610 | FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { | 647 | FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { |
@@ -630,7 +667,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd) | |||
630 | break; | 667 | break; |
631 | case SYM_EH_HOST_RESET: | 668 | case SYM_EH_HOST_RESET: |
632 | sym_reset_scsi_bus(np, 0); | 669 | sym_reset_scsi_bus(np, 0); |
633 | sym_start_up (np, 1); | 670 | sym_start_up(np, 1); |
634 | sts = 0; | 671 | sts = 0; |
635 | break; | 672 | break; |
636 | default: | 673 | default: |
@@ -1435,7 +1472,7 @@ static struct Scsi_Host * __devinit sym_attach(struct scsi_host_template *tpnt, | |||
1435 | /* | 1472 | /* |
1436 | * Start the SCRIPTS. | 1473 | * Start the SCRIPTS. |
1437 | */ | 1474 | */ |
1438 | sym_start_up (np, 1); | 1475 | sym_start_up(np, 1); |
1439 | 1476 | ||
1440 | /* | 1477 | /* |
1441 | * Start the timer daemon | 1478 | * Start the timer daemon |
@@ -1823,6 +1860,134 @@ static void __devexit sym2_remove(struct pci_dev *pdev) | |||
1823 | attach_count--; | 1860 | attach_count--; |
1824 | } | 1861 | } |
1825 | 1862 | ||
1863 | /** | ||
1864 | * sym2_io_error_detected() - called when PCI error is detected | ||
1865 | * @pdev: pointer to PCI device | ||
1866 | * @state: current state of the PCI slot | ||
1867 | */ | ||
1868 | static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev, | ||
1869 | enum pci_channel_state state) | ||
1870 | { | ||
1871 | /* If slot is permanently frozen, turn everything off */ | ||
1872 | if (state == pci_channel_io_perm_failure) { | ||
1873 | sym2_remove(pdev); | ||
1874 | return PCI_ERS_RESULT_DISCONNECT; | ||
1875 | } | ||
1876 | |||
1877 | disable_irq(pdev->irq); | ||
1878 | pci_disable_device(pdev); | ||
1879 | |||
1880 | /* Request that MMIO be enabled, so register dump can be taken. */ | ||
1881 | return PCI_ERS_RESULT_CAN_RECOVER; | ||
1882 | } | ||
1883 | |||
1884 | /** | ||
1885 | * sym2_io_slot_dump - Enable MMIO and dump debug registers | ||
1886 | * @pdev: pointer to PCI device | ||
1887 | */ | ||
1888 | static pci_ers_result_t sym2_io_slot_dump(struct pci_dev *pdev) | ||
1889 | { | ||
1890 | struct sym_hcb *np = pci_get_drvdata(pdev); | ||
1891 | |||
1892 | sym_dump_registers(np); | ||
1893 | |||
1894 | /* Request a slot reset. */ | ||
1895 | return PCI_ERS_RESULT_NEED_RESET; | ||
1896 | } | ||
1897 | |||
1898 | /** | ||
1899 | * sym2_reset_workarounds - hardware-specific work-arounds | ||
1900 | * | ||
1901 | * This routine is similar to sym_set_workarounds(), except | ||
1902 | * that, at this point, we already know that the device was | ||
1903 | * successfully initialized at least once before, and so most | |
1904 | * of the steps taken there are un-needed here. | ||
1905 | */ | ||
1906 | static void sym2_reset_workarounds(struct pci_dev *pdev) | ||
1907 | { | ||
1908 | u_char revision; | ||
1909 | u_short status_reg; | ||
1910 | struct sym_chip *chip; | ||
1911 | |||
1912 | pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision); | ||
1913 | chip = sym_lookup_chip_table(pdev->device, revision); | ||
1914 | |||
1915 | /* Work around for errant bit in 895A, in a fashion | ||
1916 | * similar to what is done in sym_set_workarounds(). | ||
1917 | */ | ||
1918 | pci_read_config_word(pdev, PCI_STATUS, &status_reg); | ||
1919 | if (!(chip->features & FE_66MHZ) && (status_reg & PCI_STATUS_66MHZ)) { | ||
1920 | status_reg = PCI_STATUS_66MHZ; | ||
1921 | pci_write_config_word(pdev, PCI_STATUS, status_reg); | ||
1922 | pci_read_config_word(pdev, PCI_STATUS, &status_reg); | ||
1923 | } | ||
1924 | } | ||
1925 | |||
1926 | /** | ||
1927 | * sym2_io_slot_reset() - called when the pci bus has been reset. | ||
1928 | * @pdev: pointer to PCI device | ||
1929 | * | ||
1930 | * Restart the card from scratch. | ||
1931 | */ | ||
1932 | static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev) | ||
1933 | { | ||
1934 | struct sym_hcb *np = pci_get_drvdata(pdev); | ||
1935 | |||
1936 | printk(KERN_INFO "%s: recovering from a PCI slot reset\n", | ||
1937 | sym_name(np)); | ||
1938 | |||
1939 | if (pci_enable_device(pdev)) { | ||
1940 | printk(KERN_ERR "%s: Unable to enable after PCI reset\n", | ||
1941 | sym_name(np)); | ||
1942 | return PCI_ERS_RESULT_DISCONNECT; | ||
1943 | } | ||
1944 | |||
1945 | pci_set_master(pdev); | ||
1946 | enable_irq(pdev->irq); | ||
1947 | |||
1948 | /* If the chip can do Memory Write Invalidate, enable it */ | ||
1949 | if (np->features & FE_WRIE) { | ||
1950 | if (pci_set_mwi(pdev)) | ||
1951 | return PCI_ERS_RESULT_DISCONNECT; | ||
1952 | } | ||
1953 | |||
1954 | /* Perform work-arounds, analogous to sym_set_workarounds() */ | ||
1955 | sym2_reset_workarounds(pdev); | ||
1956 | |||
1957 | /* Perform host reset only on one instance of the card */ | ||
1958 | if (PCI_FUNC(pdev->devfn) == 0) { | ||
1959 | if (sym_reset_scsi_bus(np, 0)) { | ||
1960 | printk(KERN_ERR "%s: Unable to reset scsi host\n", | ||
1961 | sym_name(np)); | ||
1962 | return PCI_ERS_RESULT_DISCONNECT; | ||
1963 | } | ||
1964 | sym_start_up(np, 1); | ||
1965 | } | ||
1966 | |||
1967 | return PCI_ERS_RESULT_RECOVERED; | ||
1968 | } | ||
1969 | |||
1970 | /** | ||
1971 | * sym2_io_resume() - resume normal ops after PCI reset | ||
1972 | * @pdev: pointer to PCI device | ||
1973 | * | ||
1974 | * Called when the error recovery driver tells us that it's | |
1975 | * OK to resume normal operation. Use completion to allow | ||
1976 | * halted scsi ops to resume. | ||
1977 | */ | ||
1978 | static void sym2_io_resume(struct pci_dev *pdev) | ||
1979 | { | ||
1980 | struct sym_hcb *np = pci_get_drvdata(pdev); | ||
1981 | struct Scsi_Host *shost = np->s.host; | ||
1982 | struct host_data *hostdata = shost_priv(shost); | ||
1983 | |||
1984 | spin_lock_irq(shost->host_lock); | ||
1985 | if (hostdata->io_reset) | ||
1986 | complete_all(hostdata->io_reset); | ||
1987 | hostdata->io_reset = NULL; | ||
1988 | spin_unlock_irq(shost->host_lock); | ||
1989 | } | ||
1990 | |||
1826 | static void sym2_get_signalling(struct Scsi_Host *shost) | 1991 | static void sym2_get_signalling(struct Scsi_Host *shost) |
1827 | { | 1992 | { |
1828 | struct sym_hcb *np = sym_get_hcb(shost); | 1993 | struct sym_hcb *np = sym_get_hcb(shost); |
@@ -1985,11 +2150,19 @@ static struct pci_device_id sym2_id_table[] __devinitdata = { | |||
1985 | 2150 | ||
1986 | MODULE_DEVICE_TABLE(pci, sym2_id_table); | 2151 | MODULE_DEVICE_TABLE(pci, sym2_id_table); |
1987 | 2152 | ||
2153 | static struct pci_error_handlers sym2_err_handler = { | ||
2154 | .error_detected = sym2_io_error_detected, | ||
2155 | .mmio_enabled = sym2_io_slot_dump, | ||
2156 | .slot_reset = sym2_io_slot_reset, | ||
2157 | .resume = sym2_io_resume, | ||
2158 | }; | ||
2159 | |||
1988 | static struct pci_driver sym2_driver = { | 2160 | static struct pci_driver sym2_driver = { |
1989 | .name = NAME53C8XX, | 2161 | .name = NAME53C8XX, |
1990 | .id_table = sym2_id_table, | 2162 | .id_table = sym2_id_table, |
1991 | .probe = sym2_probe, | 2163 | .probe = sym2_probe, |
1992 | .remove = __devexit_p(sym2_remove), | 2164 | .remove = __devexit_p(sym2_remove), |
2165 | .err_handler = &sym2_err_handler, | ||
1993 | }; | 2166 | }; |
1994 | 2167 | ||
1995 | static int __init sym2_init(void) | 2168 | static int __init sym2_init(void) |