diff options
author | Linas Vepstas <linas@austin.ibm.com> | 2007-10-05 15:55:04 -0400 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.localdomain> | 2007-10-23 15:10:20 -0400 |
commit | d68cd75992f95d6977956fb227f02e6d532f3d26 (patch) | |
tree | eb28d218817bb9c893220191ad9a7a90ef474861 /drivers/scsi | |
parent | 2ba65367720d871f9d955ca3ef96358999182765 (diff) |
[SCSI] sym53c8xx: PCI Error Recovery support
This patch adds the PCI error recovery callbacks to the Symbios SCSI device
driver. It includes support for First Failure Data Capture.
Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Assorted changes to initial patches, including returning IRQ_NONE from the
interrupt handler if the device is offline and re-using the eh_done completion
in the scsi error handler.
Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'drivers/scsi')
-rw-r--r-- | drivers/scsi/sym53c8xx_2/sym_glue.c | 179 | ||||
-rw-r--r-- | drivers/scsi/sym53c8xx_2/sym_glue.h | 3 | ||||
-rw-r--r-- | drivers/scsi/sym53c8xx_2/sym_hipd.c | 25 |
3 files changed, 200 insertions, 7 deletions
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c index 4de0692f5c4f..67a577db5d18 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.c +++ b/drivers/scsi/sym53c8xx_2/sym_glue.c | |||
@@ -134,7 +134,7 @@ static struct scsi_transport_template *sym2_transport_template = NULL; | |||
134 | * Driver private area in the SCSI command structure. | 134 | * Driver private area in the SCSI command structure. |
135 | */ | 135 | */ |
136 | struct sym_ucmd { /* Override the SCSI pointer structure */ | 136 | struct sym_ucmd { /* Override the SCSI pointer structure */ |
137 | struct completion *eh_done; /* For error handling */ | 137 | struct completion *eh_done; /* SCSI error handling */ |
138 | }; | 138 | }; |
139 | 139 | ||
140 | #define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp)) | 140 | #define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp)) |
@@ -556,6 +556,10 @@ static irqreturn_t sym53c8xx_intr(int irq, void *dev_id) | |||
556 | { | 556 | { |
557 | struct sym_hcb *np = dev_id; | 557 | struct sym_hcb *np = dev_id; |
558 | 558 | ||
559 | /* Avoid spinloop trying to handle interrupts on frozen device */ | ||
560 | if (pci_channel_offline(np->s.device)) | ||
561 | return IRQ_NONE; | ||
562 | |||
559 | if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); | 563 | if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); |
560 | 564 | ||
561 | spin_lock(np->s.host->host_lock); | 565 | spin_lock(np->s.host->host_lock); |
@@ -598,6 +602,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd) | |||
598 | struct sym_hcb *np = SYM_SOFTC_PTR(cmd); | 602 | struct sym_hcb *np = SYM_SOFTC_PTR(cmd); |
599 | struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd); | 603 | struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd); |
600 | struct Scsi_Host *host = cmd->device->host; | 604 | struct Scsi_Host *host = cmd->device->host; |
605 | struct pci_dev *pdev = np->s.device; | ||
601 | SYM_QUEHEAD *qp; | 606 | SYM_QUEHEAD *qp; |
602 | int cmd_queued = 0; | 607 | int cmd_queued = 0; |
603 | int sts = -1; | 608 | int sts = -1; |
@@ -605,6 +610,38 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd) | |||
605 | 610 | ||
606 | dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname); | 611 | dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname); |
607 | 612 | ||
613 | /* We may be in an error condition because the PCI bus | ||
614 | * went down. In this case, we need to wait until the | ||
615 | * PCI bus is reset, the card is reset, and only then | ||
616 | * proceed with the scsi error recovery. There's no | ||
617 | * point in hurrying; take a leisurely wait. | ||
618 | */ | ||
619 | #define WAIT_FOR_PCI_RECOVERY 35 | ||
620 | if (pci_channel_offline(pdev)) { | ||
621 | struct host_data *hostdata = shost_priv(host); | ||
622 | struct completion *io_reset; | ||
623 | int finished_reset = 0; | ||
624 | init_completion(&eh_done); | ||
625 | spin_lock_irq(host->host_lock); | ||
626 | /* Make sure we didn't race */ | ||
627 | if (pci_channel_offline(pdev)) { | ||
628 | if (!hostdata->io_reset) | ||
629 | hostdata->io_reset = &eh_done; | ||
630 | io_reset = hostdata->io_reset; | ||
631 | } else { | ||
632 | io_reset = NULL; | ||
633 | } | ||
634 | |||
635 | if (!pci_channel_offline(pdev)) | ||
636 | finished_reset = 1; | ||
637 | spin_unlock_irq(host->host_lock); | ||
638 | if (!finished_reset) | ||
639 | finished_reset = wait_for_completion_timeout(io_reset, | ||
640 | WAIT_FOR_PCI_RECOVERY*HZ); | ||
641 | if (!finished_reset) | ||
642 | return SCSI_FAILED; | ||
643 | } | ||
644 | |||
608 | spin_lock_irq(host->host_lock); | 645 | spin_lock_irq(host->host_lock); |
609 | /* This one is queued in some place -> to wait for completion */ | 646 | /* This one is queued in some place -> to wait for completion */ |
610 | FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { | 647 | FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { |
@@ -630,7 +667,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd) | |||
630 | break; | 667 | break; |
631 | case SYM_EH_HOST_RESET: | 668 | case SYM_EH_HOST_RESET: |
632 | sym_reset_scsi_bus(np, 0); | 669 | sym_reset_scsi_bus(np, 0); |
633 | sym_start_up (np, 1); | 670 | sym_start_up(np, 1); |
634 | sts = 0; | 671 | sts = 0; |
635 | break; | 672 | break; |
636 | default: | 673 | default: |
@@ -1435,7 +1472,7 @@ static struct Scsi_Host * __devinit sym_attach(struct scsi_host_template *tpnt, | |||
1435 | /* | 1472 | /* |
1436 | * Start the SCRIPTS. | 1473 | * Start the SCRIPTS. |
1437 | */ | 1474 | */ |
1438 | sym_start_up (np, 1); | 1475 | sym_start_up(np, 1); |
1439 | 1476 | ||
1440 | /* | 1477 | /* |
1441 | * Start the timer daemon | 1478 | * Start the timer daemon |
@@ -1823,6 +1860,134 @@ static void __devexit sym2_remove(struct pci_dev *pdev) | |||
1823 | attach_count--; | 1860 | attach_count--; |
1824 | } | 1861 | } |
1825 | 1862 | ||
1863 | /** | ||
1864 | * sym2_io_error_detected() - called when PCI error is detected | ||
1865 | * @pdev: pointer to PCI device | ||
1866 | * @state: current state of the PCI slot | ||
1867 | */ | ||
1868 | static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev, | ||
1869 | enum pci_channel_state state) | ||
1870 | { | ||
1871 | /* If slot is permanently frozen, turn everything off */ | ||
1872 | if (state == pci_channel_io_perm_failure) { | ||
1873 | sym2_remove(pdev); | ||
1874 | return PCI_ERS_RESULT_DISCONNECT; | ||
1875 | } | ||
1876 | |||
1877 | disable_irq(pdev->irq); | ||
1878 | pci_disable_device(pdev); | ||
1879 | |||
1880 | /* Request that MMIO be enabled, so register dump can be taken. */ | ||
1881 | return PCI_ERS_RESULT_CAN_RECOVER; | ||
1882 | } | ||
1883 | |||
1884 | /** | ||
1885 | * sym2_io_slot_dump - Enable MMIO and dump debug registers | ||
1886 | * @pdev: pointer to PCI device | ||
1887 | */ | ||
1888 | static pci_ers_result_t sym2_io_slot_dump(struct pci_dev *pdev) | ||
1889 | { | ||
1890 | struct sym_hcb *np = pci_get_drvdata(pdev); | ||
1891 | |||
1892 | sym_dump_registers(np); | ||
1893 | |||
1894 | /* Request a slot reset. */ | ||
1895 | return PCI_ERS_RESULT_NEED_RESET; | ||
1896 | } | ||
1897 | |||
1898 | /** | ||
1899 | * sym2_reset_workarounds - hardware-specific work-arounds | ||
1900 | * | ||
1901 | * This routine is similar to sym_set_workarounds(), except | ||
1902 | * that, at this point, we already know that the device was | ||
1903 | * successfully initialized at least once before, and so most | ||
1904 | * of the steps taken there are un-needed here. | ||
1905 | */ | ||
1906 | static void sym2_reset_workarounds(struct pci_dev *pdev) | ||
1907 | { | ||
1908 | u_char revision; | ||
1909 | u_short status_reg; | ||
1910 | struct sym_chip *chip; | ||
1911 | |||
1912 | pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision); | ||
1913 | chip = sym_lookup_chip_table(pdev->device, revision); | ||
1914 | |||
1915 | /* Work around for errant bit in 895A, in a fashion | ||
1916 | * similar to what is done in sym_set_workarounds(). | ||
1917 | */ | ||
1918 | pci_read_config_word(pdev, PCI_STATUS, &status_reg); | ||
1919 | if (!(chip->features & FE_66MHZ) && (status_reg & PCI_STATUS_66MHZ)) { | ||
1920 | status_reg = PCI_STATUS_66MHZ; | ||
1921 | pci_write_config_word(pdev, PCI_STATUS, status_reg); | ||
1922 | pci_read_config_word(pdev, PCI_STATUS, &status_reg); | ||
1923 | } | ||
1924 | } | ||
1925 | |||
1926 | /** | ||
1927 | * sym2_io_slot_reset() - called when the pci bus has been reset. | ||
1928 | * @pdev: pointer to PCI device | ||
1929 | * | ||
1930 | * Restart the card from scratch. | ||
1931 | */ | ||
1932 | static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev) | ||
1933 | { | ||
1934 | struct sym_hcb *np = pci_get_drvdata(pdev); | ||
1935 | |||
1936 | printk(KERN_INFO "%s: recovering from a PCI slot reset\n", | ||
1937 | sym_name(np)); | ||
1938 | |||
1939 | if (pci_enable_device(pdev)) { | ||
1940 | printk(KERN_ERR "%s: Unable to enable after PCI reset\n", | ||
1941 | sym_name(np)); | ||
1942 | return PCI_ERS_RESULT_DISCONNECT; | ||
1943 | } | ||
1944 | |||
1945 | pci_set_master(pdev); | ||
1946 | enable_irq(pdev->irq); | ||
1947 | |||
1948 | /* If the chip can do Memory Write Invalidate, enable it */ | ||
1949 | if (np->features & FE_WRIE) { | ||
1950 | if (pci_set_mwi(pdev)) | ||
1951 | return PCI_ERS_RESULT_DISCONNECT; | ||
1952 | } | ||
1953 | |||
1954 | /* Perform work-arounds, analogous to sym_set_workarounds() */ | ||
1955 | sym2_reset_workarounds(pdev); | ||
1956 | |||
1957 | /* Perform host reset only on one instance of the card */ | ||
1958 | if (PCI_FUNC(pdev->devfn) == 0) { | ||
1959 | if (sym_reset_scsi_bus(np, 0)) { | ||
1960 | printk(KERN_ERR "%s: Unable to reset scsi host\n", | ||
1961 | sym_name(np)); | ||
1962 | return PCI_ERS_RESULT_DISCONNECT; | ||
1963 | } | ||
1964 | sym_start_up(np, 1); | ||
1965 | } | ||
1966 | |||
1967 | return PCI_ERS_RESULT_RECOVERED; | ||
1968 | } | ||
1969 | |||
1970 | /** | ||
1971 | * sym2_io_resume() - resume normal ops after PCI reset | ||
1972 | * @pdev: pointer to PCI device | ||
1973 | * | ||
1974 | * Called when the error recovery driver tells us that it's | ||
1975 | * OK to resume normal operation. Use completion to allow | ||
1976 | * halted scsi ops to resume. | ||
1977 | */ | ||
1978 | static void sym2_io_resume(struct pci_dev *pdev) | ||
1979 | { | ||
1980 | struct sym_hcb *np = pci_get_drvdata(pdev); | ||
1981 | struct Scsi_Host *shost = np->s.host; | ||
1982 | struct host_data *hostdata = shost_priv(shost); | ||
1983 | |||
1984 | spin_lock_irq(shost->host_lock); | ||
1985 | if (hostdata->io_reset) | ||
1986 | complete_all(hostdata->io_reset); | ||
1987 | hostdata->io_reset = NULL; | ||
1988 | spin_unlock_irq(shost->host_lock); | ||
1989 | } | ||
1990 | |||
1826 | static void sym2_get_signalling(struct Scsi_Host *shost) | 1991 | static void sym2_get_signalling(struct Scsi_Host *shost) |
1827 | { | 1992 | { |
1828 | struct sym_hcb *np = sym_get_hcb(shost); | 1993 | struct sym_hcb *np = sym_get_hcb(shost); |
@@ -1985,11 +2150,19 @@ static struct pci_device_id sym2_id_table[] __devinitdata = { | |||
1985 | 2150 | ||
1986 | MODULE_DEVICE_TABLE(pci, sym2_id_table); | 2151 | MODULE_DEVICE_TABLE(pci, sym2_id_table); |
1987 | 2152 | ||
2153 | static struct pci_error_handlers sym2_err_handler = { | ||
2154 | .error_detected = sym2_io_error_detected, | ||
2155 | .mmio_enabled = sym2_io_slot_dump, | ||
2156 | .slot_reset = sym2_io_slot_reset, | ||
2157 | .resume = sym2_io_resume, | ||
2158 | }; | ||
2159 | |||
1988 | static struct pci_driver sym2_driver = { | 2160 | static struct pci_driver sym2_driver = { |
1989 | .name = NAME53C8XX, | 2161 | .name = NAME53C8XX, |
1990 | .id_table = sym2_id_table, | 2162 | .id_table = sym2_id_table, |
1991 | .probe = sym2_probe, | 2163 | .probe = sym2_probe, |
1992 | .remove = __devexit_p(sym2_remove), | 2164 | .remove = __devexit_p(sym2_remove), |
2165 | .err_handler = &sym2_err_handler, | ||
1993 | }; | 2166 | }; |
1994 | 2167 | ||
1995 | static int __init sym2_init(void) | 2168 | static int __init sym2_init(void) |
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h index bea7bcc69ebd..d5ba5aae6c5a 100644 --- a/drivers/scsi/sym53c8xx_2/sym_glue.h +++ b/drivers/scsi/sym53c8xx_2/sym_glue.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #ifndef SYM_GLUE_H | 40 | #ifndef SYM_GLUE_H |
41 | #define SYM_GLUE_H | 41 | #define SYM_GLUE_H |
42 | 42 | ||
43 | #include <linux/completion.h> | ||
43 | #include <linux/delay.h> | 44 | #include <linux/delay.h> |
44 | #include <linux/ioport.h> | 45 | #include <linux/ioport.h> |
45 | #include <linux/pci.h> | 46 | #include <linux/pci.h> |
@@ -220,6 +221,7 @@ struct sym_device { | |||
220 | */ | 221 | */ |
221 | struct host_data { | 222 | struct host_data { |
222 | struct sym_hcb *ncb; | 223 | struct sym_hcb *ncb; |
224 | struct completion *io_reset; /* PCI error handling */ | ||
223 | }; | 225 | }; |
224 | 226 | ||
225 | static inline struct sym_hcb * sym_get_hcb(struct Scsi_Host *host) | 227 | static inline struct sym_hcb * sym_get_hcb(struct Scsi_Host *host) |
@@ -265,5 +267,6 @@ void sym_xpt_async_bus_reset(struct sym_hcb *np); | |||
265 | void sym_xpt_async_sent_bdr(struct sym_hcb *np, int target); | 267 | void sym_xpt_async_sent_bdr(struct sym_hcb *np, int target); |
266 | int sym_setup_data_and_start (struct sym_hcb *np, struct scsi_cmnd *csio, struct sym_ccb *cp); | 268 | int sym_setup_data_and_start (struct sym_hcb *np, struct scsi_cmnd *csio, struct sym_ccb *cp); |
267 | void sym_log_bus_error(struct sym_hcb *np); | 269 | void sym_log_bus_error(struct sym_hcb *np); |
270 | void sym_dump_registers(struct sym_hcb *np); | ||
268 | 271 | ||
269 | #endif /* SYM_GLUE_H */ | 272 | #endif /* SYM_GLUE_H */ |
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c index 21cd4c7f5289..af24c447b195 100644 --- a/drivers/scsi/sym53c8xx_2/sym_hipd.c +++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c | |||
@@ -1180,10 +1180,10 @@ static void sym_log_hard_error(struct sym_hcb *np, u_short sist, u_char dstat) | |||
1180 | scr_to_cpu((int) *(u32 *)(script_base + script_ofs))); | 1180 | scr_to_cpu((int) *(u32 *)(script_base + script_ofs))); |
1181 | } | 1181 | } |
1182 | 1182 | ||
1183 | printf ("%s: regdump:", sym_name(np)); | 1183 | printf("%s: regdump:", sym_name(np)); |
1184 | for (i=0; i<24;i++) | 1184 | for (i = 0; i < 24; i++) |
1185 | printf (" %02x", (unsigned)INB_OFF(np, i)); | 1185 | printf(" %02x", (unsigned)INB_OFF(np, i)); |
1186 | printf (".\n"); | 1186 | printf(".\n"); |
1187 | 1187 | ||
1188 | /* | 1188 | /* |
1189 | * PCI BUS error. | 1189 | * PCI BUS error. |
@@ -1192,6 +1192,16 @@ static void sym_log_hard_error(struct sym_hcb *np, u_short sist, u_char dstat) | |||
1192 | sym_log_bus_error(np); | 1192 | sym_log_bus_error(np); |
1193 | } | 1193 | } |
1194 | 1194 | ||
1195 | void sym_dump_registers(struct sym_hcb *np) | ||
1196 | { | ||
1197 | u_short sist; | ||
1198 | u_char dstat; | ||
1199 | |||
1200 | sist = INW(np, nc_sist); | ||
1201 | dstat = INB(np, nc_dstat); | ||
1202 | sym_log_hard_error(np, sist, dstat); | ||
1203 | } | ||
1204 | |||
1195 | static struct sym_chip sym_dev_table[] = { | 1205 | static struct sym_chip sym_dev_table[] = { |
1196 | {PCI_DEVICE_ID_NCR_53C810, 0x0f, "810", 4, 8, 4, 64, | 1206 | {PCI_DEVICE_ID_NCR_53C810, 0x0f, "810", 4, 8, 4, 64, |
1197 | FE_ERL} | 1207 | FE_ERL} |
@@ -2809,6 +2819,13 @@ void sym_interrupt (struct sym_hcb *np) | |||
2809 | dstat |= INB(np, nc_dstat); | 2819 | dstat |= INB(np, nc_dstat); |
2810 | istatc = INB(np, nc_istat); | 2820 | istatc = INB(np, nc_istat); |
2811 | istat |= istatc; | 2821 | istat |= istatc; |
2822 | |||
2823 | /* Prevent deadlock waiting on a condition that may | ||
2824 | * never clear. */ | ||
2825 | if (unlikely(sist == 0xffff && dstat == 0xff)) { | ||
2826 | if (pci_channel_offline(np->s.device)) | ||
2827 | return; | ||
2828 | } | ||
2812 | } while (istatc & (SIP|DIP)); | 2829 | } while (istatc & (SIP|DIP)); |
2813 | 2830 | ||
2814 | if (DEBUG_FLAGS & DEBUG_TINY) | 2831 | if (DEBUG_FLAGS & DEBUG_TINY) |