aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/scsi
diff options
context:
space:
mode:
author Linas Vepstas <linas@austin.ibm.com> 2007-10-05 15:55:04 -0400
committer James Bottomley <jejb@mulgrave.localdomain> 2007-10-23 15:10:20 -0400
commit d68cd75992f95d6977956fb227f02e6d532f3d26 (patch)
tree eb28d218817bb9c893220191ad9a7a90ef474861 /drivers/scsi
parent 2ba65367720d871f9d955ca3ef96358999182765 (diff)
[SCSI] sym53c8xx: PCI Error Recovery support
This patch adds the PCI error recovery callbacks to the Symbios SCSI device driver. It includes support for First Failure Data Capture.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>

Assorted changes to initial patches, including returning IRQ_NONE from the interrupt handler if the device is offline and re-using the eh_done completion in the scsi error handler.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
Diffstat (limited to 'drivers/scsi')
-rw-r--r-- drivers/scsi/sym53c8xx_2/sym_glue.c 179
-rw-r--r-- drivers/scsi/sym53c8xx_2/sym_glue.h 3
-rw-r--r-- drivers/scsi/sym53c8xx_2/sym_hipd.c 25
3 files changed, 200 insertions, 7 deletions
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 4de0692f5c4f..67a577db5d18 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -134,7 +134,7 @@ static struct scsi_transport_template *sym2_transport_template = NULL;
134 * Driver private area in the SCSI command structure. 134 * Driver private area in the SCSI command structure.
135 */ 135 */
136struct sym_ucmd { /* Override the SCSI pointer structure */ 136struct sym_ucmd { /* Override the SCSI pointer structure */
137 struct completion *eh_done; /* For error handling */ 137 struct completion *eh_done; /* SCSI error handling */
138}; 138};
139 139
140#define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp)) 140#define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp))
@@ -556,6 +556,10 @@ static irqreturn_t sym53c8xx_intr(int irq, void *dev_id)
556{ 556{
557 struct sym_hcb *np = dev_id; 557 struct sym_hcb *np = dev_id;
558 558
559 /* Avoid spinloop trying to handle interrupts on frozen device */
560 if (pci_channel_offline(np->s.device))
561 return IRQ_NONE;
562
559 if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("["); 563 if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
560 564
561 spin_lock(np->s.host->host_lock); 565 spin_lock(np->s.host->host_lock);
@@ -598,6 +602,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
598 struct sym_hcb *np = SYM_SOFTC_PTR(cmd); 602 struct sym_hcb *np = SYM_SOFTC_PTR(cmd);
599 struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd); 603 struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd);
600 struct Scsi_Host *host = cmd->device->host; 604 struct Scsi_Host *host = cmd->device->host;
605 struct pci_dev *pdev = np->s.device;
601 SYM_QUEHEAD *qp; 606 SYM_QUEHEAD *qp;
602 int cmd_queued = 0; 607 int cmd_queued = 0;
603 int sts = -1; 608 int sts = -1;
@@ -605,6 +610,38 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
605 610
606 dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname); 611 dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
607 612
613 /* We may be in an error condition because the PCI bus
614 * went down. In this case, we need to wait until the
615 * PCI bus is reset, the card is reset, and only then
616 * proceed with the scsi error recovery. There's no
617 * point in hurrying; take a leisurely wait.
618 */
619#define WAIT_FOR_PCI_RECOVERY 35
620 if (pci_channel_offline(pdev)) {
621 struct host_data *hostdata = shost_priv(host);
622 struct completion *io_reset;
623 int finished_reset = 0;
624 init_completion(&eh_done);
625 spin_lock_irq(host->host_lock);
626 /* Make sure we didn't race */
627 if (pci_channel_offline(pdev)) {
628 if (!hostdata->io_reset)
629 hostdata->io_reset = &eh_done;
630 io_reset = hostdata->io_reset;
631 } else {
632 io_reset = NULL;
633 }
634
635 if (!pci_channel_offline(pdev))
636 finished_reset = 1;
637 spin_unlock_irq(host->host_lock);
638 if (!finished_reset)
639 finished_reset = wait_for_completion_timeout(io_reset,
640 WAIT_FOR_PCI_RECOVERY*HZ);
641 if (!finished_reset)
642 return SCSI_FAILED;
643 }
644
608 spin_lock_irq(host->host_lock); 645 spin_lock_irq(host->host_lock);
609 /* This one is queued in some place -> to wait for completion */ 646 /* This one is queued in some place -> to wait for completion */
610 FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) { 647 FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -630,7 +667,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
630 break; 667 break;
631 case SYM_EH_HOST_RESET: 668 case SYM_EH_HOST_RESET:
632 sym_reset_scsi_bus(np, 0); 669 sym_reset_scsi_bus(np, 0);
633 sym_start_up (np, 1); 670 sym_start_up(np, 1);
634 sts = 0; 671 sts = 0;
635 break; 672 break;
636 default: 673 default:
@@ -1435,7 +1472,7 @@ static struct Scsi_Host * __devinit sym_attach(struct scsi_host_template *tpnt,
1435 /* 1472 /*
1436 * Start the SCRIPTS. 1473 * Start the SCRIPTS.
1437 */ 1474 */
1438 sym_start_up (np, 1); 1475 sym_start_up(np, 1);
1439 1476
1440 /* 1477 /*
1441 * Start the timer daemon 1478 * Start the timer daemon
@@ -1823,6 +1860,134 @@ static void __devexit sym2_remove(struct pci_dev *pdev)
1823 attach_count--; 1860 attach_count--;
1824} 1861}
1825 1862
1863/**
1864 * sym2_io_error_detected() - called when PCI error is detected (the .error_detected callback of sym2_err_handler)
1865 * @pdev: pointer to PCI device
1866 * @state: current state of the PCI slot
1867 * Returns PCI_ERS_RESULT_DISCONNECT after removing the device on permanent failure, otherwise PCI_ERS_RESULT_CAN_RECOVER. */
1868static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
1869 enum pci_channel_state state)
1870{
1871 /* If slot is permanently frozen, turn everything off */
1872 if (state == pci_channel_io_perm_failure) {
1873 sym2_remove(pdev);
1874 return PCI_ERS_RESULT_DISCONNECT;
1875 }
1876
1877 disable_irq(pdev->irq); /* re-enabled by enable_irq() in sym2_io_slot_reset() */
1878 pci_disable_device(pdev); /* re-enabled by pci_enable_device() in sym2_io_slot_reset() */
1879
1880 /* Request that MMIO be enabled, so register dump can be taken. */
1881 return PCI_ERS_RESULT_CAN_RECOVER;
1882}
1883
1884/**
1885 * sym2_io_slot_dump() - dump debug registers (the .mmio_enabled callback of sym2_err_handler); always returns PCI_ERS_RESULT_NEED_RESET
1886 * @pdev: pointer to PCI device
1887 */
1888static pci_ers_result_t sym2_io_slot_dump(struct pci_dev *pdev)
1889{
1890 struct sym_hcb *np = pci_get_drvdata(pdev);
1891
1892 sym_dump_registers(np); /* reads SIST/DSTAT and logs them through sym_log_hard_error() */
1893
1894 /* Request a slot reset. */
1895 return PCI_ERS_RESULT_NEED_RESET;
1896}
1897
1898/**
1899 * sym2_reset_workarounds() - hardware-specific work-arounds
1900 * @pdev: pointer to PCI device
1901 * This routine is similar to sym_set_workarounds(), except
1902 * that, at this point, we already know that the device was
1903 * successfully initialized at least once before, and so most
1904 * of the steps taken there are unneeded here.
1905 */
1906static void sym2_reset_workarounds(struct pci_dev *pdev)
1907{
1908 u_char revision;
1909 u_short status_reg;
1910 struct sym_chip *chip;
1911
1912 pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
1913 chip = sym_lookup_chip_table(pdev->device, revision); /* NOTE(review): chip is dereferenced below without a NULL check; confirm the lookup cannot fail for an already-attached device */
1914
1915 /* Work around for errant bit in 895A, in a fashion
1916 * similar to what is done in sym_set_workarounds().
1917 */
1918 pci_read_config_word(pdev, PCI_STATUS, &status_reg);
1919 if (!(chip->features & FE_66MHZ) && (status_reg & PCI_STATUS_66MHZ)) { /* chip table says no 66MHz support, but the status register claims it */
1920 status_reg = PCI_STATUS_66MHZ; /* write back only the 66MHz bit */
1921 pci_write_config_word(pdev, PCI_STATUS, status_reg);
1922 pci_read_config_word(pdev, PCI_STATUS, &status_reg); /* read back after the write; the value is not used afterwards */
1923 }
1924}
1925
1926/**
1927 * sym2_io_slot_reset() - called when the pci bus has been reset.
1928 * @pdev: pointer to PCI device
1929 *
1930 * Restart the card from scratch. Returns PCI_ERS_RESULT_RECOVERED on success, or PCI_ERS_RESULT_DISCONNECT if the device cannot be enabled, MWI cannot be set, or the SCSI bus reset fails.
1931 */
1932static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev)
1933{
1934 struct sym_hcb *np = pci_get_drvdata(pdev);
1935
1936 printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
1937 sym_name(np));
1938
1939 if (pci_enable_device(pdev)) { /* undoes pci_disable_device() from sym2_io_error_detected() */
1940 printk(KERN_ERR "%s: Unable to enable after PCI reset\n",
1941 sym_name(np));
1942 return PCI_ERS_RESULT_DISCONNECT;
1943 }
1944
1945 pci_set_master(pdev);
1946 enable_irq(pdev->irq); /* balances disable_irq() in sym2_io_error_detected(). NOTE(review): the IRQ is live before the steps below that can still return DISCONNECT; confirm this ordering is intended */
1947
1948 /* If the chip can do Memory Write Invalidate, enable it */
1949 if (np->features & FE_WRIE) {
1950 if (pci_set_mwi(pdev))
1951 return PCI_ERS_RESULT_DISCONNECT;
1952 }
1953
1954 /* Perform work-arounds, analogous to sym_set_workarounds() */
1955 sym2_reset_workarounds(pdev);
1956
1957 /* Perform host reset only on one instance of the card (PCI function 0) */
1958 if (PCI_FUNC(pdev->devfn) == 0) {
1959 if (sym_reset_scsi_bus(np, 0)) {
1960 printk(KERN_ERR "%s: Unable to reset scsi host\n",
1961 sym_name(np));
1962 return PCI_ERS_RESULT_DISCONNECT;
1963 }
1964 sym_start_up(np, 1);
1965 }
1966
1967 return PCI_ERS_RESULT_RECOVERED;
1968}
1969
1970/**
1971 * sym2_io_resume() - resume normal ops after PCI reset
1972 * @pdev: pointer to PCI device
1973 *
1974 * Called when the error recovery driver tells us that it's
1975 * OK to resume normal operation. Use completion to allow
1976 * halted scsi ops to resume.
1977 */
1978static void sym2_io_resume(struct pci_dev *pdev)
1979{
1980 struct sym_hcb *np = pci_get_drvdata(pdev);
1981 struct Scsi_Host *shost = np->s.host;
1982 struct host_data *hostdata = shost_priv(shost);
1983
1984 spin_lock_irq(shost->host_lock); /* io_reset is also installed under host_lock by sym_eh_handler() */
1985 if (hostdata->io_reset)
1986 complete_all(hostdata->io_reset); /* release every waiter blocked on this completion in sym_eh_handler() */
1987 hostdata->io_reset = NULL; /* cleared so that sym_eh_handler() installs a fresh completion on the next PCI error */
1988 spin_unlock_irq(shost->host_lock);
1989}
1990
1826static void sym2_get_signalling(struct Scsi_Host *shost) 1991static void sym2_get_signalling(struct Scsi_Host *shost)
1827{ 1992{
1828 struct sym_hcb *np = sym_get_hcb(shost); 1993 struct sym_hcb *np = sym_get_hcb(shost);
@@ -1985,11 +2150,19 @@ static struct pci_device_id sym2_id_table[] __devinitdata = {
1985 2150
1986MODULE_DEVICE_TABLE(pci, sym2_id_table); 2151MODULE_DEVICE_TABLE(pci, sym2_id_table);
1987 2152
2153static struct pci_error_handlers sym2_err_handler = { /* PCI error recovery callbacks; hooked up through sym2_driver.err_handler */
2154 .error_detected = sym2_io_error_detected, /* disable IRQ and device, or remove the device on permanent failure */
2155 .mmio_enabled = sym2_io_slot_dump, /* dump chip registers, then request a slot reset */
2156 .slot_reset = sym2_io_slot_reset, /* re-enable the device and restart the chip */
2157 .resume = sym2_io_resume, /* wake SCSI error-handler threads waiting on io_reset */
2158};
2159
1988static struct pci_driver sym2_driver = { 2160static struct pci_driver sym2_driver = {
1989 .name = NAME53C8XX, 2161 .name = NAME53C8XX,
1990 .id_table = sym2_id_table, 2162 .id_table = sym2_id_table,
1991 .probe = sym2_probe, 2163 .probe = sym2_probe,
1992 .remove = __devexit_p(sym2_remove), 2164 .remove = __devexit_p(sym2_remove),
2165 .err_handler = &sym2_err_handler,
1993}; 2166};
1994 2167
1995static int __init sym2_init(void) 2168static int __init sym2_init(void)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h
index bea7bcc69ebd..d5ba5aae6c5a 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.h
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.h
@@ -40,6 +40,7 @@
40#ifndef SYM_GLUE_H 40#ifndef SYM_GLUE_H
41#define SYM_GLUE_H 41#define SYM_GLUE_H
42 42
43#include <linux/completion.h>
43#include <linux/delay.h> 44#include <linux/delay.h>
44#include <linux/ioport.h> 45#include <linux/ioport.h>
45#include <linux/pci.h> 46#include <linux/pci.h>
@@ -220,6 +221,7 @@ struct sym_device {
220 */ 221 */
221struct host_data { 222struct host_data {
222 struct sym_hcb *ncb; 223 struct sym_hcb *ncb;
224 struct completion *io_reset; /* PCI error handling */
223}; 225};
224 226
225static inline struct sym_hcb * sym_get_hcb(struct Scsi_Host *host) 227static inline struct sym_hcb * sym_get_hcb(struct Scsi_Host *host)
@@ -265,5 +267,6 @@ void sym_xpt_async_bus_reset(struct sym_hcb *np);
265void sym_xpt_async_sent_bdr(struct sym_hcb *np, int target); 267void sym_xpt_async_sent_bdr(struct sym_hcb *np, int target);
266int sym_setup_data_and_start (struct sym_hcb *np, struct scsi_cmnd *csio, struct sym_ccb *cp); 268int sym_setup_data_and_start (struct sym_hcb *np, struct scsi_cmnd *csio, struct sym_ccb *cp);
267void sym_log_bus_error(struct sym_hcb *np); 269void sym_log_bus_error(struct sym_hcb *np);
270void sym_dump_registers(struct sym_hcb *np);
268 271
269#endif /* SYM_GLUE_H */ 272#endif /* SYM_GLUE_H */
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 21cd4c7f5289..af24c447b195 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -1180,10 +1180,10 @@ static void sym_log_hard_error(struct sym_hcb *np, u_short sist, u_char dstat)
1180 scr_to_cpu((int) *(u32 *)(script_base + script_ofs))); 1180 scr_to_cpu((int) *(u32 *)(script_base + script_ofs)));
1181 } 1181 }
1182 1182
1183 printf ("%s: regdump:", sym_name(np)); 1183 printf("%s: regdump:", sym_name(np));
1184 for (i=0; i<24;i++) 1184 for (i = 0; i < 24; i++)
1185 printf (" %02x", (unsigned)INB_OFF(np, i)); 1185 printf(" %02x", (unsigned)INB_OFF(np, i));
1186 printf (".\n"); 1186 printf(".\n");
1187 1187
1188 /* 1188 /*
1189 * PCI BUS error. 1189 * PCI BUS error.
@@ -1192,6 +1192,16 @@ static void sym_log_hard_error(struct sym_hcb *np, u_short sist, u_char dstat)
1192 sym_log_bus_error(np); 1192 sym_log_bus_error(np);
1193} 1193}
1194 1194
1195void sym_dump_registers(struct sym_hcb *np) /* Read the SIST and DSTAT registers and log them via sym_log_hard_error(); called from sym2_io_slot_dump() */
1196{
1197 u_short sist;
1198 u_char dstat;
1199
1200 sist = INW(np, nc_sist);
1201 dstat = INB(np, nc_dstat);
1202 sym_log_hard_error(np, sist, dstat); /* prints the register dump and then calls sym_log_bus_error() */
1203}
1204
1195static struct sym_chip sym_dev_table[] = { 1205static struct sym_chip sym_dev_table[] = {
1196 {PCI_DEVICE_ID_NCR_53C810, 0x0f, "810", 4, 8, 4, 64, 1206 {PCI_DEVICE_ID_NCR_53C810, 0x0f, "810", 4, 8, 4, 64,
1197 FE_ERL} 1207 FE_ERL}
@@ -2809,6 +2819,13 @@ void sym_interrupt (struct sym_hcb *np)
2809 dstat |= INB(np, nc_dstat); 2819 dstat |= INB(np, nc_dstat);
2810 istatc = INB(np, nc_istat); 2820 istatc = INB(np, nc_istat);
2811 istat |= istatc; 2821 istat |= istatc;
2822
2823 /* Prevent deadlock waiting on a condition that may
2824 * never clear. */
2825 if (unlikely(sist == 0xffff && dstat == 0xff)) {
2826 if (pci_channel_offline(np->s.device))
2827 return;
2828 }
2812 } while (istatc & (SIP|DIP)); 2829 } while (istatc & (SIP|DIP));
2813 2830
2814 if (DEBUG_FLAGS & DEBUG_TINY) 2831 if (DEBUG_FLAGS & DEBUG_TINY)