aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/include/asm
diff options
context:
space:
mode:
authorGavin Shan <gwshan@linux.vnet.ibm.com>2014-04-24 04:00:19 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-04-28 03:34:32 -0400
commitd2b0f6f77ee525811b6efe864efa6a4eb82eea73 (patch)
tree84205706f9cc2e03425ba3a48edf2a1d527e3267 /arch/powerpc/include/asm
parent7f52a526f64c69c913f0027fbf43821ff0b3a7d7 (diff)
powerpc/eeh: No hotplug on permanently removed dev
The issue was detected in a somewhat complicated test case where we have multiple hierarchical PEs, as shown in the following figure:

    +-----------------+
    | PE#3     p2p#0  |
    |          p2p#1  |
    +-----------------+
            |
    +-----------------+
    | PE#4     pdev#0 |
    |          pdev#1 |
    +-----------------+

PE#4 (which has 2 PCI devices) is the child of PE#3, which has 2 p2p bridges. We accidentally hit a less-known scenario: PE#4 was removed permanently from the system because of a permanent failure (e.g. exceeding the max allowed failure times in the last hour), then we detected EEH errors on PE#3 and tried to recover it. However, the eeh_dev instances for pdev#0/1 were not detached from PE#4, which was still connected to PE#3. All of that was because we rely on the count-based pcibios_release_device(), which isn't reliable enough. When doing recovery for PE#3, we still applied hotplug on PE#4 and pdev#0/1, which are not valid any more. Eventually, we ran into a kernel crash.

The patch fixes the above issue from two aspects. For unplug, we simply skip those permanently removed PEs, whose state is (EEH_PE_STATE_ISOLATED && !EEH_PE_STATE_RECOVERING) and whose frozen count should be greater than EEH_MAX_ALLOWED_FREEZES. For plug, we mark all permanently removed EEH devices with EEH_DEV_REMOVED and return 0xFF's on reads of their PCI config space so that the PCI core will omit them.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/include/asm')
-rw-r--r--arch/powerpc/include/asm/eeh.h1
-rw-r--r--arch/powerpc/include/asm/ppc-pci.h1
2 files changed, 2 insertions, 0 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f4a93218fbcb..2841ecac4c47 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -98,6 +98,7 @@ struct eeh_pe {
98 98
99#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ 99#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */
100#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ 100#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */
101#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */
101 102
102struct eeh_dev { 103struct eeh_dev {
103 int mode; /* EEH mode */ 104 int mode; /* EEH mode */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index ed57fa7920c8..db1e2b8eff3c 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
58int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); 58int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
59void eeh_pe_state_mark(struct eeh_pe *pe, int state); 59void eeh_pe_state_mark(struct eeh_pe *pe, int state);
60void eeh_pe_state_clear(struct eeh_pe *pe, int state); 60void eeh_pe_state_clear(struct eeh_pe *pe, int state);
61void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
61 62
62void eeh_sysfs_add_device(struct pci_dev *pdev); 63void eeh_sysfs_add_device(struct pci_dev *pdev);
63void eeh_sysfs_remove_device(struct pci_dev *pdev); 64void eeh_sysfs_remove_device(struct pci_dev *pdev);