author		Linus Torvalds <torvalds@linux-foundation.org>	2013-07-04 13:29:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-04 13:29:23 -0400
commit		65b97fb7303050fc826e518cf67fc283da23314f (patch)
tree		595e7f04d65d95a39d65bd2dcf2385b3b6ea7969 /arch/powerpc/platforms
parent		ddcf6600b133697adbafd96e080818bdc0dfd028 (diff)
parent		1d8b368ab4aacfc3f864655baad4d31a3028ec1a (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc updates from Ben Herrenschmidt:
 "These are the powerpc changes for the 3.11 merge window.  In addition
  to the usual bug fixes and small updates, the main highlights are:

   - Support for transparent huge pages by Aneesh Kumar for 64-bit
     server processors.  This allows the use of 16M pages as
     transparent huge pages on kernels compiled with a 64K base page
     size.

   - Base VFIO support for KVM on power by Alexey Kardashevskiy.

   - Wiring up of our nvram to the pstore infrastructure, including
     putting compressed oopses in there, by Aruna Balakrishnaiah.

   - Move, rework and improve our "EEH" (basically PCI error handling
     and recovery) infrastructure.  It is no longer specific to pseries
     but is now usable by the new "powernv" platform as well (no
     hypervisor), by Gavin Shan.

   - I fixed some bugs in our math-emu instruction decoding and made it
     usable to emulate some optional FP instructions on processors with
     hard FP that lack them (such as fsqrt on Freescale embedded
     processors).

   - Support for the Power8 "Event Based Branch" facility by Michael
     Ellerman.  This facility allows what is basically "userspace
     interrupts" for performance monitor events.

   - A bunch of Transactional Memory vs. Signals bug fixes and HW
     breakpoint/watchpoint fixes by Michael Neuling.

  And more ...  I apologize in advance if I've failed to highlight
  something that somebody deemed worth it."

* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (156 commits)
  pstore: Add hsize argument in write_buf call of pstore_ftrace_call
  powerpc/fsl: add MPIC timer wakeup support
  powerpc/mpic: create mpic subsystem object
  powerpc/mpic: add global timer support
  powerpc/mpic: add irq_set_wake support
  powerpc/85xx: enable coreint for all the 64bit boards
  powerpc/8xx: Erroneous double irq_eoi() on CPM IRQ in MPC8xx
  powerpc/fsl: Enable CONFIG_E1000E in mpc85xx_smp_defconfig
  powerpc/mpic: Add get_version API both for internal and external use
  powerpc: Handle both new style and old style reserve maps
  powerpc/hw_brk: Fix off by one error when validating DAWR region end
  powerpc/pseries: Support compression of oops text via pstore
  powerpc/pseries: Re-organise the oops compression code
  pstore: Pass header size in the pstore write callback
  powerpc/powernv: Fix iommu initialization again
  powerpc/pseries: Inform the hypervisor we are using EBB regs
  powerpc/perf: Add power8 EBB support
  powerpc/perf: Core EBB support for 64-bit book3s
  powerpc/perf: Drop MMCRA from thread_struct
  powerpc/perf: Don't enable if we have zero events
  ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--  arch/powerpc/platforms/44x/currituck.c          |  43
-rw-r--r--  arch/powerpc/platforms/44x/iss4xx.c             |   4
-rw-r--r--  arch/powerpc/platforms/512x/mpc5121_ads.c       |   6
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x.h           |  12
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_generic.c   |   4
-rw-r--r--  arch/powerpc/platforms/512x/mpc512x_shared.c    |  31
-rw-r--r--  arch/powerpc/platforms/512x/pdm360ng.c          |   4
-rw-r--r--  arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c  |  12
-rw-r--r--  arch/powerpc/platforms/85xx/p5020_ds.c          |   5
-rw-r--r--  arch/powerpc/platforms/85xx/p5040_ds.c          |   5
-rw-r--r--  arch/powerpc/platforms/85xx/smp.c               |   6
-rw-r--r--  arch/powerpc/platforms/85xx/t4240_qds.c         |   5
-rw-r--r--  arch/powerpc/platforms/8xx/m8xx_setup.c         |  14
-rw-r--r--  arch/powerpc/platforms/Kconfig                  |  26
-rw-r--r--  arch/powerpc/platforms/Kconfig.cputype          |   1
-rw-r--r--  arch/powerpc/platforms/cell/beat_htab.c         |  16
-rw-r--r--  arch/powerpc/platforms/cell/smp.c               |   2
-rw-r--r--  arch/powerpc/platforms/powermac/smp.c           |   2
-rw-r--r--  arch/powerpc/platforms/powernv/Makefile         |   1
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-ioda.c       | 916
-rw-r--r--  arch/powerpc/platforms/powernv/eeh-powernv.c    | 379
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S  |   3
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c           |  69
-rw-r--r--  arch/powerpc/platforms/powernv/pci-ioda.c       |  62
-rw-r--r--  arch/powerpc/platforms/powernv/pci-p5ioc2.c     |  11
-rw-r--r--  arch/powerpc/platforms/powernv/pci.c            | 139
-rw-r--r--  arch/powerpc/platforms/powernv/pci.h            |  35
-rw-r--r--  arch/powerpc/platforms/powernv/setup.c          |   4
-rw-r--r--  arch/powerpc/platforms/powernv/smp.c            |   4
-rw-r--r--  arch/powerpc/platforms/ps3/htab.c               |   5
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig          |   5
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile         |   4
-rw-r--r--  arch/powerpc/platforms/pseries/eeh.c            | 942
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_cache.c      | 319
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_dev.c        | 112
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_driver.c     | 552
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_event.c      | 142
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_pe.c         | 653
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_sysfs.c      |  75
-rw-r--r--  arch/powerpc/platforms/pseries/io_event_irq.c   |   2
-rw-r--r--  arch/powerpc/platforms/pseries/iommu.c          |   4
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c           | 142
-rw-r--r--  arch/powerpc/platforms/pseries/nvram.c          | 554
-rw-r--r--  arch/powerpc/platforms/pseries/pci_dlpar.c      |  85
-rw-r--r--  arch/powerpc/platforms/pseries/ras.c            |   8
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c            |   2
46 files changed, 2300 insertions, 3127 deletions
diff --git a/arch/powerpc/platforms/44x/currituck.c b/arch/powerpc/platforms/44x/currituck.c
index ecd3890c40d7..7f1b71a01c6a 100644
--- a/arch/powerpc/platforms/44x/currituck.c
+++ b/arch/powerpc/platforms/44x/currituck.c
@@ -91,12 +91,12 @@ static void __init ppc47x_init_irq(void)
 }
 
 #ifdef CONFIG_SMP
-static void __cpuinit smp_ppc47x_setup_cpu(int cpu)
+static void smp_ppc47x_setup_cpu(int cpu)
 {
         mpic_setup_this_cpu();
 }
 
-static int __cpuinit smp_ppc47x_kick_cpu(int cpu)
+static int smp_ppc47x_kick_cpu(int cpu)
 {
         struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
         const u64 *spin_table_addr_prop;
@@ -176,13 +176,48 @@ static int __init ppc47x_probe(void)
         return 1;
 }
 
+static int board_rev = -1;
+static int __init ppc47x_get_board_rev(void)
+{
+        u8 fpga_reg0;
+        void *fpga;
+        struct device_node *np;
+
+        np = of_find_compatible_node(NULL, NULL, "ibm,currituck-fpga");
+        if (!np)
+                goto fail;
+
+        fpga = of_iomap(np, 0);
+        of_node_put(np);
+        if (!fpga)
+                goto fail;
+
+        fpga_reg0 = ioread8(fpga);
+        board_rev = fpga_reg0 & 0x03;
+        pr_info("%s: Found board revision %d\n", __func__, board_rev);
+        iounmap(fpga);
+        return 0;
+
+fail:
+        pr_info("%s: Unable to find board revision\n", __func__);
+        return 0;
+}
+machine_arch_initcall(ppc47x, ppc47x_get_board_rev);
+
 /* Use USB controller should have been hardware swizzled but it wasn't :( */
 static void ppc47x_pci_irq_fixup(struct pci_dev *dev)
 {
         if (dev->vendor == 0x1033 && (dev->device == 0x0035 ||
                                       dev->device == 0x00e0)) {
-                dev->irq = irq_create_mapping(NULL, 47);
-                pr_info("%s: Mapping irq 47 %d\n", __func__, dev->irq);
+                if (board_rev == 0) {
+                        dev->irq = irq_create_mapping(NULL, 47);
+                        pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+                } else if (board_rev == 2) {
+                        dev->irq = irq_create_mapping(NULL, 49);
+                        pr_info("%s: Mapping irq %d\n", __func__, dev->irq);
+                } else {
+                        pr_alert("%s: Unknown board revision\n", __func__);
+                }
         }
 }
 
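The board-revision lookup added above is registered with machine_arch_initcall(), a powerpc helper that runs the initcall only when the active machine matches. Roughly, the wrapper it generates looks like the following sketch (paraphrased from asm/machdep.h; illustrative, not part of this patch):

        /* Sketch: what machine_arch_initcall(ppc47x, fn) boils down to */
        static int __init __machine_initcall_ppc47x_fn(void)
        {
                if (machine_is(ppc47x))
                        return ppc47x_get_board_rev();
                return 0;
        }
        arch_initcall(__machine_initcall_ppc47x_fn);
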
diff --git a/arch/powerpc/platforms/44x/iss4xx.c b/arch/powerpc/platforms/44x/iss4xx.c
index a28a8629727e..4241bc825800 100644
--- a/arch/powerpc/platforms/44x/iss4xx.c
+++ b/arch/powerpc/platforms/44x/iss4xx.c
@@ -81,12 +81,12 @@ static void __init iss4xx_init_irq(void)
 }
 
 #ifdef CONFIG_SMP
-static void __cpuinit smp_iss4xx_setup_cpu(int cpu)
+static void smp_iss4xx_setup_cpu(int cpu)
 {
         mpic_setup_this_cpu();
 }
 
-static int __cpuinit smp_iss4xx_kick_cpu(int cpu)
+static int smp_iss4xx_kick_cpu(int cpu)
 {
         struct device_node *cpunode = of_get_cpu_node(cpu, NULL);
         const u64 *spin_table_addr_prop;
diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c
index 0a134e0469ef..3e90ece10ae9 100644
--- a/arch/powerpc/platforms/512x/mpc5121_ads.c
+++ b/arch/powerpc/platforms/512x/mpc5121_ads.c
@@ -43,9 +43,7 @@ static void __init mpc5121_ads_setup_arch(void)
         mpc83xx_add_bridge(np);
 #endif
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-        mpc512x_setup_diu();
-#endif
+        mpc512x_setup_arch();
 }
 
 static void __init mpc5121_ads_init_IRQ(void)
@@ -69,7 +67,7 @@ define_machine(mpc5121_ads) {
         .probe = mpc5121_ads_probe,
         .setup_arch = mpc5121_ads_setup_arch,
         .init = mpc512x_init,
-        .init_early = mpc512x_init_diu,
+        .init_early = mpc512x_init_early,
         .init_IRQ = mpc5121_ads_init_IRQ,
         .get_irq = ipic_get_irq,
         .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h
index 0a8e60023944..cc97f022d028 100644
--- a/arch/powerpc/platforms/512x/mpc512x.h
+++ b/arch/powerpc/platforms/512x/mpc512x.h
@@ -12,18 +12,12 @@
 #ifndef __MPC512X_H__
 #define __MPC512X_H__
 extern void __init mpc512x_init_IRQ(void);
+extern void __init mpc512x_init_early(void);
 extern void __init mpc512x_init(void);
+extern void __init mpc512x_setup_arch(void);
 extern int __init mpc5121_clk_init(void);
-void __init mpc512x_declare_of_platform_devices(void);
 extern const char *mpc512x_select_psc_compat(void);
+extern const char *mpc512x_select_reset_compat(void);
 extern void mpc512x_restart(char *cmd);
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
-void mpc512x_init_diu(void);
-void mpc512x_setup_diu(void);
-#else
-#define mpc512x_init_diu NULL
-#define mpc512x_setup_diu NULL
-#endif
-
 #endif /* __MPC512X_H__ */
diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c
index 5fb919b30924..ce71408781a0 100644
--- a/arch/powerpc/platforms/512x/mpc512x_generic.c
+++ b/arch/powerpc/platforms/512x/mpc512x_generic.c
@@ -45,8 +45,8 @@ define_machine(mpc512x_generic) {
         .name = "MPC512x generic",
         .probe = mpc512x_generic_probe,
         .init = mpc512x_init,
-        .init_early = mpc512x_init_diu,
-        .setup_arch = mpc512x_setup_diu,
+        .init_early = mpc512x_init_early,
+        .setup_arch = mpc512x_setup_arch,
         .init_IRQ = mpc512x_init_IRQ,
         .get_irq = ipic_get_irq,
         .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 6eb94ab99d39..a82a41b4fd91 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -35,8 +35,10 @@ static struct mpc512x_reset_module __iomem *reset_module_base;
 static void __init mpc512x_restart_init(void)
 {
         struct device_node *np;
+        const char *reset_compat;
 
-        np = of_find_compatible_node(NULL, NULL, "fsl,mpc5121-reset");
+        reset_compat = mpc512x_select_reset_compat();
+        np = of_find_compatible_node(NULL, NULL, reset_compat);
         if (!np)
                 return;
 
@@ -58,7 +60,7 @@ void mpc512x_restart(char *cmd)
                 ;
 }
 
-#if defined(CONFIG_FB_FSL_DIU) || defined(CONFIG_FB_FSL_DIU_MODULE)
+#if IS_ENABLED(CONFIG_FB_FSL_DIU)
 
 struct fsl_diu_shared_fb {
         u8 gamma[0x300];        /* 32-bit aligned! */
@@ -355,6 +357,17 @@ const char *mpc512x_select_psc_compat(void)
         return NULL;
 }
 
+const char *mpc512x_select_reset_compat(void)
+{
+        if (of_machine_is_compatible("fsl,mpc5121"))
+                return "fsl,mpc5121-reset";
+
+        if (of_machine_is_compatible("fsl,mpc5125"))
+                return "fsl,mpc5125-reset";
+
+        return NULL;
+}
+
 static unsigned int __init get_fifo_size(struct device_node *np,
                                          char *prop_name)
 {
@@ -436,14 +449,26 @@ void __init mpc512x_psc_fifo_init(void)
         }
 }
 
+void __init mpc512x_init_early(void)
+{
+        mpc512x_restart_init();
+        if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+                mpc512x_init_diu();
+}
+
 void __init mpc512x_init(void)
 {
         mpc5121_clk_init();
         mpc512x_declare_of_platform_devices();
-        mpc512x_restart_init();
         mpc512x_psc_fifo_init();
 }
 
+void __init mpc512x_setup_arch(void)
+{
+        if (IS_ENABLED(CONFIG_FB_FSL_DIU))
+                mpc512x_setup_diu();
+}
+
 /**
  * mpc512x_cs_config - Setup chip select configuration
  * @cs: chip select number
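The conversion above uses IS_ENABLED() from <linux/kconfig.h>, which evaluates to 1 when the option is built in (=y) or modular (=m), so a single test replaces the defined(CONFIG_FOO) || defined(CONFIG_FOO_MODULE) pair. It also works as an ordinary C expression, so the disabled branch is still compile-checked and then discarded as dead code. A minimal sketch of the idiom (illustrative only, not from this patch):

        #include <linux/kconfig.h>

        static void init_video(void)
        {
                /* Covers both CONFIG_FB_FSL_DIU=y and =m; the branch is
                 * type-checked even when the option is off, then the
                 * compiler drops it. */
                if (IS_ENABLED(CONFIG_FB_FSL_DIU))
                        mpc512x_init_diu();
        }
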
diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c
index 0575e858291c..24b314d7bd5f 100644
--- a/arch/powerpc/platforms/512x/pdm360ng.c
+++ b/arch/powerpc/platforms/512x/pdm360ng.c
@@ -119,9 +119,9 @@ static int __init pdm360ng_probe(void)
 define_machine(pdm360ng) {
         .name = "PDM360NG",
         .probe = pdm360ng_probe,
-        .setup_arch = mpc512x_setup_diu,
+        .setup_arch = mpc512x_setup_arch,
         .init = pdm360ng_init,
-        .init_early = mpc512x_init_diu,
+        .init_early = mpc512x_init_early,
         .init_IRQ = mpc512x_init_IRQ,
         .get_irq = ipic_get_irq,
         .calibrate_decr = generic_calibrate_decr,
diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
index 624cb51d19c9..7bc315822935 100644
--- a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
+++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c
@@ -231,17 +231,7 @@ static struct i2c_driver mcu_driver = {
         .id_table = mcu_ids,
 };
 
-static int __init mcu_init(void)
-{
-        return i2c_add_driver(&mcu_driver);
-}
-module_init(mcu_init);
-
-static void __exit mcu_exit(void)
-{
-        i2c_del_driver(&mcu_driver);
-}
-module_exit(mcu_exit);
+module_i2c_driver(mcu_driver);
 
 MODULE_DESCRIPTION("Power Management and GPIO expander driver for "
                    "MPC8349E-mITX-compatible MCU");
diff --git a/arch/powerpc/platforms/85xx/p5020_ds.c b/arch/powerpc/platforms/85xx/p5020_ds.c
index 753a42c29d4d..39cfa4044e6c 100644
--- a/arch/powerpc/platforms/85xx/p5020_ds.c
+++ b/arch/powerpc/platforms/85xx/p5020_ds.c
@@ -75,12 +75,7 @@ define_machine(p5020_ds) {
 #ifdef CONFIG_PCI
         .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
 #endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-        .get_irq = mpic_get_irq,
-#else
         .get_irq = mpic_get_coreint_irq,
-#endif
         .restart = fsl_rstcr_restart,
         .calibrate_decr = generic_calibrate_decr,
         .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/p5040_ds.c b/arch/powerpc/platforms/85xx/p5040_ds.c
index 11381851828e..f70e74cddf97 100644
--- a/arch/powerpc/platforms/85xx/p5040_ds.c
+++ b/arch/powerpc/platforms/85xx/p5040_ds.c
@@ -66,12 +66,7 @@ define_machine(p5040_ds) {
 #ifdef CONFIG_PCI
         .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
 #endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-        .get_irq = mpic_get_irq,
-#else
         .get_irq = mpic_get_coreint_irq,
-#endif
         .restart = fsl_rstcr_restart,
         .calibrate_decr = generic_calibrate_decr,
         .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 6a1759939c6b..5ced4f5bb2b2 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -99,7 +99,7 @@ static void mpc85xx_take_timebase(void)
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
-static void __cpuinit smp_85xx_mach_cpu_die(void)
+static void smp_85xx_mach_cpu_die(void)
 {
         unsigned int cpu = smp_processor_id();
         u32 tmp;
@@ -141,7 +141,7 @@ static inline u32 read_spin_table_addr_l(void *spin_table)
         return in_be32(&((struct epapr_spin_table *)spin_table)->addr_l);
 }
 
-static int __cpuinit smp_85xx_kick_cpu(int nr)
+static int smp_85xx_kick_cpu(int nr)
 {
         unsigned long flags;
         const u64 *cpu_rel_addr;
@@ -362,7 +362,7 @@ static void mpc85xx_smp_machine_kexec(struct kimage *image)
 }
 #endif /* CONFIG_KEXEC */
 
-static void __cpuinit smp_85xx_setup_cpu(int cpu_nr)
+static void smp_85xx_setup_cpu(int cpu_nr)
 {
         if (smp_85xx_ops.probe == smp_mpic_probe)
                 mpic_setup_this_cpu();
diff --git a/arch/powerpc/platforms/85xx/t4240_qds.c b/arch/powerpc/platforms/85xx/t4240_qds.c
index 5998e9f33304..91ead6b1b8af 100644
--- a/arch/powerpc/platforms/85xx/t4240_qds.c
+++ b/arch/powerpc/platforms/85xx/t4240_qds.c
@@ -75,12 +75,7 @@ define_machine(t4240_qds) {
 #ifdef CONFIG_PCI
         .pcibios_fixup_bus = fsl_pcibios_fixup_bus,
 #endif
-/* coreint doesn't play nice with lazy EE, use legacy mpic for now */
-#ifdef CONFIG_PPC64
-        .get_irq = mpic_get_irq,
-#else
         .get_irq = mpic_get_coreint_irq,
-#endif
         .restart = fsl_rstcr_restart,
         .calibrate_decr = generic_calibrate_decr,
         .progress = udbg_progress,
diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c
index 1e121088826f..587a2828b06c 100644
--- a/arch/powerpc/platforms/8xx/m8xx_setup.c
+++ b/arch/powerpc/platforms/8xx/m8xx_setup.c
@@ -43,6 +43,7 @@ static irqreturn_t timebase_interrupt(int irq, void *dev)
 
 static struct irqaction tbint_irqaction = {
         .handler = timebase_interrupt,
+        .flags = IRQF_NO_THREAD,
         .name = "tbint",
 };
 
@@ -218,19 +219,12 @@ void mpc8xx_restart(char *cmd)
 
 static void cpm_cascade(unsigned int irq, struct irq_desc *desc)
 {
-        struct irq_chip *chip;
-        int cascade_irq;
-
-        if ((cascade_irq = cpm_get_irq()) >= 0) {
-                struct irq_desc *cdesc = irq_to_desc(cascade_irq);
+        struct irq_chip *chip = irq_desc_get_chip(desc);
+        int cascade_irq = cpm_get_irq();
 
+        if (cascade_irq >= 0)
                 generic_handle_irq(cascade_irq);
 
-                chip = irq_desc_get_chip(cdesc);
-                chip->irq_eoi(&cdesc->irq_data);
-        }
-
-        chip = irq_desc_get_chip(desc);
         chip->irq_eoi(&desc->irq_data);
 }
 
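The IRQF_NO_THREAD flag added to tbint_irqaction above keeps the timebase interrupt in hard-IRQ context even when forced interrupt threading ("threadirqs") is active, which low-level timer and cascade handlers require. A minimal sketch of requesting an interrupt with that flag (illustrative, not from this patch):

        #include <linux/interrupt.h>

        static irqreturn_t tb_handler(int irq, void *dev_id)
        {
                /* Runs in hard-IRQ context even under "threadirqs" */
                return IRQ_HANDLED;
        }

        static int __init install_tb_irq(unsigned int virq)
        {
                /* IRQF_NO_THREAD: never move this handler to a thread */
                return request_irq(virq, tb_handler, IRQF_NO_THREAD,
                                   "tbint-demo", NULL);
        }
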
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e17cdfc5ba40..d703775bda30 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -86,6 +86,27 @@ config MPIC
         bool
         default n
 
+config MPIC_TIMER
+        bool "MPIC Global Timer"
+        depends on MPIC && FSL_SOC
+        default n
+        help
+          The MPIC global timer is a hardware timer inside the
+          Freescale PIC complying with the OpenPIC standard. When the
+          specified interval times out, the hardware timer generates
+          an interrupt. The driver is currently only tested on Freescale
+          chips, but it can potentially support other global timers
+          complying with the OpenPIC standard.
+
+config FSL_MPIC_TIMER_WAKEUP
+        tristate "Freescale MPIC global timer wakeup driver"
+        depends on FSL_SOC && MPIC_TIMER && PM
+        default n
+        help
+          The driver provides a way to wake up the system by the
+          MPIC timer, e.g.
+          "echo 5 > /sys/devices/system/mpic/timer_wakeup"
+
 config PPC_EPAPR_HV_PIC
         bool
         default n
@@ -164,6 +185,11 @@ config IBMEBUS
         help
           Bus device driver for GX bus based adapters.
 
+config EEH
+        bool
+        depends on (PPC_POWERNV || PPC_PSERIES) && PCI
+        default y
+
 config PPC_MPC106
         bool
         default n
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 7819c40a6bc3..47d9a03dd415 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -71,6 +71,7 @@ config PPC_BOOK3S_64
         select PPC_FPU
         select PPC_HAVE_PMU_SUPPORT
         select SYS_SUPPORTS_HUGETLBFS
+        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if PPC_64K_PAGES
 
 config PPC_BOOK3E_64
         bool "Embedded processors"
diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
index 246e1d8b3af3..c34ee4e60873 100644
--- a/arch/powerpc/platforms/cell/beat_htab.c
+++ b/arch/powerpc/platforms/cell/beat_htab.c
@@ -185,7 +185,8 @@ static void beat_lpar_hptab_clear(void)
 static long beat_lpar_hpte_updatepp(unsigned long slot,
                                     unsigned long newpp,
                                     unsigned long vpn,
-                                    int psize, int ssize, int local)
+                                    int psize, int apsize,
+                                    int ssize, int local)
 {
         unsigned long lpar_rc;
         u64 dummy0, dummy1;
@@ -274,7 +275,8 @@ static void beat_lpar_hpte_updateboltedpp(unsigned long newpp,
 }
 
 static void beat_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
-                                      int psize, int ssize, int local)
+                                      int psize, int apsize,
+                                      int ssize, int local)
 {
         unsigned long want_v;
         unsigned long lpar_rc;
@@ -364,9 +366,10 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
  * already zero. For now I am paranoid.
  */
 static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
                                        unsigned long newpp,
                                        unsigned long vpn,
-                                       int psize, int ssize, int local)
+                                       int psize, int apsize,
+                                       int ssize, int local)
 {
         unsigned long lpar_rc;
         unsigned long want_v;
@@ -394,7 +397,8 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
 }
 
 static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
-                                         int psize, int ssize, int local)
+                                         int psize, int apsize,
+                                         int ssize, int local)
 {
         unsigned long want_v;
         unsigned long lpar_rc;
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index d35dbbc8ec79..f75f6fcac729 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -142,7 +142,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
          * during boot if the user requests it.  Odd-numbered
          * cpus are assumed to be secondary threads.
          */
-        if (system_state < SYSTEM_RUNNING &&
+        if (system_state == SYSTEM_BOOTING &&
             cpu_has_feature(CPU_FTR_SMT) &&
             !smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
                 return 0;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index bdb738a69e41..49c9f9501c21 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -885,7 +885,7 @@ static int smp_core99_cpu_notify(struct notifier_block *self,
         return NOTIFY_OK;
 }
 
-static struct notifier_block __cpuinitdata smp_core99_cpu_nb = {
+static struct notifier_block smp_core99_cpu_nb = {
         .notifier_call = smp_core99_cpu_notify,
 };
 #endif /* CONFIG_HOTPLUG_CPU */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index bcc3cb48a44e..7fe595152478 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -3,3 +3,4 @@ obj-y += opal-rtc.o opal-nvram.o
 
 obj-$(CONFIG_SMP)       += smp.o
 obj-$(CONFIG_PCI)       += pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_EEH)       += eeh-ioda.o eeh-powernv.o
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
new file mode 100644
index 000000000000..0cd1c4a71755
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -0,0 +1,916 @@
+/*
+ * This file implements the functions needed by EEH on IODA-compliant
+ * chips. Most of the EEH functionality here is built on top of the
+ * OPAL APIs.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/bootmem.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/msi.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/string.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/pci-bridge.h>
+#include <asm/ppc-pci.h>
+#include <asm/tce.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/* Debugging option */
+#ifdef IODA_EEH_DBG_ON
+#define IODA_EEH_DBG(args...)   pr_info(args)
+#else
+#define IODA_EEH_DBG(args...)
+#endif
+
+static char *hub_diag = NULL;
+static int ioda_eeh_nb_init = 0;
+
+static int ioda_eeh_event(struct notifier_block *nb,
+                          unsigned long events, void *change)
+{
+        uint64_t changed_evts = (uint64_t)change;
+
+        /* We simply send special EEH event */
+        if ((changed_evts & OPAL_EVENT_PCI_ERROR) &&
+            (events & OPAL_EVENT_PCI_ERROR))
+                eeh_send_failure_event(NULL);
+
+        return 0;
+}
+
+static struct notifier_block ioda_eeh_nb = {
+        .notifier_call = ioda_eeh_event,
+        .next = NULL,
+        .priority = 0
+};
+
+#ifdef CONFIG_DEBUG_FS
+static int ioda_eeh_dbgfs_set(void *data, u64 val)
+{
+        struct pci_controller *hose = data;
+        struct pnv_phb *phb = hose->private_data;
+
+        out_be64(phb->regs + 0xD10, val);
+        return 0;
+}
+
+static int ioda_eeh_dbgfs_get(void *data, u64 *val)
+{
+        struct pci_controller *hose = data;
+        struct pnv_phb *phb = hose->private_data;
+
+        *val = in_be64(phb->regs + 0xD10);
+        return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_dbgfs_ops, ioda_eeh_dbgfs_get,
+                        ioda_eeh_dbgfs_set, "0x%llx\n");
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * ioda_eeh_post_init - Chip dependent post initialization
+ * @hose: PCI controller
+ *
+ * The function will be called after eeh PEs and devices
+ * have been built. That means the EEH is ready to supply
+ * service with I/O cache.
+ */
+static int ioda_eeh_post_init(struct pci_controller *hose)
+{
+        struct pnv_phb *phb = hose->private_data;
+        int ret;
+
+        /* Register OPAL event notifier */
+        if (!ioda_eeh_nb_init) {
+                ret = opal_notifier_register(&ioda_eeh_nb);
+                if (ret) {
+                        pr_err("%s: Can't register OPAL event notifier (%d)\n",
+                               __func__, ret);
+                        return ret;
+                }
+
+                ioda_eeh_nb_init = 1;
+        }
+
+        /* FIXME: Enable it for PHB3 later */
+        if (phb->type == PNV_PHB_IODA1) {
+                if (!hub_diag) {
+                        hub_diag = (char *)__get_free_page(GFP_KERNEL |
+                                                           __GFP_ZERO);
+                        if (!hub_diag) {
+                                pr_err("%s: Out of memory !\n", __func__);
+                                return -ENOMEM;
+                        }
+                }
+
+#ifdef CONFIG_DEBUG_FS
+                if (phb->dbgfs)
+                        debugfs_create_file("err_injct", 0600,
+                                            phb->dbgfs, hose,
+                                            &ioda_eeh_dbgfs_ops);
+#endif
+
+                phb->eeh_state |= PNV_EEH_STATE_ENABLED;
+        }
+
+        return 0;
+}
+
+/**
+ * ioda_eeh_set_option - Set EEH operation or I/O setting
+ * @pe: EEH PE
+ * @option: options
+ *
+ * Enable or disable EEH option for the indicated PE. The
+ * function also can be used to enable I/O or DMA for the
+ * PE.
+ */
+static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
+{
+        s64 ret;
+        u32 pe_no;
+        struct pci_controller *hose = pe->phb;
+        struct pnv_phb *phb = hose->private_data;
+
+        /* Check on PE number */
+        if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+                pr_err("%s: PE address %x out of range [0, %x] "
+                       "on PHB#%x\n",
+                       __func__, pe->addr, phb->ioda.total_pe,
+                       hose->global_number);
+                return -EINVAL;
+        }
+
+        pe_no = pe->addr;
+        switch (option) {
+        case EEH_OPT_DISABLE:
+                ret = -EEXIST;
+                break;
+        case EEH_OPT_ENABLE:
+                ret = 0;
+                break;
+        case EEH_OPT_THAW_MMIO:
+                ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+                                OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO);
+                if (ret) {
+                        pr_warning("%s: Failed to enable MMIO for "
+                                   "PHB#%x-PE#%x, err=%lld\n",
+                                   __func__, hose->global_number, pe_no, ret);
+                        return -EIO;
+                }
+
+                break;
+        case EEH_OPT_THAW_DMA:
+                ret = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
+                                OPAL_EEH_ACTION_CLEAR_FREEZE_DMA);
+                if (ret) {
+                        pr_warning("%s: Failed to enable DMA for "
+                                   "PHB#%x-PE#%x, err=%lld\n",
+                                   __func__, hose->global_number, pe_no, ret);
+                        return -EIO;
+                }
+
+                break;
+        default:
+                pr_warning("%s: Invalid option %d\n", __func__, option);
+                return -EINVAL;
+        }
+
+        return ret;
+}
+
+/**
+ * ioda_eeh_get_state - Retrieve the state of PE
+ * @pe: EEH PE
+ *
+ * The PE's state should be retrieved from the PEEV, PEST
+ * IODA tables. Since OPAL has exported a function to do
+ * that, we had better use it.
+ */
+static int ioda_eeh_get_state(struct eeh_pe *pe)
+{
+        s64 ret = 0;
+        u8 fstate;
+        u16 pcierr;
+        u32 pe_no;
+        int result;
+        struct pci_controller *hose = pe->phb;
+        struct pnv_phb *phb = hose->private_data;
+
+        /*
+         * Sanity check on PE address. The PHB PE address should
+         * be zero.
+         */
+        if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
+                pr_err("%s: PE address %x out of range [0, %x] "
+                       "on PHB#%x\n",
+                       __func__, pe->addr, phb->ioda.total_pe,
+                       hose->global_number);
+                return EEH_STATE_NOT_SUPPORT;
+        }
+
+        /* Retrieve PE status through OPAL */
+        pe_no = pe->addr;
+        ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+                        &fstate, &pcierr, NULL);
+        if (ret) {
+                pr_err("%s: Failed to get EEH status on "
+                       "PHB#%x-PE#%x, err=%lld\n",
+                       __func__, hose->global_number, pe_no, ret);
+                return EEH_STATE_NOT_SUPPORT;
+        }
+
+        /* Check PHB status */
+        if (pe->type & EEH_PE_PHB) {
+                result = 0;
+                result &= ~EEH_STATE_RESET_ACTIVE;
+
+                if (pcierr != OPAL_EEH_PHB_ERROR) {
+                        result |= EEH_STATE_MMIO_ACTIVE;
+                        result |= EEH_STATE_DMA_ACTIVE;
+                        result |= EEH_STATE_MMIO_ENABLED;
+                        result |= EEH_STATE_DMA_ENABLED;
+                }
+
+                return result;
+        }
+
+        /* Parse result out */
+        result = 0;
+        switch (fstate) {
+        case OPAL_EEH_STOPPED_NOT_FROZEN:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                result |= EEH_STATE_MMIO_ACTIVE;
+                result |= EEH_STATE_DMA_ACTIVE;
+                result |= EEH_STATE_MMIO_ENABLED;
+                result |= EEH_STATE_DMA_ENABLED;
+                break;
+        case OPAL_EEH_STOPPED_MMIO_FREEZE:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                result |= EEH_STATE_DMA_ACTIVE;
+                result |= EEH_STATE_DMA_ENABLED;
+                break;
+        case OPAL_EEH_STOPPED_DMA_FREEZE:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                result |= EEH_STATE_MMIO_ACTIVE;
+                result |= EEH_STATE_MMIO_ENABLED;
+                break;
+        case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
+                result &= ~EEH_STATE_RESET_ACTIVE;
+                break;
+        case OPAL_EEH_STOPPED_RESET:
+                result |= EEH_STATE_RESET_ACTIVE;
+                break;
+        case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
+                result |= EEH_STATE_UNAVAILABLE;
+                break;
+        case OPAL_EEH_STOPPED_PERM_UNAVAIL:
+                result |= EEH_STATE_NOT_SUPPORT;
+                break;
+        default:
+                pr_warning("%s: Unexpected EEH status 0x%x "
+                           "on PHB#%x-PE#%x\n",
+                           __func__, fstate, hose->global_number, pe_no);
+        }
+
+        return result;
+}
+
+static int ioda_eeh_pe_clear(struct eeh_pe *pe)
+{
+        struct pci_controller *hose;
+        struct pnv_phb *phb;
+        u32 pe_no;
+        u8 fstate;
+        u16 pcierr;
+        s64 ret;
+
+        pe_no = pe->addr;
+        hose = pe->phb;
+        phb = pe->phb->private_data;
+
+        /* Clear the EEH error on the PE */
+        ret = opal_pci_eeh_freeze_clear(phb->opal_id,
+                        pe_no, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+        if (ret) {
+                pr_err("%s: Failed to clear EEH error for "
+                       "PHB#%x-PE#%x, err=%lld\n",
+                       __func__, hose->global_number, pe_no, ret);
+                return -EIO;
+        }
+
+        /*
+         * Read the PE state back and verify that the frozen
+         * state has been removed.
+         */
+        ret = opal_pci_eeh_freeze_status(phb->opal_id, pe_no,
+                        &fstate, &pcierr, NULL);
+        if (ret) {
+                pr_err("%s: Failed to get EEH status on "
+                       "PHB#%x-PE#%x, err=%lld\n",
+                       __func__, hose->global_number, pe_no, ret);
+                return -EIO;
+        }
+
+        if (fstate != OPAL_EEH_STOPPED_NOT_FROZEN) {
+                pr_err("%s: Frozen state not cleared on "
+                       "PHB#%x-PE#%x, sts=%x\n",
+                       __func__, hose->global_number, pe_no, fstate);
+                return -EIO;
+        }
+
+        return 0;
+}
+
+static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
+{
+        s64 rc = OPAL_HARDWARE;
+
+        while (1) {
+                rc = opal_pci_poll(phb->opal_id);
+                if (rc <= 0)
+                        break;
+
+                msleep(rc);
+        }
+
+        return rc;
+}
+
+static int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
+{
+        struct pnv_phb *phb = hose->private_data;
+        s64 rc = OPAL_HARDWARE;
+
+        pr_debug("%s: Reset PHB#%x, option=%d\n",
+                 __func__, hose->global_number, option);
+
+        /* Issue PHB complete reset request */
+        if (option == EEH_RESET_FUNDAMENTAL ||
+            option == EEH_RESET_HOT)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PHB_COMPLETE,
+                                OPAL_ASSERT_RESET);
+        else if (option == EEH_RESET_DEACTIVATE)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PHB_COMPLETE,
+                                OPAL_DEASSERT_RESET);
+        if (rc < 0)
+                goto out;
+
+        /*
+         * Poll the state of the PHB until the request is done
+         * successfully.
+         */
+        rc = ioda_eeh_phb_poll(phb);
+out:
+        if (rc != OPAL_SUCCESS)
+                return -EIO;
+
+        return 0;
+}
+
+static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
+{
+        struct pnv_phb *phb = hose->private_data;
+        s64 rc = OPAL_SUCCESS;
+
+        pr_debug("%s: Reset PHB#%x, option=%d\n",
+                 __func__, hose->global_number, option);
+
+        /*
+         * During the reset deassert time, we needn't care about
+         * the reset scope because the firmware does nothing for
+         * fundamental or hot reset during the deassert phase.
+         */
+        if (option == EEH_RESET_FUNDAMENTAL)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PCI_FUNDAMENTAL_RESET,
+                                OPAL_ASSERT_RESET);
+        else if (option == EEH_RESET_HOT)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PCI_HOT_RESET,
+                                OPAL_ASSERT_RESET);
+        else if (option == EEH_RESET_DEACTIVATE)
+                rc = opal_pci_reset(phb->opal_id,
+                                OPAL_PCI_HOT_RESET,
+                                OPAL_DEASSERT_RESET);
+        if (rc < 0)
+                goto out;
+
+        /* Poll the state of the PHB until the request is done */
+        rc = ioda_eeh_phb_poll(phb);
+out:
+        if (rc != OPAL_SUCCESS)
+                return -EIO;
+
+        return 0;
+}
+
+static int ioda_eeh_bridge_reset(struct pci_controller *hose,
+                                 struct pci_dev *dev, int option)
+{
+        u16 ctrl;
+
+        pr_debug("%s: Reset device %04x:%02x:%02x.%01x with option %d\n",
+                 __func__, hose->global_number, dev->bus->number,
+                 PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), option);
+
+        switch (option) {
+        case EEH_RESET_FUNDAMENTAL:
+        case EEH_RESET_HOT:
+                pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+                ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+                pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+                break;
+        case EEH_RESET_DEACTIVATE:
+                pci_read_config_word(dev, PCI_BRIDGE_CONTROL, &ctrl);
+                ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
+                pci_write_config_word(dev, PCI_BRIDGE_CONTROL, ctrl);
+                break;
+        }
+
+        return 0;
+}
+
+/**
+ * ioda_eeh_reset - Reset the indicated PE
+ * @pe: EEH PE
+ * @option: reset option
+ *
+ * Do reset on the indicated PE. For a PCI bus sensitive PE,
+ * we need to reset the parent p2p bridge. The PHB has to
+ * be reinitialized if the p2p bridge is the root bridge. For
+ * a PCI device sensitive PE, we will try to reset the device
+ * through FLR. For now, we don't have OPAL APIs to do HARD
+ * reset yet, so all resets would be SOFT (HOT) resets.
+ */
+static int ioda_eeh_reset(struct eeh_pe *pe, int option)
+{
+        struct pci_controller *hose = pe->phb;
+        struct eeh_dev *edev;
+        struct pci_dev *dev;
+        int ret;
+
+        /*
+         * Anyway, we have to clear the problematic state for the
+         * corresponding PE. However, we needn't do it if the PE
+         * is PHB associated. That means the PHB is having fatal
+         * errors and it needs reset. Furthermore, the AIB interface
+         * isn't reliable any more.
+         */
+        if (!(pe->type & EEH_PE_PHB) &&
+            (option == EEH_RESET_HOT ||
+             option == EEH_RESET_FUNDAMENTAL)) {
+                ret = ioda_eeh_pe_clear(pe);
+                if (ret)
+                        return -EIO;
+        }
+
+        /*
+         * The rules applied to reset, either fundamental or hot reset:
+         *
+         * We always reset the direct upstream bridge of the PE. If the
+         * direct upstream bridge isn't the root bridge, we always take
+         * hot reset no matter what option (fundamental or hot) is.
+         * Otherwise, we should do the reset according to the required
+         * option.
+         */
+        if (pe->type & EEH_PE_PHB) {
+                ret = ioda_eeh_phb_reset(hose, option);
+        } else {
+                if (pe->type & EEH_PE_DEVICE) {
+                        /*
+                         * If it's a device PE, we didn't refer to the parent
+                         * PCI bus yet. So we have to figure it out indirectly.
+                         */
+                        edev = list_first_entry(&pe->edevs,
+                                        struct eeh_dev, list);
+                        dev = eeh_dev_to_pci_dev(edev);
+                        dev = dev->bus->self;
+                } else {
+                        /*
+                         * If it's a bus PE, the parent PCI bus is already
+                         * there, so just pick it up.
+                         */
+                        dev = pe->bus->self;
+                }
+
+                /*
+                 * Do the reset based on whether the direct upstream bridge
+                 * is the root bridge (port) or not.
+                 */
+                if (dev->bus->number == 0)
+                        ret = ioda_eeh_root_reset(hose, option);
+                else
+                        ret = ioda_eeh_bridge_reset(hose, dev, option);
+        }
+
+        return ret;
+}
+
+/**
+ * ioda_eeh_get_log - Retrieve error log
+ * @pe: EEH PE
+ * @severity: Severity level of the log
+ * @drv_log: buffer to store the log
+ * @len: space of the log buffer
+ *
+ * The function is used to retrieve the error log from P7IOC.
+ */
+static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
+                            char *drv_log, unsigned long len)
+{
+        s64 ret;
+        unsigned long flags;
+        struct pci_controller *hose = pe->phb;
+        struct pnv_phb *phb = hose->private_data;
+
+        spin_lock_irqsave(&phb->lock, flags);
+
+        ret = opal_pci_get_phb_diag_data2(phb->opal_id,
+                        phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
+        if (ret) {
+                spin_unlock_irqrestore(&phb->lock, flags);
+                pr_warning("%s: Failed to get log for PHB#%x-PE#%x\n",
+                           __func__, hose->global_number, pe->addr);
+                return -EIO;
+        }
+
+        /*
+         * FIXME: We probably need to log the error somewhere.
+         * Let's sort that out in future.
+         */
+        /* pr_info("%s", phb->diag.blob); */
+
+        spin_unlock_irqrestore(&phb->lock, flags);
+
+        return 0;
+}
+
+/**
+ * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
+ * @pe: EEH PE
+ *
+ * A particular PE might include PCI bridges. In order to make
+ * the PE work properly, those PCI bridges should be configured
+ * correctly. However, nothing needs to be done on P7IOC since
+ * the reset function already covers everything this function
+ * would have to do.
+ */
+static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
+{
+        return 0;
+}
+
+static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
+{
+        /* GEM */
+        pr_info("  GEM XFIR:        %016llx\n", data->gemXfir);
+        pr_info("  GEM RFIR:        %016llx\n", data->gemRfir);
+        pr_info("  GEM RIRQFIR:     %016llx\n", data->gemRirqfir);
+        pr_info("  GEM Mask:        %016llx\n", data->gemMask);
+        pr_info("  GEM RWOF:        %016llx\n", data->gemRwof);
+
+        /* LEM */
+        pr_info("  LEM FIR:         %016llx\n", data->lemFir);
+        pr_info("  LEM Error Mask:  %016llx\n", data->lemErrMask);
+        pr_info("  LEM Action 0:    %016llx\n", data->lemAction0);
+        pr_info("  LEM Action 1:    %016llx\n", data->lemAction1);
+        pr_info("  LEM WOF:         %016llx\n", data->lemWof);
+}
+
+static void ioda_eeh_hub_diag(struct pci_controller *hose)
+{
+        struct pnv_phb *phb = hose->private_data;
+        struct OpalIoP7IOCErrorData *data;
+        long rc;
+
+        data = (struct OpalIoP7IOCErrorData *)hub_diag;
+        rc = opal_pci_get_hub_diag_data(phb->hub_id, data, PAGE_SIZE);
+        if (rc != OPAL_SUCCESS) {
+                pr_warning("%s: Failed to get HUB#%llx diag-data (%ld)\n",
+                           __func__, phb->hub_id, rc);
+                return;
+        }
+
+        switch (data->type) {
+        case OPAL_P7IOC_DIAG_TYPE_RGC:
+                pr_info("P7IOC diag-data for RGC\n\n");
+                ioda_eeh_hub_diag_common(data);
+                pr_info("  RGC Status:      %016llx\n", data->rgc.rgcStatus);
+                pr_info("  RGC LDCP:        %016llx\n", data->rgc.rgcLdcp);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_BI:
+                pr_info("P7IOC diag-data for BI %s\n\n",
+                        data->bi.biDownbound ? "Downbound" : "Upbound");
+                ioda_eeh_hub_diag_common(data);
+                pr_info("  BI LDCP 0:       %016llx\n", data->bi.biLdcp0);
+                pr_info("  BI LDCP 1:       %016llx\n", data->bi.biLdcp1);
+                pr_info("  BI LDCP 2:       %016llx\n", data->bi.biLdcp2);
+                pr_info("  BI Fence Status: %016llx\n", data->bi.biFenceStatus);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_CI:
+                pr_info("P7IOC diag-data for CI Port %d\n\n",
+                        data->ci.ciPort);
+                ioda_eeh_hub_diag_common(data);
+                pr_info("  CI Port Status:  %016llx\n", data->ci.ciPortStatus);
+                pr_info("  CI Port LDCP:    %016llx\n", data->ci.ciPortLdcp);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_MISC:
+                pr_info("P7IOC diag-data for MISC\n\n");
+                ioda_eeh_hub_diag_common(data);
+                break;
+        case OPAL_P7IOC_DIAG_TYPE_I2C:
+                pr_info("P7IOC diag-data for I2C\n\n");
+                ioda_eeh_hub_diag_common(data);
+                break;
+        default:
+                pr_warning("%s: Invalid type of HUB#%llx diag-data (%d)\n",
+                           __func__, phb->hub_id, data->type);
+        }
+}
+
+static void ioda_eeh_p7ioc_phb_diag(struct pci_controller *hose,
+                                    struct OpalIoPhbErrorCommon *common)
+{
+        struct OpalIoP7IOCPhbErrorData *data;
+        int i;
+
+        data = (struct OpalIoP7IOCPhbErrorData *)common;
+
+        pr_info("P7IOC PHB#%x Diag-data (Version: %d)\n\n",
+                hose->global_number, common->version);
+
+        pr_info("  brdgCtl:              %08x\n", data->brdgCtl);
+
+        pr_info("  portStatusReg:        %08x\n", data->portStatusReg);
+        pr_info("  rootCmplxStatus:      %08x\n", data->rootCmplxStatus);
+        pr_info("  busAgentStatus:       %08x\n", data->busAgentStatus);
+
+        pr_info("  deviceStatus:         %08x\n", data->deviceStatus);
+        pr_info("  slotStatus:           %08x\n", data->slotStatus);
+        pr_info("  linkStatus:           %08x\n", data->linkStatus);
+        pr_info("  devCmdStatus:         %08x\n", data->devCmdStatus);
+        pr_info("  devSecStatus:         %08x\n", data->devSecStatus);
+
+        pr_info("  rootErrorStatus:      %08x\n", data->rootErrorStatus);
+        pr_info("  uncorrErrorStatus:    %08x\n", data->uncorrErrorStatus);
+        pr_info("  corrErrorStatus:      %08x\n", data->corrErrorStatus);
+        pr_info("  tlpHdr1:              %08x\n", data->tlpHdr1);
+        pr_info("  tlpHdr2:              %08x\n", data->tlpHdr2);
+        pr_info("  tlpHdr3:              %08x\n", data->tlpHdr3);
+        pr_info("  tlpHdr4:              %08x\n", data->tlpHdr4);
+        pr_info("  sourceId:             %08x\n", data->sourceId);
+
+        pr_info("  errorClass:           %016llx\n", data->errorClass);
+        pr_info("  correlator:           %016llx\n", data->correlator);
+        pr_info("  p7iocPlssr:           %016llx\n", data->p7iocPlssr);
+        pr_info("  p7iocCsr:             %016llx\n", data->p7iocCsr);
+        pr_info("  lemFir:               %016llx\n", data->lemFir);
+        pr_info("  lemErrorMask:         %016llx\n", data->lemErrorMask);
+        pr_info("  lemWOF:               %016llx\n", data->lemWOF);
+        pr_info("  phbErrorStatus:       %016llx\n", data->phbErrorStatus);
+        pr_info("  phbFirstErrorStatus:  %016llx\n", data->phbFirstErrorStatus);
+        pr_info("  phbErrorLog0:         %016llx\n", data->phbErrorLog0);
+        pr_info("  phbErrorLog1:         %016llx\n", data->phbErrorLog1);
+        pr_info("  mmioErrorStatus:      %016llx\n", data->mmioErrorStatus);
+        pr_info("  mmioFirstErrorStatus: %016llx\n", data->mmioFirstErrorStatus);
+        pr_info("  mmioErrorLog0:        %016llx\n", data->mmioErrorLog0);
+        pr_info("  mmioErrorLog1:        %016llx\n", data->mmioErrorLog1);
+        pr_info("  dma0ErrorStatus:      %016llx\n", data->dma0ErrorStatus);
+        pr_info("  dma0FirstErrorStatus: %016llx\n", data->dma0FirstErrorStatus);
+        pr_info("  dma0ErrorLog0:        %016llx\n", data->dma0ErrorLog0);
+        pr_info("  dma0ErrorLog1:        %016llx\n", data->dma0ErrorLog1);
+        pr_info("  dma1ErrorStatus:      %016llx\n", data->dma1ErrorStatus);
+        pr_info("  dma1FirstErrorStatus: %016llx\n", data->dma1FirstErrorStatus);
+        pr_info("  dma1ErrorLog0:        %016llx\n", data->dma1ErrorLog0);
+        pr_info("  dma1ErrorLog1:        %016llx\n", data->dma1ErrorLog1);
+
+        for (i = 0; i < OPAL_P7IOC_NUM_PEST_REGS; i++) {
+                if ((data->pestA[i] >> 63) == 0 &&
+                    (data->pestB[i] >> 63) == 0)
+                        continue;
+
+                pr_info("  PE[%3d] PESTA:        %016llx\n", i, data->pestA[i]);
+                pr_info("          PESTB:        %016llx\n", data->pestB[i]);
+        }
+}
+
+static void ioda_eeh_phb_diag(struct pci_controller *hose)
+{
+        struct pnv_phb *phb = hose->private_data;
+        struct OpalIoPhbErrorCommon *common;
+        long rc;
+
+        common = (struct OpalIoPhbErrorCommon *)phb->diag.blob;
+        rc = opal_pci_get_phb_diag_data2(phb->opal_id, common, PAGE_SIZE);
+        if (rc != OPAL_SUCCESS) {
+                pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
+                           __func__, hose->global_number, rc);
+                return;
+        }
+
+        switch (common->ioType) {
+        case OPAL_PHB_ERROR_DATA_TYPE_P7IOC:
+                ioda_eeh_p7ioc_phb_diag(hose, common);
+                break;
+        default:
+                pr_warning("%s: Unrecognized I/O chip %d\n",
+                           __func__, common->ioType);
+        }
+}
+
+static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
+                               struct eeh_pe **pe)
+{
+        struct eeh_pe *phb_pe;
+
+        phb_pe = eeh_phb_pe_get(hose);
+        if (!phb_pe) {
+                pr_warning("%s: Can't find PE for PHB#%d\n",
+                           __func__, hose->global_number);
+                return -EEXIST;
+        }
+
+        *pe = phb_pe;
+        return 0;
+}
+
+static int ioda_eeh_get_pe(struct pci_controller *hose,
+                           u16 pe_no, struct eeh_pe **pe)
+{
+        struct eeh_pe *phb_pe, *dev_pe;
+        struct eeh_dev dev;
+
+        /* Find the PHB PE */
+        if (ioda_eeh_get_phb_pe(hose, &phb_pe))
+                return -EEXIST;
+
+        /* Find the PE according to PE# */
+        memset(&dev, 0, sizeof(struct eeh_dev));
+        dev.phb = hose;
+        dev.pe_config_addr = pe_no;
+        dev_pe = eeh_pe_get(&dev);
+        if (!dev_pe) {
+                pr_warning("%s: Can't find PE for PHB#%x - PE#%x\n",
+                           __func__, hose->global_number, pe_no);
+                return -EEXIST;
+        }
+
+        *pe = dev_pe;
+        return 0;
+}
+
+/**
+ * ioda_eeh_next_error - Retrieve next error for EEH core to handle
+ * @pe: The affected PE
+ *
+ * The function is expected to be called by the EEH core while it gets
+ * a special EEH event (without a bound PE). The function calls into
+ * the OPAL APIs for the next error to handle. Informational errors are
+ * handled internally by the platform. However, the dead IOC, dead PHB,
+ * fenced PHB and frozen PE should be handled by the EEH core eventually.
+ */
+static int ioda_eeh_next_error(struct eeh_pe **pe)
+{
+        struct pci_controller *hose, *tmp;
+        struct pnv_phb *phb;
+        u64 frozen_pe_no;
+        u16 err_type, severity;
+        long rc;
+        int ret = 1;
+
+        /*
+         * While running here, it's safe to purge the event queue.
+         * And we should keep the cached OPAL notifier event
+         * synchronized between the kernel and firmware.
+         */
+        eeh_remove_event(NULL);
+        opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
+
+        list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+                /*
+                 * If the subordinate PCI buses of the PHB have been
+                 * removed, we needn't take care of it any more.
+                 */
+                phb = hose->private_data;
+                if (phb->eeh_state & PNV_EEH_STATE_REMOVED)
+                        continue;
+
+                rc = opal_pci_next_error(phb->opal_id,
+                                &frozen_pe_no, &err_type, &severity);
+
+                /* If the OPAL API returns error, we needn't proceed */
+                if (rc != OPAL_SUCCESS) {
+                        IODA_EEH_DBG("%s: Invalid return value on "
+                                     "PHB#%x (0x%lx) from opal_pci_next_error\n",
+                                     __func__, hose->global_number, rc);
+                        continue;
+                }
+
+                /* If the PHB doesn't have an error, stop processing */
+                if (err_type == OPAL_EEH_NO_ERROR ||
+                    severity == OPAL_EEH_SEV_NO_ERROR) {
+                        IODA_EEH_DBG("%s: No error found on PHB#%x\n",
+                                     __func__, hose->global_number);
+                        continue;
+                }
+
+                /*
+                 * Process the error. We're expecting the error with
+                 * highest priority reported upon multiple errors on the
+                 * specific PHB.
+                 */
+                IODA_EEH_DBG("%s: Error (%d, %d, %llu) on PHB#%x\n",
+                             __func__, err_type, severity,
+                             frozen_pe_no, hose->global_number);
+                switch (err_type) {
+                case OPAL_EEH_IOC_ERROR:
+                        if (severity == OPAL_EEH_SEV_IOC_DEAD) {
+                                list_for_each_entry_safe(hose, tmp,
+                                                &hose_list, list_node) {
+                                        phb = hose->private_data;
+                                        phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+                                }
+
+                                pr_err("EEH: dead IOC detected\n");
+                                ret = 4;
+                                goto out;
+                        } else if (severity == OPAL_EEH_SEV_INF) {
+                                pr_info("EEH: IOC informative error "
+                                        "detected\n");
+                                ioda_eeh_hub_diag(hose);
+                        }
+
+                        break;
+                case OPAL_EEH_PHB_ERROR:
+                        if (severity == OPAL_EEH_SEV_PHB_DEAD) {
+                                if (ioda_eeh_get_phb_pe(hose, pe))
+                                        break;
+
+                                pr_err("EEH: dead PHB#%x detected\n",
+                                       hose->global_number);
+                                phb->eeh_state |= PNV_EEH_STATE_REMOVED;
+                                ret = 3;
+                                goto out;
+                        } else if (severity == OPAL_EEH_SEV_PHB_FENCED) {
+                                if (ioda_eeh_get_phb_pe(hose, pe))
+                                        break;
+
+                                pr_err("EEH: fenced PHB#%x detected\n",
+                                       hose->global_number);
+                                ret = 2;
+                                goto out;
+                        } else if (severity == OPAL_EEH_SEV_INF) {
+                                pr_info("EEH: PHB#%x informative error "
+                                        "detected\n",
+                                        hose->global_number);
+                                ioda_eeh_phb_diag(hose);
+                        }
+
+                        break;
+                case OPAL_EEH_PE_ERROR:
+                        if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
+                                break;
+
+                        pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
+                               (*pe)->addr, (*pe)->phb->global_number);
+                        ret = 1;
+                        goto out;
+                }
+        }
+
+        ret = 0;
+out:
+        return ret;
+}
+
+struct pnv_eeh_ops ioda_eeh_ops = {
+        .post_init = ioda_eeh_post_init,
+        .set_option = ioda_eeh_set_option,
+        .get_state = ioda_eeh_get_state,
+        .reset = ioda_eeh_reset,
+        .get_log = ioda_eeh_get_log,
+        .configure_bridge = ioda_eeh_configure_bridge,
+        .next_error = ioda_eeh_next_error
+};
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
new file mode 100644
index 000000000000..969cce73055a
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -0,0 +1,379 @@
+/*
+ * This file implements the platform dependent EEH operations for the
+ * powernv platform, which runs on non-virtualized hardware without a
+ * hypervisor.
+ *
+ * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/atomic.h>
+#include <linux/delay.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/msi.h>
+#include <linux/of.h>
+#include <linux/pci.h>
+#include <linux/proc_fs.h>
+#include <linux/rbtree.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+
+#include <asm/eeh.h>
+#include <asm/eeh_event.h>
+#include <asm/firmware.h>
+#include <asm/io.h>
+#include <asm/iommu.h>
+#include <asm/machdep.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+#include <asm/ppc-pci.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/**
+ * powernv_eeh_init - EEH platform dependent initialization
+ *
+ * EEH platform dependent initialization on powernv
+ */
+static int powernv_eeh_init(void)
+{
+        /* We require OPALv3 */
+        if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+                pr_warning("%s: OPALv3 is required !\n", __func__);
+                return -EINVAL;
+        }
+
+        /* Set EEH probe mode */
+        eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
+
+        return 0;
+}
+
+/**
+ * powernv_eeh_post_init - EEH platform dependent post initialization
+ *
+ * EEH platform dependent post initialization on powernv. When
+ * the function is called, the EEH PEs and devices should have
+ * been built. If the I/O cache stuff has been built, EEH is
+ * ready to supply service.
+ */
+static int powernv_eeh_post_init(void)
+{
+        struct pci_controller *hose;
+        struct pnv_phb *phb;
+        int ret = 0;
+
+        list_for_each_entry(hose, &hose_list, list_node) {
+                phb = hose->private_data;
+
+                if (phb->eeh_ops && phb->eeh_ops->post_init) {
+                        ret = phb->eeh_ops->post_init(hose);
+                        if (ret)
+                                break;
+                }
+        }
+
+        return ret;
+}
+
87/**
88 * powernv_eeh_dev_probe - Do probe on PCI device
89 * @dev: PCI device
90 * @flag: unused
91 *
92 * When the EEH module is installed during system boot, all PCI
93 * devices are checked one by one to see whether they support EEH.
94 * This function is introduced for that purpose. By default, EEH is
95 * enabled on all PCI devices, so we only need to do the necessary
96 * initialization on the corresponding eeh device and create the PE
97 * accordingly.
98 *
99 * Note that it's unsafe to retrieve the EEH device through the
100 * corresponding PCI device here. During a PCI hotplug, which was
101 * possibly triggered by the EEH core, the binding between the EEH
102 * device and the PCI device hasn't been built yet.
103 */
104static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
105{
106 struct pci_controller *hose = pci_bus_to_host(dev->bus);
107 struct pnv_phb *phb = hose->private_data;
108 struct device_node *dn = pci_device_to_OF_node(dev);
109 struct eeh_dev *edev = of_node_to_eeh_dev(dn);
110
111 /*
112 * The root bridge, which doesn't have any subordinate
113 * PCI devices, has no OF node. So it's not reasonable
114 * to continue probing when the OF node or EEH device
115 * is missing.
116 */
117 if (!dn || !edev)
118 return 0;
119
120 /* Skip for PCI-ISA bridge */
121 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
122 return 0;
123
124 /* Initialize eeh device */
125 edev->class_code = dev->class;
126 edev->mode = 0;
127 edev->config_addr = ((dev->bus->number << 8) | dev->devfn);
128 edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff);
129
130 /* Create PE */
131 eeh_add_to_parent_pe(edev);
132
133 /*
134 * Enable EEH explicitly so that we do the EEH check
135 * while accessing I/O resources.
136 *
137 * FIXME: Enable that for PHB3 later
138 */
139 if (phb->type == PNV_PHB_IODA1)
140 eeh_subsystem_enabled = 1;
141
142 /* Save memory bars */
143 eeh_save_bars(edev);
144
145 return 0;
146}
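
The config address packed above follows the usual BDF layout: bus number in
bits 15:8, device/function in bits 7:0. A small illustrative helper (not part
of this patch) decoding it back with PCI_SLOT()/PCI_FUNC() from linux/pci.h:

	/* Hypothetical decode of the (bus << 8) | devfn packing above */
	static void pnv_eeh_bdf_decode(u32 config_addr, u8 *bus, u8 *dev, u8 *fn)
	{
		*bus = (config_addr >> 8) & 0xff;
		*dev = PCI_SLOT(config_addr & 0xff);	/* device: bits 7:3 */
		*fn  = PCI_FUNC(config_addr & 0xff);	/* function: bits 2:0 */
	}
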
147
148/**
149 * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
150 * @pe: EEH PE
151 * @option: operation to be issued
152 *
153 * The function is used to control the EEH functionality globally.
154 * Currently, the following options are supported according to PAPR:
155 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
156 */
157static int powernv_eeh_set_option(struct eeh_pe *pe, int option)
158{
159 struct pci_controller *hose = pe->phb;
160 struct pnv_phb *phb = hose->private_data;
161 int ret = -EEXIST;
162
163 /*
164 * All we need to do is pass it down to the hardware
165 * implementation to handle.
166 */
167 if (phb->eeh_ops && phb->eeh_ops->set_option)
168 ret = phb->eeh_ops->set_option(pe, option);
169
170 return ret;
171}
172
173/**
174 * powernv_eeh_get_pe_addr - Retrieve PE address
175 * @pe: EEH PE
176 *
177 * Retrieve the PE address according to the given traditional
178 * PCI BDF (Bus/Device/Function) address.
179 */
180static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
181{
182 return pe->addr;
183}
184
185/**
186 * powernv_eeh_get_state - Retrieve PE state
187 * @pe: EEH PE
188 * @delay: delay while PE state is temporarily unavailable
189 *
190 * Retrieve the state of the specified PE. On an IODA-compatible
191 * platform it should be retrieved from the IODA table, so we
192 * prefer passing the request down to the hardware implementation
193 * to handle it.
194 */
195static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay)
196{
197 struct pci_controller *hose = pe->phb;
198 struct pnv_phb *phb = hose->private_data;
199 int ret = EEH_STATE_NOT_SUPPORT;
200
201 if (phb->eeh_ops && phb->eeh_ops->get_state) {
202 ret = phb->eeh_ops->get_state(pe);
203
204 /*
205 * If the PE state is temporarily unavailable,
206 * tell the EEH core to delay for the default
207 * period (1 second).
208 */
209 if (delay) {
210 *delay = 0;
211 if (ret & EEH_STATE_UNAVAILABLE)
212 *delay = 1000;
213 }
214 }
215
216 return ret;
217}
218
219/**
220 * powernv_eeh_reset - Reset the specified PE
221 * @pe: EEH PE
222 * @option: reset option
223 *
224 * Reset the specified PE
225 */
226static int powernv_eeh_reset(struct eeh_pe *pe, int option)
227{
228 struct pci_controller *hose = pe->phb;
229 struct pnv_phb *phb = hose->private_data;
230 int ret = -EEXIST;
231
232 if (phb->eeh_ops && phb->eeh_ops->reset)
233 ret = phb->eeh_ops->reset(pe, option);
234
235 return ret;
236}
237
238/**
239 * powernv_eeh_wait_state - Wait for PE state
240 * @pe: EEH PE
241 * @max_wait: maximal wait period in milliseconds
242 *
243 * Wait for the state of the associated PE. It might take some time
244 * to retrieve the PE's state.
245 */
246static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
247{
248 int ret;
249 int mwait;
250
251 while (1) {
252 ret = powernv_eeh_get_state(pe, &mwait);
253
254 /*
255 * If the PE's state is temporarily unavailable,
256 * we have to wait for the specified time. Otherwise,
257 * the PE's state will be returned immediately.
258 */
259 if (ret != EEH_STATE_UNAVAILABLE)
260 return ret;
261
262 max_wait -= mwait;
263 if (max_wait <= 0) {
264 pr_warning("%s: Timeout getting PE#%x's state (%d)\n",
265 __func__, pe->addr, max_wait);
266 return EEH_STATE_NOT_SUPPORT;
267 }
268
269 msleep(mwait);
270 }
271
272 return EEH_STATE_NOT_SUPPORT;
273}
274
275/**
276 * powernv_eeh_get_log - Retrieve error log
277 * @pe: EEH PE
278 * @severity: temporary or permanent error log
279 * @drv_log: driver log to be combined with retrieved error log
280 * @len: length of driver log
281 *
282 * Retrieve the temporary or permanent error from the PE.
283 */
284static int powernv_eeh_get_log(struct eeh_pe *pe, int severity,
285 char *drv_log, unsigned long len)
286{
287 struct pci_controller *hose = pe->phb;
288 struct pnv_phb *phb = hose->private_data;
289 int ret = -EEXIST;
290
291 if (phb->eeh_ops && phb->eeh_ops->get_log)
292 ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
293
294 return ret;
295}
296
297/**
298 * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
299 * @pe: EEH PE
300 *
301 * The function will be called to reconfigure the bridges included
302 * in the specified PE so that the malfunctioning PE can be
303 * recovered.
304 */
305static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
306{
307 struct pci_controller *hose = pe->phb;
308 struct pnv_phb *phb = hose->private_data;
309 int ret = 0;
310
311 if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
312 ret = phb->eeh_ops->configure_bridge(pe);
313
314 return ret;
315}
316
317/**
318 * powernv_eeh_next_error - Retrieve next EEH error to handle
319 * @pe: Affected PE
320 *
321 * Use the OPAL API to retrieve the next EEH error for the EEH core to handle.
322 */
323static int powernv_eeh_next_error(struct eeh_pe **pe)
324{
325 struct pci_controller *hose;
326 struct pnv_phb *phb = NULL;
327
328 list_for_each_entry(hose, &hose_list, list_node) {
329 phb = hose->private_data;
330 break;
331 }
332
333 if (phb && phb->eeh_ops->next_error)
334 return phb->eeh_ops->next_error(pe);
335
336 return -EEXIST;
337}
338
339static struct eeh_ops powernv_eeh_ops = {
340 .name = "powernv",
341 .init = powernv_eeh_init,
342 .post_init = powernv_eeh_post_init,
343 .of_probe = NULL,
344 .dev_probe = powernv_eeh_dev_probe,
345 .set_option = powernv_eeh_set_option,
346 .get_pe_addr = powernv_eeh_get_pe_addr,
347 .get_state = powernv_eeh_get_state,
348 .reset = powernv_eeh_reset,
349 .wait_state = powernv_eeh_wait_state,
350 .get_log = powernv_eeh_get_log,
351 .configure_bridge = powernv_eeh_configure_bridge,
352 .read_config = pnv_pci_cfg_read,
353 .write_config = pnv_pci_cfg_write,
354 .next_error = powernv_eeh_next_error
355};
356
357/**
358 * eeh_powernv_init - Register platform dependent EEH operations
359 *
360 * EEH initialization on the powernv platform. This function should be
361 * called before any EEH related functions.
362 */
363static int __init eeh_powernv_init(void)
364{
365 int ret = -EINVAL;
366
367 if (!machine_is(powernv))
368 return ret;
369
370 ret = eeh_ops_register(&powernv_eeh_ops);
371 if (!ret)
372 pr_info("EEH: PowerNV platform initialized\n");
373 else
374 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret);
375
376 return ret;
377}
378
379early_initcall(eeh_powernv_init);
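
Taken together with eeh-ioda.c above, an EEH request on powernv passes through
two layers: the generic eeh_ops registered here, then the IOC-specific
pnv_eeh_ops hanging off the PHB. A sketch of the call path for a reset, with
all names taken from this patch:

	/* A single call from the EEH core ... */
	rc = eeh_ops->reset(pe, EEH_RESET_HOT);
	/* ... lands in powernv_eeh_reset(), which forwards to the
	 * IOC-specific backend via phb->eeh_ops->reset(), i.e.
	 * ioda_eeh_reset() on IODA PHBs. */
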
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6fabe92eafb6..e88863ffb135 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -107,4 +107,7 @@ OPAL_CALL(opal_pci_mask_pe_error, OPAL_PCI_MASK_PE_ERROR);
107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS); 107OPAL_CALL(opal_set_slot_led_status, OPAL_SET_SLOT_LED_STATUS);
108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS); 108OPAL_CALL(opal_get_epow_status, OPAL_GET_EPOW_STATUS);
109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED); 109OPAL_CALL(opal_set_system_attention_led, OPAL_SET_SYSTEM_ATTENTION_LED);
110OPAL_CALL(opal_pci_next_error, OPAL_PCI_NEXT_ERROR);
111OPAL_CALL(opal_pci_poll, OPAL_PCI_POLL);
110OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI); 112OPAL_CALL(opal_pci_msi_eoi, OPAL_PCI_MSI_EOI);
113OPAL_CALL(opal_pci_get_phb_diag_data2, OPAL_PCI_GET_PHB_DIAG_DATA2);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 628c564ceadb..106301fd2fa5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -15,6 +15,7 @@
15#include <linux/of.h> 15#include <linux/of.h>
16#include <linux/of_platform.h> 16#include <linux/of_platform.h>
17#include <linux/interrupt.h> 17#include <linux/interrupt.h>
18#include <linux/notifier.h>
18#include <linux/slab.h> 19#include <linux/slab.h>
19#include <asm/opal.h> 20#include <asm/opal.h>
20#include <asm/firmware.h> 21#include <asm/firmware.h>
@@ -31,6 +32,10 @@ static DEFINE_SPINLOCK(opal_write_lock);
31extern u64 opal_mc_secondary_handler[]; 32extern u64 opal_mc_secondary_handler[];
32static unsigned int *opal_irqs; 33static unsigned int *opal_irqs;
33static unsigned int opal_irq_count; 34static unsigned int opal_irq_count;
35static ATOMIC_NOTIFIER_HEAD(opal_notifier_head);
36static DEFINE_SPINLOCK(opal_notifier_lock);
37static uint64_t last_notified_mask = 0x0ul;
38static atomic_t opal_notifier_hold = ATOMIC_INIT(0);
34 39
35int __init early_init_dt_scan_opal(unsigned long node, 40int __init early_init_dt_scan_opal(unsigned long node,
36 const char *uname, int depth, void *data) 41 const char *uname, int depth, void *data)
@@ -95,6 +100,68 @@ static int __init opal_register_exception_handlers(void)
95 100
96early_initcall(opal_register_exception_handlers); 101early_initcall(opal_register_exception_handlers);
97 102
103int opal_notifier_register(struct notifier_block *nb)
104{
105 if (!nb) {
106 pr_warning("%s: Invalid argument (%p)\n",
107 __func__, nb);
108 return -EINVAL;
109 }
110
111 atomic_notifier_chain_register(&opal_notifier_head, nb);
112 return 0;
113}
114
115static void opal_do_notifier(uint64_t events)
116{
117 unsigned long flags;
118 uint64_t changed_mask;
119
120 if (atomic_read(&opal_notifier_hold))
121 return;
122
123 spin_lock_irqsave(&opal_notifier_lock, flags);
124 changed_mask = last_notified_mask ^ events;
125 last_notified_mask = events;
126 spin_unlock_irqrestore(&opal_notifier_lock, flags);
127
128 /*
129 * We feed the callback both the event bits and the
130 * changed bits so it has enough information.
131 */
132 atomic_notifier_call_chain(&opal_notifier_head,
133 events, (void *)changed_mask);
134}
135
136void opal_notifier_update_evt(uint64_t evt_mask,
137 uint64_t evt_val)
138{
139 unsigned long flags;
140
141 spin_lock_irqsave(&opal_notifier_lock, flags);
142 last_notified_mask &= ~evt_mask;
143 last_notified_mask |= evt_val;
144 spin_unlock_irqrestore(&opal_notifier_lock, flags);
145}
146
147void opal_notifier_enable(void)
148{
149 int64_t rc;
150 uint64_t evt = 0;
151
152 atomic_set(&opal_notifier_hold, 0);
153
154 /* Process pending events */
155 rc = opal_poll_events(&evt);
156 if (rc == OPAL_SUCCESS && evt)
157 opal_do_notifier(evt);
158}
159
160void opal_notifier_disable(void)
161{
162 atomic_set(&opal_notifier_hold, 1);
163}
164
98int opal_get_chars(uint32_t vtermno, char *buf, int count) 165int opal_get_chars(uint32_t vtermno, char *buf, int count)
99{ 166{
100 s64 len, rc; 167 s64 len, rc;
@@ -297,7 +364,7 @@ static irqreturn_t opal_interrupt(int irq, void *data)
297 364
298 opal_handle_interrupt(virq_to_hw(irq), &events); 365 opal_handle_interrupt(virq_to_hw(irq), &events);
299 366
300 /* XXX TODO: Do something with the events */ 367 opal_do_notifier(events);
301 368
302 return IRQ_HANDLED; 369 return IRQ_HANDLED;
303} 370}
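
For reference, a minimal sketch of a consumer of the notifier interface added
above. The callback receives the current event word as the notifier action and
the bits that changed since the last notification as the data pointer;
OPAL_EVENT_EXAMPLE stands in for a real event bit from asm/opal.h:

	#include <linux/notifier.h>

	static int example_opal_event(struct notifier_block *nb,
				      unsigned long events, void *change)
	{
		uint64_t changed = (uint64_t)(unsigned long)change;

		/* React only when the bit we care about actually toggled */
		if (changed & OPAL_EVENT_EXAMPLE)
			pr_info("OPAL events now 0x%lx\n", events);

		return NOTIFY_OK;
	}

	static struct notifier_block example_opal_nb = {
		.notifier_call	= example_opal_event,
	};

	/* Somewhere in platform init code: */
	opal_notifier_register(&example_opal_nb);
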
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 9c9d15e4cdf2..49b57b9f835d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -13,6 +13,7 @@
13 13
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/pci.h> 15#include <linux/pci.h>
16#include <linux/debugfs.h>
16#include <linux/delay.h> 17#include <linux/delay.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/init.h> 19#include <linux/init.h>
@@ -32,6 +33,7 @@
32#include <asm/iommu.h> 33#include <asm/iommu.h>
33#include <asm/tce.h> 34#include <asm/tce.h>
34#include <asm/xics.h> 35#include <asm/xics.h>
36#include <asm/debug.h>
35 37
36#include "powernv.h" 38#include "powernv.h"
37#include "pci.h" 39#include "pci.h"
@@ -441,6 +443,17 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
441 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 443 set_iommu_table_base(&pdev->dev, &pe->tce32_table);
442} 444}
443 445
446static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
447{
448 struct pci_dev *dev;
449
450 list_for_each_entry(dev, &bus->devices, bus_list) {
451 set_iommu_table_base(&dev->dev, &pe->tce32_table);
452 if (dev->subordinate)
453 pnv_ioda_setup_bus_dma(pe, dev->subordinate);
454 }
455}
456
444static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl, 457static void pnv_pci_ioda1_tce_invalidate(struct iommu_table *tbl,
445 u64 *startp, u64 *endp) 458 u64 *startp, u64 *endp)
446{ 459{
@@ -595,6 +608,12 @@ static void pnv_pci_ioda_setup_dma_pe(struct pnv_phb *phb,
595 TCE_PCI_SWINV_PAIR; 608 TCE_PCI_SWINV_PAIR;
596 } 609 }
597 iommu_init_table(tbl, phb->hose->node); 610 iommu_init_table(tbl, phb->hose->node);
611 iommu_register_group(tbl, pci_domain_nr(pe->pbus), pe->pe_number);
612
613 if (pe->pdev)
614 set_iommu_table_base(&pe->pdev->dev, tbl);
615 else
616 pnv_ioda_setup_bus_dma(pe, pe->pbus);
598 617
599 return; 618 return;
600 fail: 619 fail:
@@ -667,6 +686,11 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
667 } 686 }
668 iommu_init_table(tbl, phb->hose->node); 687 iommu_init_table(tbl, phb->hose->node);
669 688
689 if (pe->pdev)
690 set_iommu_table_base(&pe->pdev->dev, tbl);
691 else
692 pnv_ioda_setup_bus_dma(pe, pe->pbus);
693
670 return; 694 return;
671fail: 695fail:
672 if (pe->tce32_seg >= 0) 696 if (pe->tce32_seg >= 0)
@@ -968,11 +992,38 @@ static void pnv_pci_ioda_setup_DMA(void)
968 } 992 }
969} 993}
970 994
995static void pnv_pci_ioda_create_dbgfs(void)
996{
997#ifdef CONFIG_DEBUG_FS
998 struct pci_controller *hose, *tmp;
999 struct pnv_phb *phb;
1000 char name[16];
1001
1002 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
1003 phb = hose->private_data;
1004
1005 sprintf(name, "PCI%04x", hose->global_number);
1006 phb->dbgfs = debugfs_create_dir(name, powerpc_debugfs_root);
1007 if (!phb->dbgfs)
1008			pr_warning("%s: Failed to create debugfs for PHB#%x\n",
1009 __func__, hose->global_number);
1010 }
1011#endif /* CONFIG_DEBUG_FS */
1012}
1013
971static void pnv_pci_ioda_fixup(void) 1014static void pnv_pci_ioda_fixup(void)
972{ 1015{
973 pnv_pci_ioda_setup_PEs(); 1016 pnv_pci_ioda_setup_PEs();
974 pnv_pci_ioda_setup_seg(); 1017 pnv_pci_ioda_setup_seg();
975 pnv_pci_ioda_setup_DMA(); 1018 pnv_pci_ioda_setup_DMA();
1019
1020 pnv_pci_ioda_create_dbgfs();
1021
1022#ifdef CONFIG_EEH
1023 eeh_probe_mode_set(EEH_PROBE_MODE_DEV);
1024 eeh_addr_cache_build();
1025 eeh_init();
1026#endif
976} 1027}
977 1028
978/* 1029/*
@@ -1049,7 +1100,8 @@ static void pnv_pci_ioda_shutdown(struct pnv_phb *phb)
1049 OPAL_ASSERT_RESET); 1100 OPAL_ASSERT_RESET);
1050} 1101}
1051 1102
1052void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type) 1103void __init pnv_pci_init_ioda_phb(struct device_node *np,
1104 u64 hub_id, int ioda_type)
1053{ 1105{
1054 struct pci_controller *hose; 1106 struct pci_controller *hose;
1055 static int primary = 1; 1107 static int primary = 1;
@@ -1087,6 +1139,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1087 hose->first_busno = 0; 1139 hose->first_busno = 0;
1088 hose->last_busno = 0xff; 1140 hose->last_busno = 0xff;
1089 hose->private_data = phb; 1141 hose->private_data = phb;
1142 phb->hub_id = hub_id;
1090 phb->opal_id = phb_id; 1143 phb->opal_id = phb_id;
1091 phb->type = ioda_type; 1144 phb->type = ioda_type;
1092 1145
@@ -1172,6 +1225,9 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1172 phb->ioda.io_size, phb->ioda.io_segsize); 1225 phb->ioda.io_size, phb->ioda.io_segsize);
1173 1226
1174 phb->hose->ops = &pnv_pci_ops; 1227 phb->hose->ops = &pnv_pci_ops;
1228#ifdef CONFIG_EEH
1229 phb->eeh_ops = &ioda_eeh_ops;
1230#endif
1175 1231
1176 /* Setup RID -> PE mapping function */ 1232 /* Setup RID -> PE mapping function */
1177 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 1233 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -1212,7 +1268,7 @@ void __init pnv_pci_init_ioda_phb(struct device_node *np, int ioda_type)
1212 1268
1213void pnv_pci_init_ioda2_phb(struct device_node *np) 1269void pnv_pci_init_ioda2_phb(struct device_node *np)
1214{ 1270{
1215 pnv_pci_init_ioda_phb(np, PNV_PHB_IODA2); 1271 pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
1216} 1272}
1217 1273
1218void __init pnv_pci_init_ioda_hub(struct device_node *np) 1274void __init pnv_pci_init_ioda_hub(struct device_node *np)
@@ -1235,6 +1291,6 @@ void __init pnv_pci_init_ioda_hub(struct device_node *np)
1235 for_each_child_of_node(np, phbn) { 1291 for_each_child_of_node(np, phbn) {
1236 /* Look for IODA1 PHBs */ 1292 /* Look for IODA1 PHBs */
1237 if (of_device_is_compatible(phbn, "ibm,ioda-phb")) 1293 if (of_device_is_compatible(phbn, "ibm,ioda-phb"))
1238 pnv_pci_init_ioda_phb(phbn, PNV_PHB_IODA1); 1294 pnv_pci_init_ioda_phb(phbn, hub_id, PNV_PHB_IODA1);
1239 } 1295 }
1240} 1296}
diff --git a/arch/powerpc/platforms/powernv/pci-p5ioc2.c b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
index 92b37a0186c9..b68db6325c1b 100644
--- a/arch/powerpc/platforms/powernv/pci-p5ioc2.c
+++ b/arch/powerpc/platforms/powernv/pci-p5ioc2.c
@@ -86,13 +86,16 @@ static void pnv_pci_init_p5ioc2_msis(struct pnv_phb *phb) { }
86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb, 86static void pnv_pci_p5ioc2_dma_dev_setup(struct pnv_phb *phb,
87 struct pci_dev *pdev) 87 struct pci_dev *pdev)
88{ 88{
89 if (phb->p5ioc2.iommu_table.it_map == NULL) 89 if (phb->p5ioc2.iommu_table.it_map == NULL) {
90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node); 90 iommu_init_table(&phb->p5ioc2.iommu_table, phb->hose->node);
91 iommu_register_group(&phb->p5ioc2.iommu_table,
92 pci_domain_nr(phb->hose->bus), phb->opal_id);
93 }
91 94
92 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table); 95 set_iommu_table_base(&pdev->dev, &phb->p5ioc2.iommu_table);
93} 96}
94 97
95static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, 98static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np, u64 hub_id,
96 void *tce_mem, u64 tce_size) 99 void *tce_mem, u64 tce_size)
97{ 100{
98 struct pnv_phb *phb; 101 struct pnv_phb *phb;
@@ -133,6 +136,7 @@ static void __init pnv_pci_init_p5ioc2_phb(struct device_node *np,
133 phb->hose->first_busno = 0; 136 phb->hose->first_busno = 0;
134 phb->hose->last_busno = 0xff; 137 phb->hose->last_busno = 0xff;
135 phb->hose->private_data = phb; 138 phb->hose->private_data = phb;
139 phb->hub_id = hub_id;
136 phb->opal_id = phb_id; 140 phb->opal_id = phb_id;
137 phb->type = PNV_PHB_P5IOC2; 141 phb->type = PNV_PHB_P5IOC2;
138 phb->model = PNV_PHB_MODEL_P5IOC2; 142 phb->model = PNV_PHB_MODEL_P5IOC2;
@@ -226,7 +230,8 @@ void __init pnv_pci_init_p5ioc2_hub(struct device_node *np)
226 for_each_child_of_node(np, phbn) { 230 for_each_child_of_node(np, phbn) {
227 if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") || 231 if (of_device_is_compatible(phbn, "ibm,p5ioc2-pcix") ||
228 of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) { 232 of_device_is_compatible(phbn, "ibm,p5ioc2-pciex")) {
229 pnv_pci_init_p5ioc2_phb(phbn, tce_mem, tce_per_phb); 233 pnv_pci_init_p5ioc2_phb(phbn, hub_id,
234 tce_mem, tce_per_phb);
230 tce_mem += tce_per_phb; 235 tce_mem += tce_per_phb;
231 } 236 }
232 } 237 }
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 277343cc6a3d..a28d3b5e6393 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,6 +20,7 @@
20#include <linux/irq.h> 20#include <linux/irq.h>
21#include <linux/io.h> 21#include <linux/io.h>
22#include <linux/msi.h> 22#include <linux/msi.h>
23#include <linux/iommu.h>
23 24
24#include <asm/sections.h> 25#include <asm/sections.h>
25#include <asm/io.h> 26#include <asm/io.h>
@@ -32,6 +33,8 @@
32#include <asm/iommu.h> 33#include <asm/iommu.h>
33#include <asm/tce.h> 34#include <asm/tce.h>
34#include <asm/firmware.h> 35#include <asm/firmware.h>
36#include <asm/eeh_event.h>
37#include <asm/eeh.h>
35 38
36#include "powernv.h" 39#include "powernv.h"
37#include "pci.h" 40#include "pci.h"
@@ -202,7 +205,8 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
202 205
203 spin_lock_irqsave(&phb->lock, flags); 206 spin_lock_irqsave(&phb->lock, flags);
204 207
205 rc = opal_pci_get_phb_diag_data(phb->opal_id, phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); 208 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
209 PNV_PCI_DIAG_BUF_SIZE);
206 has_diag = (rc == OPAL_SUCCESS); 210 has_diag = (rc == OPAL_SUCCESS);
207 211
208 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no, 212 rc = opal_pci_eeh_freeze_clear(phb->opal_id, pe_no,
@@ -227,43 +231,50 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
227 spin_unlock_irqrestore(&phb->lock, flags); 231 spin_unlock_irqrestore(&phb->lock, flags);
228} 232}
229 233
230static void pnv_pci_config_check_eeh(struct pnv_phb *phb, struct pci_bus *bus, 234static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
231 u32 bdfn) 235 struct device_node *dn)
232{ 236{
233 s64 rc; 237 s64 rc;
234 u8 fstate; 238 u8 fstate;
235 u16 pcierr; 239 u16 pcierr;
236 u32 pe_no; 240 u32 pe_no;
237 241
238 /* Get PE# if we support IODA */ 242 /*
239 pe_no = phb->bdfn_to_pe ? phb->bdfn_to_pe(phb, bus, bdfn & 0xff) : 0; 243 * Get the PE#. During the PCI probe stage, we might not
244 * setup that yet. So all ER errors should be mapped to
245 * PE#0
246 */
247 pe_no = PCI_DN(dn)->pe_number;
248 if (pe_no == IODA_INVALID_PE)
249 pe_no = 0;
240 250
241 /* Read freeze status */ 251 /* Read freeze status */
242 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr, 252 rc = opal_pci_eeh_freeze_status(phb->opal_id, pe_no, &fstate, &pcierr,
243 NULL); 253 NULL);
244 if (rc) { 254 if (rc) {
245 pr_warning("PCI %d: Failed to read EEH status for PE#%d," 255 pr_warning("%s: Can't read EEH status (PE#%d) for "
246 " err %lld\n", phb->hose->global_number, pe_no, rc); 256 "%s, err %lld\n",
257 __func__, pe_no, dn->full_name, rc);
247 return; 258 return;
248 } 259 }
249 cfg_dbg(" -> EEH check, bdfn=%04x PE%d fstate=%x\n", 260 cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
250 bdfn, pe_no, fstate); 261 (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn),
262 pe_no, fstate);
251 if (fstate != 0) 263 if (fstate != 0)
252 pnv_pci_handle_eeh_config(phb, pe_no); 264 pnv_pci_handle_eeh_config(phb, pe_no);
253} 265}
254 266
255static int pnv_pci_read_config(struct pci_bus *bus, 267int pnv_pci_cfg_read(struct device_node *dn,
256 unsigned int devfn, 268 int where, int size, u32 *val)
257 int where, int size, u32 *val)
258{ 269{
259 struct pci_controller *hose = pci_bus_to_host(bus); 270 struct pci_dn *pdn = PCI_DN(dn);
260 struct pnv_phb *phb = hose->private_data; 271 struct pnv_phb *phb = pdn->phb->private_data;
261 u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; 272 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
273#ifdef CONFIG_EEH
274 struct eeh_pe *phb_pe = NULL;
275#endif
262 s64 rc; 276 s64 rc;
263 277
264 if (hose == NULL)
265 return PCIBIOS_DEVICE_NOT_FOUND;
266
267 switch (size) { 278 switch (size) {
268 case 1: { 279 case 1: {
269 u8 v8; 280 u8 v8;
@@ -287,28 +298,43 @@ static int pnv_pci_read_config(struct pci_bus *bus,
287 default: 298 default:
288 return PCIBIOS_FUNC_NOT_SUPPORTED; 299 return PCIBIOS_FUNC_NOT_SUPPORTED;
289 } 300 }
290 cfg_dbg("pnv_pci_read_config bus: %x devfn: %x +%x/%x -> %08x\n", 301 cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
291 bus->number, devfn, where, size, *val); 302 __func__, pdn->busno, pdn->devfn, where, size, *val);
292 303
293 /* Check if the PHB got frozen due to an error (no response) */ 304 /*
294 pnv_pci_config_check_eeh(phb, bus, bdfn); 305 * Check if the specified PE has been put into frozen
306 * state. On the other hand, we needn't do that while
307 * the PHB has been put into frozen state because of
308 * PHB-fatal errors.
309 */
310#ifdef CONFIG_EEH
311 phb_pe = eeh_phb_pe_get(pdn->phb);
312 if (phb_pe && (phb_pe->state & EEH_PE_ISOLATED))
313 return PCIBIOS_SUCCESSFUL;
314
315 if (phb->eeh_state & PNV_EEH_STATE_ENABLED) {
316 if (*val == EEH_IO_ERROR_VALUE(size) &&
317 eeh_dev_check_failure(of_node_to_eeh_dev(dn)))
318 return PCIBIOS_DEVICE_NOT_FOUND;
319 } else {
320 pnv_pci_config_check_eeh(phb, dn);
321 }
322#else
323 pnv_pci_config_check_eeh(phb, dn);
324#endif
295 325
296 return PCIBIOS_SUCCESSFUL; 326 return PCIBIOS_SUCCESSFUL;
297} 327}
298 328
299static int pnv_pci_write_config(struct pci_bus *bus, 329int pnv_pci_cfg_write(struct device_node *dn,
300 unsigned int devfn, 330 int where, int size, u32 val)
301 int where, int size, u32 val)
302{ 331{
303 struct pci_controller *hose = pci_bus_to_host(bus); 332 struct pci_dn *pdn = PCI_DN(dn);
304 struct pnv_phb *phb = hose->private_data; 333 struct pnv_phb *phb = pdn->phb->private_data;
305 u32 bdfn = (((uint64_t)bus->number) << 8) | devfn; 334 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
306
307 if (hose == NULL)
308 return PCIBIOS_DEVICE_NOT_FOUND;
309 335
310 cfg_dbg("pnv_pci_write_config bus: %x devfn: %x +%x/%x -> %08x\n", 336 cfg_dbg("%s: bus: %x devfn: %x +%x/%x -> %08x\n",
311 bus->number, devfn, where, size, val); 337 pdn->busno, pdn->devfn, where, size, val);
312 switch (size) { 338 switch (size) {
313 case 1: 339 case 1:
314 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val); 340 opal_pci_config_write_byte(phb->opal_id, bdfn, where, val);
@@ -322,14 +348,54 @@ static int pnv_pci_write_config(struct pci_bus *bus,
322 default: 348 default:
323 return PCIBIOS_FUNC_NOT_SUPPORTED; 349 return PCIBIOS_FUNC_NOT_SUPPORTED;
324 } 350 }
351
325 /* Check if the PHB got frozen due to an error (no response) */ 352 /* Check if the PHB got frozen due to an error (no response) */
326 pnv_pci_config_check_eeh(phb, bus, bdfn); 353#ifdef CONFIG_EEH
354 if (!(phb->eeh_state & PNV_EEH_STATE_ENABLED))
355 pnv_pci_config_check_eeh(phb, dn);
356#else
357 pnv_pci_config_check_eeh(phb, dn);
358#endif
327 359
328 return PCIBIOS_SUCCESSFUL; 360 return PCIBIOS_SUCCESSFUL;
329} 361}
330 362
363static int pnv_pci_read_config(struct pci_bus *bus,
364 unsigned int devfn,
365 int where, int size, u32 *val)
366{
367 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
368 struct pci_dn *pdn;
369
370 for (dn = busdn->child; dn; dn = dn->sibling) {
371 pdn = PCI_DN(dn);
372 if (pdn && pdn->devfn == devfn)
373 return pnv_pci_cfg_read(dn, where, size, val);
374 }
375
376 *val = 0xFFFFFFFF;
377 return PCIBIOS_DEVICE_NOT_FOUND;
378
379}
380
381static int pnv_pci_write_config(struct pci_bus *bus,
382 unsigned int devfn,
383 int where, int size, u32 val)
384{
385 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
386 struct pci_dn *pdn;
387
388 for (dn = busdn->child; dn; dn = dn->sibling) {
389 pdn = PCI_DN(dn);
390 if (pdn && pdn->devfn == devfn)
391 return pnv_pci_cfg_write(dn, where, size, val);
392 }
393
394 return PCIBIOS_DEVICE_NOT_FOUND;
395}
396
331struct pci_ops pnv_pci_ops = { 397struct pci_ops pnv_pci_ops = {
332 .read = pnv_pci_read_config, 398 .read = pnv_pci_read_config,
333 .write = pnv_pci_write_config, 399 .write = pnv_pci_write_config,
334}; 400};
335 401
@@ -412,6 +478,7 @@ static struct iommu_table *pnv_pci_setup_bml_iommu(struct pci_controller *hose)
412 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)), 478 pnv_pci_setup_iommu_table(tbl, __va(be64_to_cpup(basep)),
413 be32_to_cpup(sizep), 0); 479 be32_to_cpup(sizep), 0);
414 iommu_init_table(tbl, hose->node); 480 iommu_init_table(tbl, hose->node);
481 iommu_register_group(tbl, pci_domain_nr(hose->bus), 0);
415 482
416 /* Deal with SW invalidated TCEs when needed (BML way) */ 483 /* Deal with SW invalidated TCEs when needed (BML way) */
417 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info", 484 swinvp = of_get_property(hose->dn, "linux,tce-sw-invalidate-info",
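
The frozen-PE check added to pnv_pci_cfg_read() above hinges on
EEH_IO_ERROR_VALUE(size): an all-ones pattern sized to the access, since a
frozen slot returns 0xff for every byte read. A sketch of an equivalent
definition (the real macro lives in asm/eeh.h):

	/* All-ones value for a 1/2/4 byte access:
	 * 0xff, 0xffff and 0xffffffff respectively. */
	#define EEH_IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
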
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 25d76c4df50b..d633c64e05a1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -66,15 +66,43 @@ struct pnv_ioda_pe {
66 struct list_head list; 66 struct list_head list;
67}; 67};
68 68
69/* IOC dependent EEH operations */
70#ifdef CONFIG_EEH
71struct pnv_eeh_ops {
72 int (*post_init)(struct pci_controller *hose);
73 int (*set_option)(struct eeh_pe *pe, int option);
74 int (*get_state)(struct eeh_pe *pe);
75 int (*reset)(struct eeh_pe *pe, int option);
76 int (*get_log)(struct eeh_pe *pe, int severity,
77 char *drv_log, unsigned long len);
78 int (*configure_bridge)(struct eeh_pe *pe);
79 int (*next_error)(struct eeh_pe **pe);
80};
81
82#define PNV_EEH_STATE_ENABLED (1 << 0) /* EEH enabled */
83#define PNV_EEH_STATE_REMOVED (1 << 1) /* PHB removed */
84
85#endif /* CONFIG_EEH */
86
69struct pnv_phb { 87struct pnv_phb {
70 struct pci_controller *hose; 88 struct pci_controller *hose;
71 enum pnv_phb_type type; 89 enum pnv_phb_type type;
72 enum pnv_phb_model model; 90 enum pnv_phb_model model;
91 u64 hub_id;
73 u64 opal_id; 92 u64 opal_id;
74 void __iomem *regs; 93 void __iomem *regs;
75 int initialized; 94 int initialized;
76 spinlock_t lock; 95 spinlock_t lock;
77 96
97#ifdef CONFIG_EEH
98 struct pnv_eeh_ops *eeh_ops;
99 int eeh_state;
100#endif
101
102#ifdef CONFIG_DEBUG_FS
103 struct dentry *dbgfs;
104#endif
105
78#ifdef CONFIG_PCI_MSI 106#ifdef CONFIG_PCI_MSI
79 unsigned int msi_base; 107 unsigned int msi_base;
80 unsigned int msi32_support; 108 unsigned int msi32_support;
@@ -150,7 +178,14 @@ struct pnv_phb {
150}; 178};
151 179
152extern struct pci_ops pnv_pci_ops; 180extern struct pci_ops pnv_pci_ops;
181#ifdef CONFIG_EEH
182extern struct pnv_eeh_ops ioda_eeh_ops;
183#endif
153 184
185int pnv_pci_cfg_read(struct device_node *dn,
186 int where, int size, u32 *val);
187int pnv_pci_cfg_write(struct device_node *dn,
188 int where, int size, u32 val);
154extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 189extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
155 void *tce_mem, u64 tce_size, 190 void *tce_mem, u64 tce_size,
156 u64 dma_offset); 191 u64 dma_offset);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index d4459bfc92f7..84438af96c05 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -93,6 +93,8 @@ static void __noreturn pnv_restart(char *cmd)
93{ 93{
94 long rc = OPAL_BUSY; 94 long rc = OPAL_BUSY;
95 95
96 opal_notifier_disable();
97
96 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 98 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
97 rc = opal_cec_reboot(); 99 rc = opal_cec_reboot();
98 if (rc == OPAL_BUSY_EVENT) 100 if (rc == OPAL_BUSY_EVENT)
@@ -108,6 +110,8 @@ static void __noreturn pnv_power_off(void)
108{ 110{
109 long rc = OPAL_BUSY; 111 long rc = OPAL_BUSY;
110 112
113 opal_notifier_disable();
114
111 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { 115 while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
112 rc = opal_cec_power_down(0); 116 rc = opal_cec_power_down(0);
113 if (rc == OPAL_BUSY_EVENT) 117 if (rc == OPAL_BUSY_EVENT)
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 88c9459c3e07..89e3857af4e0 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -40,7 +40,7 @@
40#define DBG(fmt...) 40#define DBG(fmt...)
41#endif 41#endif
42 42
43static void __cpuinit pnv_smp_setup_cpu(int cpu) 43static void pnv_smp_setup_cpu(int cpu)
44{ 44{
45 if (cpu != boot_cpuid) 45 if (cpu != boot_cpuid)
46 xics_setup_cpu(); 46 xics_setup_cpu();
@@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr)
51 /* Special case - we inhibit secondary thread startup 51 /* Special case - we inhibit secondary thread startup
52 * during boot if the user requests it. 52 * during boot if the user requests it.
53 */ 53 */
54 if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { 54 if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
55 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 55 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
56 return 0; 56 return 0;
57 if (smt_enabled_at_boot 57 if (smt_enabled_at_boot
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index 177a2f70700c..3e270e3412ae 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -109,7 +109,8 @@ static long ps3_hpte_remove(unsigned long hpte_group)
109} 109}
110 110
111static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, 111static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp,
112 unsigned long vpn, int psize, int ssize, int local) 112 unsigned long vpn, int psize, int apsize,
113 int ssize, int local)
113{ 114{
114 int result; 115 int result;
115 u64 hpte_v, want_v, hpte_rs; 116 u64 hpte_v, want_v, hpte_rs;
@@ -162,7 +163,7 @@ static void ps3_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
162} 163}
163 164
164static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn, 165static void ps3_hpte_invalidate(unsigned long slot, unsigned long vpn,
165 int psize, int ssize, int local) 166 int psize, int apsize, int ssize, int local)
166{ 167{
167 unsigned long flags; 168 unsigned long flags;
168 int result; 169 int result;
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 4459eff7a75a..1bd3399146ed 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -33,11 +33,6 @@ config PPC_SPLPAR
33 processors, that is, which share physical processors between 33 processors, that is, which share physical processors between
34 two or more partitions. 34 two or more partitions.
35 35
36config EEH
37 bool
38 depends on PPC_PSERIES && PCI
39 default y
40
41config PSERIES_MSI 36config PSERIES_MSI
42 bool 37 bool
43 depends on PCI_MSI && EEH 38 depends on PCI_MSI && EEH
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 53866e537a92..8ae010381316 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -6,9 +6,7 @@ obj-y := lpar.o hvCall.o nvram.o reconfig.o \
6 firmware.o power.o dlpar.o mobility.o 6 firmware.o power.o dlpar.o mobility.o
7obj-$(CONFIG_SMP) += smp.o 7obj-$(CONFIG_SMP) += smp.o
8obj-$(CONFIG_SCANLOG) += scanlog.o 8obj-$(CONFIG_SCANLOG) += scanlog.o
9obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \ 9obj-$(CONFIG_EEH) += eeh_pseries.o
10 eeh_driver.o eeh_event.o eeh_sysfs.o \
11 eeh_pseries.o
12obj-$(CONFIG_KEXEC) += kexec.o 10obj-$(CONFIG_KEXEC) += kexec.o
13obj-$(CONFIG_PCI) += pci.o pci_dlpar.o 11obj-$(CONFIG_PCI) += pci.o pci_dlpar.o
14obj-$(CONFIG_PSERIES_MSI) += msi.o 12obj-$(CONFIG_PSERIES_MSI) += msi.o
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
deleted file mode 100644
index 6b73d6c44f51..000000000000
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ /dev/null
@@ -1,942 +0,0 @@
1/*
2 * Copyright IBM Corporation 2001, 2005, 2006
3 * Copyright Dave Engebretsen & Todd Inglett 2001
4 * Copyright Linas Vepstas 2005, 2006
5 * Copyright 2001-2012 IBM Corporation.
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 *
21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
22 */
23
24#include <linux/delay.h>
25#include <linux/sched.h>
26#include <linux/init.h>
27#include <linux/list.h>
28#include <linux/pci.h>
29#include <linux/proc_fs.h>
30#include <linux/rbtree.h>
31#include <linux/seq_file.h>
32#include <linux/spinlock.h>
33#include <linux/export.h>
34#include <linux/of.h>
35
36#include <linux/atomic.h>
37#include <asm/eeh.h>
38#include <asm/eeh_event.h>
39#include <asm/io.h>
40#include <asm/machdep.h>
41#include <asm/ppc-pci.h>
42#include <asm/rtas.h>
43
44
45/** Overview:
46 * EEH, or "Extended Error Handling" is a PCI bridge technology for
47 * dealing with PCI bus errors that can't be dealt with within the
48 * usual PCI framework, except by check-stopping the CPU. Systems
49 * that are designed for high-availability/reliability cannot afford
50 * to crash due to a "mere" PCI error, thus the need for EEH.
51 * An EEH-capable bridge operates by converting a detected error
52 * into a "slot freeze", taking the PCI adapter off-line, making
53 * the slot behave, from the OS's point of view, as if the slot
54 * were "empty": all reads return 0xff's and all writes are silently
55 * ignored. EEH slot isolation events can be triggered by parity
56 * errors on the address or data busses (e.g. during posted writes),
57 * which in turn might be caused by low voltage on the bus, dust,
58 * vibration, humidity, radioactivity or plain-old failed hardware.
59 *
60 * Note, however, that one of the leading causes of EEH slot
61 * freeze events are buggy device drivers, buggy device microcode,
62 * or buggy device hardware. This is because any attempt by the
63 * device to bus-master data to a memory address that is not
64 * assigned to the device will trigger a slot freeze. (The idea
65 * is to prevent devices-gone-wild from corrupting system memory).
66 * Buggy hardware/drivers will have a miserable time co-existing
67 * with EEH.
68 *
69 * Ideally, a PCI device driver, when suspecting that an isolation
70 * event has occurred (e.g. by reading 0xff's), will then ask EEH
71 * whether this is the case, and then take appropriate steps to
72 * reset the PCI slot, the PCI device, and then resume operations.
73 * However, until that day, the checking is done here, with the
74 * eeh_check_failure() routine embedded in the MMIO macros. If
75 * the slot is found to be isolated, an "EEH Event" is synthesized
76 * and sent out for processing.
77 */
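
For illustration, the check described above written out as a hypothetical
wrapper; in practice the hook sits inside the powerpc MMIO accessors rather
than in individual drivers:

	static inline u32 checked_readl(const volatile void __iomem *addr)
	{
		u32 val = readl(addr);

		/* All-ones data may mean a frozen slot: ask the firmware */
		if (val == 0xffffffff)
			eeh_check_failure(addr, val);

		return val;
	}
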
78
79/* If a device driver keeps reading an MMIO register in an interrupt
80 * handler after a slot isolation event, it might be broken.
81 * This sets the threshold for how many read attempts we allow
82 * before printing an error message.
83 */
84#define EEH_MAX_FAILS 2100000
85
86/* Time to wait for a PCI slot to report status, in milliseconds */
87#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
88
89/* Platform dependent EEH operations */
90struct eeh_ops *eeh_ops = NULL;
91
92int eeh_subsystem_enabled;
93EXPORT_SYMBOL(eeh_subsystem_enabled);
94
95/*
96 * EEH probe mode support. The intention is to support multiple
97 * platforms for EEH. Some platforms like pSeries do PCI enumeration
98 * based on device tree. However, other platforms like powernv probe
99 * PCI devices from hardware. The flag is used to distinguish that.
100 * In addition, struct eeh_ops::probe is invoked for a particular
101 * OF node or PCI device so that the corresponding PE is created
102 * there.
103 */
104int eeh_probe_mode;
105
106/* Global EEH mutex */
107DEFINE_MUTEX(eeh_mutex);
108
109/* Lock to avoid races due to multiple reports of an error */
110static DEFINE_RAW_SPINLOCK(confirm_error_lock);
111
112/* Buffer for reporting pci register dumps. It's here in BSS, and
113 * not dynamically allocated, so that it ends up in RMO where RTAS
114 * can access it.
115 */
116#define EEH_PCI_REGS_LOG_LEN 4096
117static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];
118
119/*
120 * The struct is used to maintain the global EEH statistic
121 * information. In addition, the EEH global statistics are
122 * exported to user space through procfs.
123 */
124struct eeh_stats {
125 u64 no_device; /* PCI device not found */
126 u64 no_dn; /* OF node not found */
127 u64 no_cfg_addr; /* Config address not found */
128 u64 ignored_check; /* EEH check skipped */
129 u64 total_mmio_ffs; /* Total EEH checks */
130 u64 false_positives; /* Unnecessary EEH checks */
131 u64 slot_resets; /* PE reset */
132};
133
134static struct eeh_stats eeh_stats;
135
136#define IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE)
137
138/**
139 * eeh_gather_pci_data - Copy assorted PCI config space registers to buff
140 * @edev: device to report data for
141 * @buf: point to buffer in which to log
142 * @len: amount of room in buffer
143 *
144 * This routine captures assorted PCI configuration space data,
145 * and puts them into a buffer for RTAS error logging.
146 */
147static size_t eeh_gather_pci_data(struct eeh_dev *edev, char * buf, size_t len)
148{
149 struct device_node *dn = eeh_dev_to_of_node(edev);
150 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
151 u32 cfg;
152 int cap, i;
153 int n = 0;
154
155 n += scnprintf(buf+n, len-n, "%s\n", dn->full_name);
156 printk(KERN_WARNING "EEH: of node=%s\n", dn->full_name);
157
158 eeh_ops->read_config(dn, PCI_VENDOR_ID, 4, &cfg);
159 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
160 printk(KERN_WARNING "EEH: PCI device/vendor: %08x\n", cfg);
161
162 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cfg);
163 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
164 printk(KERN_WARNING "EEH: PCI cmd/status register: %08x\n", cfg);
165
166 if (!dev) {
167 printk(KERN_WARNING "EEH: no PCI device for this of node\n");
168 return n;
169 }
170
171 /* Gather bridge-specific registers */
172 if (dev->class >> 16 == PCI_BASE_CLASS_BRIDGE) {
173 eeh_ops->read_config(dn, PCI_SEC_STATUS, 2, &cfg);
174 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
175 printk(KERN_WARNING "EEH: Bridge secondary status: %04x\n", cfg);
176
177 eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &cfg);
178 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
179 printk(KERN_WARNING "EEH: Bridge control: %04x\n", cfg);
180 }
181
182 /* Dump out the PCI-X command and status regs */
183 cap = pci_find_capability(dev, PCI_CAP_ID_PCIX);
184 if (cap) {
185 eeh_ops->read_config(dn, cap, 4, &cfg);
186 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
187 printk(KERN_WARNING "EEH: PCI-X cmd: %08x\n", cfg);
188
189 eeh_ops->read_config(dn, cap+4, 4, &cfg);
190 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
191 printk(KERN_WARNING "EEH: PCI-X status: %08x\n", cfg);
192 }
193
194 /* If PCI-E capable, dump PCI-E cap 10, and the AER */
195 cap = pci_find_capability(dev, PCI_CAP_ID_EXP);
196 if (cap) {
197 n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
198 printk(KERN_WARNING
199 "EEH: PCI-E capabilities and status follow:\n");
200
201 for (i=0; i<=8; i++) {
202 eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
203 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
204 printk(KERN_WARNING "EEH: PCI-E %02x: %08x\n", i, cfg);
205 }
206
207 cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
208 if (cap) {
209 n += scnprintf(buf+n, len-n, "pci-e AER:\n");
210 printk(KERN_WARNING
211 "EEH: PCI-E AER capability register set follows:\n");
212
213 for (i=0; i<14; i++) {
214 eeh_ops->read_config(dn, cap+4*i, 4, &cfg);
215 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
216 printk(KERN_WARNING "EEH: PCI-E AER %02x: %08x\n", i, cfg);
217 }
218 }
219 }
220
221 return n;
222}
223
224/**
225 * eeh_slot_error_detail - Generate combined log including driver log and error log
226 * @pe: EEH PE
227 * @severity: temporary or permanent error log
228 *
229 * This routine should be called to generate the combined log, which
230 * consists of the driver log and the error log. The driver log is
231 * extracted from the config space of the corresponding PCI device, while
232 * the error log is fetched through platform dependent function call.
233 */
234void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
235{
236 size_t loglen = 0;
237 struct eeh_dev *edev;
238
239 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
240 eeh_ops->configure_bridge(pe);
241 eeh_pe_restore_bars(pe);
242
243 pci_regs_buf[0] = 0;
244 eeh_pe_for_each_dev(pe, edev) {
245 loglen += eeh_gather_pci_data(edev, pci_regs_buf,
246 EEH_PCI_REGS_LOG_LEN);
247 }
248
249 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
250}
251
252/**
253 * eeh_token_to_phys - Convert EEH address token to phys address
254 * @token: I/O token, should be address in the form 0xA....
255 *
256 * This routine should be called to convert a virtual I/O address
257 * to a physical one.
258 */
259static inline unsigned long eeh_token_to_phys(unsigned long token)
260{
261 pte_t *ptep;
262 unsigned long pa;
263
264 ptep = find_linux_pte(init_mm.pgd, token);
265 if (!ptep)
266 return token;
267 pa = pte_pfn(*ptep) << PAGE_SHIFT;
268
269 return pa | (token & (PAGE_SIZE-1));
270}
271
272/**
273 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
274 * @edev: eeh device
275 *
276 * Check for an EEH failure for the given device node. Call this
277 * routine if the result of a read was all 0xff's and you want to
278 * find out if this is due to an EEH slot freeze. This routine
279 * will query firmware for the EEH status.
280 *
281 * Returns 0 if there has not been an EEH error; otherwise returns
282 * a non-zero value and queues up a slot isolation event notification.
283 *
284 * It is safe to call this routine in an interrupt context.
285 */
286int eeh_dev_check_failure(struct eeh_dev *edev)
287{
288 int ret;
289 unsigned long flags;
290 struct device_node *dn;
291 struct pci_dev *dev;
292 struct eeh_pe *pe;
293 int rc = 0;
294 const char *location;
295
296 eeh_stats.total_mmio_ffs++;
297
298 if (!eeh_subsystem_enabled)
299 return 0;
300
301 if (!edev) {
302 eeh_stats.no_dn++;
303 return 0;
304 }
305 dn = eeh_dev_to_of_node(edev);
306 dev = eeh_dev_to_pci_dev(edev);
307 pe = edev->pe;
308
309 /* Access to IO BARs might get this far and still not want checking. */
310 if (!pe) {
311 eeh_stats.ignored_check++;
312 pr_debug("EEH: Ignored check for %s %s\n",
313 eeh_pci_name(dev), dn->full_name);
314 return 0;
315 }
316
317 if (!pe->addr && !pe->config_addr) {
318 eeh_stats.no_cfg_addr++;
319 return 0;
320 }
321
322 /* If we already have a pending isolation event for this
323 * slot, we know it's bad already, we don't need to check.
324 * Do this checking under a lock; as multiple PCI devices
325 * in one slot might report errors simultaneously, and we
326 * only want one error recovery routine running.
327 */
328 raw_spin_lock_irqsave(&confirm_error_lock, flags);
329 rc = 1;
330 if (pe->state & EEH_PE_ISOLATED) {
331 pe->check_count++;
332 if (pe->check_count % EEH_MAX_FAILS == 0) {
333 location = of_get_property(dn, "ibm,loc-code", NULL);
334 printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
335 "location=%s driver=%s pci addr=%s\n",
336 pe->check_count, location,
337 eeh_driver_name(dev), eeh_pci_name(dev));
338 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
339 eeh_driver_name(dev));
340 dump_stack();
341 }
342 goto dn_unlock;
343 }
344
345 /*
346 * Now test for an EEH failure. This is VERY expensive.
347 * Note that the eeh_config_addr may be a parent device
348 * in the case of a device behind a bridge, or it may be
349 * function zero of a multi-function device.
350 * In any case they must share a common PHB.
351 */
352 ret = eeh_ops->get_state(pe, NULL);
353
354 /* Note that config-io to empty slots may fail;
355 * they are empty when they don't have children.
356 * We punt under the following conditions: failure to get the
357 * PE's state, EEH not supported or permanently unavailable,
358 * or the PE being in a good state.
359 */
360 if ((ret < 0) ||
361 (ret == EEH_STATE_NOT_SUPPORT) ||
362 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
363 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
364 eeh_stats.false_positives++;
365 pe->false_positives++;
366 rc = 0;
367 goto dn_unlock;
368 }
369
370 eeh_stats.slot_resets++;
371
372 /* Avoid repeated reports of this failure, including problems
373 * with other functions on this device, and functions under
374 * bridges.
375 */
376 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
377 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
378
379 eeh_send_failure_event(pe);
380
381 /* Most EEH events are due to device driver bugs. Having
382 * a stack trace will help the device-driver authors figure
383 * out what happened. So print that out.
384 */
385 WARN(1, "EEH: failure detected\n");
386 return 1;
387
388dn_unlock:
389 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
390 return rc;
391}
392
393EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
394
395/**
396 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
397 * @token: I/O token, should be address in the form 0xA....
398 * @val: value, should be all 1's (XXX why do we need this arg??)
399 *
400 * Check for an EEH failure at the given token address. Call this
401 * routine if the result of a read was all 0xff's and you want to
402 * find out if this is due to an EEH slot freeze event. This routine
403 * will query firmware for the EEH status.
404 *
405 * Note this routine is safe to call in an interrupt context.
406 */
407unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
408{
409 unsigned long addr;
410 struct eeh_dev *edev;
411
412 /* Finding the phys addr + pci device; this is pretty quick. */
413 addr = eeh_token_to_phys((unsigned long __force) token);
414 edev = eeh_addr_cache_get_dev(addr);
415 if (!edev) {
416 eeh_stats.no_device++;
417 return val;
418 }
419
420 eeh_dev_check_failure(edev);
421
422 pci_dev_put(eeh_dev_to_pci_dev(edev));
423 return val;
424}
425
426EXPORT_SYMBOL(eeh_check_failure);
427
428
429/**
430 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
431 * @pe: EEH PE
432 *
433 * This routine should be called to reenable frozen MMIO or DMA
434 * so that it would work correctly again. It's useful while doing
435 * recovery or log collection on the indicated device.
436 */
437int eeh_pci_enable(struct eeh_pe *pe, int function)
438{
439 int rc;
440
441 rc = eeh_ops->set_option(pe, function);
442 if (rc)
443 pr_warning("%s: Unexpected state change %d on PHB#%d-PE#%x, err=%d\n",
444 __func__, function, pe->phb->global_number, pe->addr, rc);
445
446 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
447 if (rc > 0 && (rc & EEH_STATE_MMIO_ENABLED) &&
448 (function == EEH_OPT_THAW_MMIO))
449 return 0;
450
451 return rc;
452}
453
454/**
455 * pcibios_set_pcie_slot_reset - Set PCI-E reset state
456 * @dev: pci device struct
457 * @state: reset state to enter
458 *
459 * Return value:
460 * 0 if success
461 */
462int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
463{
464 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
465 struct eeh_pe *pe = edev->pe;
466
467 if (!pe) {
468 pr_err("%s: No PE found on PCI device %s\n",
469 __func__, pci_name(dev));
470 return -EINVAL;
471 }
472
473 switch (state) {
474 case pcie_deassert_reset:
475 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
476 break;
477 case pcie_hot_reset:
478 eeh_ops->reset(pe, EEH_RESET_HOT);
479 break;
480 case pcie_warm_reset:
481 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
482 break;
483 default:
484 return -EINVAL;
485 };
486
487 return 0;
488}
489
490/**
491 * eeh_set_dev_freset - Check the required reset for the indicated device
492 * @data: EEH device
493 * @flag: return value
494 *
495 * Each device might have its preferred reset type: fundamental or
496 * hot reset. The routine is used to collect that information for
497 * the indicated device and its children so that the whole group
498 * of devices can be reset properly.
499 */
500static void *eeh_set_dev_freset(void *data, void *flag)
501{
502 struct pci_dev *dev;
503 unsigned int *freset = (unsigned int *)flag;
504 struct eeh_dev *edev = (struct eeh_dev *)data;
505
506 dev = eeh_dev_to_pci_dev(edev);
507 if (dev)
508 *freset |= dev->needs_freset;
509
510 return NULL;
511}
512
513/**
514 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
515 * @pe: EEH PE
516 *
517 * Assert the PCI #RST line for 1/4 second.
518 */
519static void eeh_reset_pe_once(struct eeh_pe *pe)
520{
521 unsigned int freset = 0;
522
523 /* Determine type of EEH reset required for
524 * Partitionable Endpoint, a hot-reset (1)
525 * or a fundamental reset (3).
526 * A fundamental reset required by any device under
527 * Partitionable Endpoint trumps hot-reset.
528 */
529 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
530
531 if (freset)
532 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
533 else
534 eeh_ops->reset(pe, EEH_RESET_HOT);
535
536 /* The PCI bus requires that the reset be held high for at least
537 * 100 milliseconds. We wait a bit longer 'just in case'.
538 */
539#define PCI_BUS_RST_HOLD_TIME_MSEC 250
540 msleep(PCI_BUS_RST_HOLD_TIME_MSEC);
541
542 /* We might get hit with another EEH freeze as soon as the
543 * pci slot reset line is dropped. Make sure we don't miss
544 * these, and clear the flag now.
545 */
546 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
547
548 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
549
550 /* After a PCI slot has been reset, the PCI Express spec requires
551 * a 1.5 second idle time for the bus to stabilize, before starting
552 * up traffic.
553 */
554#define PCI_BUS_SETTLE_TIME_MSEC 1800
555 msleep(PCI_BUS_SETTLE_TIME_MSEC);
556}
557
558/**
559 * eeh_reset_pe - Reset the indicated PE
560 * @pe: EEH PE
561 *
562 * This routine should be called to reset the indicated PE. A PE
563 * might include multiple PCI devices, and sometimes PCI bridges
564 * are involved as well.
565 */
566int eeh_reset_pe(struct eeh_pe *pe)
567{
568 int i, rc;
569
570 /* Take three shots at resetting the bus */
571 for (i=0; i<3; i++) {
572 eeh_reset_pe_once(pe);
573
574 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
575 if (rc == (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE))
576 return 0;
577
578 if (rc < 0) {
579 pr_err("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
580 __func__, pe->phb->global_number, pe->addr);
581 return -1;
582 }
583 pr_err("EEH: bus reset %d failed on PHB#%d-PE#%x, rc=%d\n",
584 i+1, pe->phb->global_number, pe->addr, rc);
585 }
586
587 return -1;
588}
589
590/**
591 * eeh_save_bars - Save device bars
592 * @edev: PCI device associated EEH device
593 *
594 * Save the values of the device bars. Unlike the restore
595 * routine, this routine is *not* recursive. This is because
596 * PCI devices are added individually; but, for the restore,
597 * an entire slot is reset at a time.
598 */
599void eeh_save_bars(struct eeh_dev *edev)
600{
601 int i;
602 struct device_node *dn;
603
604 if (!edev)
605 return;
606 dn = eeh_dev_to_of_node(edev);
607
608 for (i = 0; i < 16; i++)
609 eeh_ops->read_config(dn, i * 4, 4, &edev->config_space[i]);
610}
611
612/**
613 * eeh_ops_register - Register platform dependent EEH operations
614 * @ops: platform dependent EEH operations
615 *
616 * Register the platform dependent EEH operation callback
617 * functions. The platform should call this function before
618 * any other EEH operations.
619 */
620int __init eeh_ops_register(struct eeh_ops *ops)
621{
622 if (!ops->name) {
623 pr_warning("%s: Invalid EEH ops name for %p\n",
624 __func__, ops);
625 return -EINVAL;
626 }
627
628 if (eeh_ops && eeh_ops != ops) {
629 pr_warning("%s: EEH ops of platform %s already existing (%s)\n",
630 __func__, eeh_ops->name, ops->name);
631 return -EEXIST;
632 }
633
634 eeh_ops = ops;
635
636 return 0;
637}
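For context, a platform wires itself up by passing its callback table to this function during early boot. A minimal sketch, assuming only the eeh_ops fields visible in this file; the demo_* names are invented for illustration:

	/* Hypothetical platform glue; only fields used in this file are assumed */
	static int demo_eeh_init(void)
	{
		return 0;	/* platform-specific setup would go here */
	}

	static struct eeh_ops demo_eeh_ops = {
		.name	= "demo",
		.init	= demo_eeh_init,
		/* .reset, .wait_state, .of_probe, .read_config, etc. as needed */
	};

	static int __init demo_eeh_probe(void)
	{
		/* Must run before eeh_init(), which bails out if eeh_ops is unset */
		return eeh_ops_register(&demo_eeh_ops);
	}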
638
639/**
640 * eeh_ops_unregister - Unregister platform dependent EEH operations
641 * @name: name of EEH platform operations
642 *
643 * Unregister the platform dependent EEH operation callback
644 * functions.
645 */
646int __exit eeh_ops_unregister(const char *name)
647{
648 if (!name || !strlen(name)) {
649 pr_warning("%s: Invalid EEH ops name\n",
650 __func__);
651 return -EINVAL;
652 }
653
654 if (eeh_ops && !strcmp(eeh_ops->name, name)) {
655 eeh_ops = NULL;
656 return 0;
657 }
658
659 return -EEXIST;
660}
661
662/**
663 * eeh_init - EEH initialization
664 *
665 * Initialize EEH by trying to enable it for all of the adapters in the system.
666 * As a side effect we can determine here if EEH is supported at all.
667 * Note that we leave EEH on so failed config cycles won't cause a machine
668 * check. If a user turns off EEH for a particular adapter they are really
669 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
670 * grant access to a slot if EEH isn't enabled, and so we always enable
671 * EEH for all slots/all devices.
672 *
673 * The eeh-force-off option disables EEH checking globally, for all slots.
674 * Even if force-off is set, the EEH hardware is still enabled, so that
675 * newer systems can boot.
676 */
677static int __init eeh_init(void)
678{
679 struct pci_controller *hose, *tmp;
680 struct device_node *phb;
681 int ret;
682
683 /* call platform initialization function */
684 if (!eeh_ops) {
685 pr_warning("%s: Platform EEH operation not found\n",
686 __func__);
687 return -EEXIST;
688 } else if ((ret = eeh_ops->init())) {
689 pr_warning("%s: Failed to call platform init function (%d)\n",
690 __func__, ret);
691 return ret;
692 }
693
694 raw_spin_lock_init(&confirm_error_lock);
695
696 /* Enable EEH for all adapters */
697 if (eeh_probe_mode_devtree()) {
698 list_for_each_entry_safe(hose, tmp,
699 &hose_list, list_node) {
700 phb = hose->dn;
701 traverse_pci_devices(phb, eeh_ops->of_probe, NULL);
702 }
703 }
704
705 if (eeh_subsystem_enabled)
706 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
707 else
708 pr_warning("EEH: No capable adapters found\n");
709
710 return ret;
711}
712
713core_initcall_sync(eeh_init);
714
715/**
716 * eeh_add_device_early - Enable EEH for the indicated device_node
717 * @dn: device node for which to set up EEH
718 *
719 * This routine must be used to perform EEH initialization for PCI
720 * devices that were added after system boot (e.g. hotplug, dlpar).
721 * This routine must be called before any i/o is performed to the
722 * adapter (including any config-space i/o).
723 * Whether this actually enables EEH or not for this device depends
724 * on the CEC architecture, the type of the device, earlier boot
725 * command-line arguments, etc.
726 */
727static void eeh_add_device_early(struct device_node *dn)
728{
729 struct pci_controller *phb;
730
731 if (!of_node_to_eeh_dev(dn))
732 return;
733 phb = of_node_to_eeh_dev(dn)->phb;
734
735 /* USB Bus children of PCI devices will not have BUIDs */
736 if (NULL == phb || 0 == phb->buid)
737 return;
738
739 /* FIXME: hotplug support on POWERNV */
740 eeh_ops->of_probe(dn, NULL);
741}
742
743/**
744 * eeh_add_device_tree_early - Enable EEH for the indicated device
745 * @dn: device node
746 *
747 * This routine must be used to perform EEH initialization for the
748 * indicated PCI device that was added after system boot (e.g.
749 * hotplug, dlpar).
750 */
751void eeh_add_device_tree_early(struct device_node *dn)
752{
753 struct device_node *sib;
754
755 for_each_child_of_node(dn, sib)
756 eeh_add_device_tree_early(sib);
757 eeh_add_device_early(dn);
758}
759EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
760
761/**
762 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
763 * @dev: pci device for which to set up EEH
764 *
765 * This routine must be used to complete EEH initialization for PCI
766 * devices that were added after system boot (e.g. hotplug, dlpar).
767 */
768static void eeh_add_device_late(struct pci_dev *dev)
769{
770 struct device_node *dn;
771 struct eeh_dev *edev;
772
773 if (!dev || !eeh_subsystem_enabled)
774 return;
775
776 pr_debug("EEH: Adding device %s\n", pci_name(dev));
777
778 dn = pci_device_to_OF_node(dev);
779 edev = of_node_to_eeh_dev(dn);
780 if (edev->pdev == dev) {
781 pr_debug("EEH: Already referenced !\n");
782 return;
783 }
784 WARN_ON(edev->pdev);
785
786 pci_dev_get(dev);
787 edev->pdev = dev;
788 dev->dev.archdata.edev = edev;
789
790 eeh_addr_cache_insert_dev(dev);
791}
792
793/**
794 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
795 * @bus: PCI bus
796 *
797 * This routine must be used to perform EEH initialization for PCI
798 * devices which are attached to the indicated PCI bus. The PCI bus
799 * is added after system boot through hotplug or dlpar.
800 */
801void eeh_add_device_tree_late(struct pci_bus *bus)
802{
803 struct pci_dev *dev;
804
805 list_for_each_entry(dev, &bus->devices, bus_list) {
806 eeh_add_device_late(dev);
807 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
808 struct pci_bus *subbus = dev->subordinate;
809 if (subbus)
810 eeh_add_device_tree_late(subbus);
811 }
812 }
813}
814EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
815
816/**
817 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
818 * @bus: PCI bus
819 *
820 * This routine must be used to add EEH sysfs files for PCI
821 * devices which are attached to the indicated PCI bus. The PCI bus
822 * is added after system boot through hotplug or dlpar.
823 */
824void eeh_add_sysfs_files(struct pci_bus *bus)
825{
826 struct pci_dev *dev;
827
828 list_for_each_entry(dev, &bus->devices, bus_list) {
829 eeh_sysfs_add_device(dev);
830 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
831 struct pci_bus *subbus = dev->subordinate;
832 if (subbus)
833 eeh_add_sysfs_files(subbus);
834 }
835 }
836}
837EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
838
839/**
840 * eeh_remove_device - Undo EEH setup for the indicated pci device
841 * @dev: pci device to be removed
842 * @purge_pe: remove the PE or not
843 *
844 * This routine should be called when a device is removed from
845 * a running system (e.g. by hotplug or dlpar). It unregisters
846 * the PCI device from the EEH subsystem. I/O errors affecting
847 * this device will no longer be detected after this call; thus,
848 * i/o errors affecting this slot may leave this device unusable.
849 */
850static void eeh_remove_device(struct pci_dev *dev, int purge_pe)
851{
852 struct eeh_dev *edev;
853
854 if (!dev || !eeh_subsystem_enabled)
855 return;
856 edev = pci_dev_to_eeh_dev(dev);
857
858 /* Unregister the device with the EEH/PCI address search system */
859 pr_debug("EEH: Removing device %s\n", pci_name(dev));
860
861 if (!edev || !edev->pdev) {
862 pr_debug("EEH: Not referenced !\n");
863 return;
864 }
865 edev->pdev = NULL;
866 dev->dev.archdata.edev = NULL;
867 pci_dev_put(dev);
868
869 eeh_rmv_from_parent_pe(edev, purge_pe);
870 eeh_addr_cache_rmv_dev(dev);
871 eeh_sysfs_remove_device(dev);
872}
873
874/**
875 * eeh_remove_bus_device - Undo EEH setup for the indicated PCI device
876 * @dev: PCI device
877 * @purge_pe: remove the corresponding PE or not
878 *
879 * This routine must be called when a device is removed from the
880 * running system through hotplug or dlpar. The corresponding
881 * PCI address cache will be removed.
882 */
883void eeh_remove_bus_device(struct pci_dev *dev, int purge_pe)
884{
885 struct pci_bus *bus = dev->subordinate;
886 struct pci_dev *child, *tmp;
887
888 eeh_remove_device(dev, purge_pe);
889
890 if (bus && dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
891 list_for_each_entry_safe(child, tmp, &bus->devices, bus_list)
892 eeh_remove_bus_device(child, purge_pe);
893 }
894}
895EXPORT_SYMBOL_GPL(eeh_remove_bus_device);
896
897static int proc_eeh_show(struct seq_file *m, void *v)
898{
899 if (0 == eeh_subsystem_enabled) {
900 seq_printf(m, "EEH Subsystem is globally disabled\n");
901 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
902 } else {
903 seq_printf(m, "EEH Subsystem is enabled\n");
904 seq_printf(m,
905 "no device=%llu\n"
906 "no device node=%llu\n"
907 "no config address=%llu\n"
908 "check not wanted=%llu\n"
909 "eeh_total_mmio_ffs=%llu\n"
910 "eeh_false_positives=%llu\n"
911 "eeh_slot_resets=%llu\n",
912 eeh_stats.no_device,
913 eeh_stats.no_dn,
914 eeh_stats.no_cfg_addr,
915 eeh_stats.ignored_check,
916 eeh_stats.total_mmio_ffs,
917 eeh_stats.false_positives,
918 eeh_stats.slot_resets);
919 }
920
921 return 0;
922}
923
924static int proc_eeh_open(struct inode *inode, struct file *file)
925{
926 return single_open(file, proc_eeh_show, NULL);
927}
928
929static const struct file_operations proc_eeh_operations = {
930 .open = proc_eeh_open,
931 .read = seq_read,
932 .llseek = seq_lseek,
933 .release = single_release,
934};
935
936static int __init eeh_init_proc(void)
937{
938 if (machine_is(pseries))
939 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
940 return 0;
941}
942__initcall(eeh_init_proc);
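For illustration, reading the proc file back on a pseries machine would produce output in the format built by proc_eeh_show() above; the counter values here are made up:

	$ cat /proc/powerpc/eeh
	EEH Subsystem is enabled
	no device=0
	no device node=0
	no config address=0
	check not wanted=0
	eeh_total_mmio_ffs=12
	eeh_false_positives=0
	eeh_slot_resets=1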
diff --git a/arch/powerpc/platforms/pseries/eeh_cache.c b/arch/powerpc/platforms/pseries/eeh_cache.c
deleted file mode 100644
index 5ce3ba7ad137..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_cache.c
+++ /dev/null
@@ -1,319 +0,0 @@
1/*
2 * PCI address cache; allows the lookup of PCI devices based on I/O address
3 *
4 * Copyright IBM Corporation 2004
5 * Copyright Linas Vepstas <linas@austin.ibm.com> 2004
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21
22#include <linux/list.h>
23#include <linux/pci.h>
24#include <linux/rbtree.h>
25#include <linux/slab.h>
26#include <linux/spinlock.h>
27#include <linux/atomic.h>
28#include <asm/pci-bridge.h>
29#include <asm/ppc-pci.h>
30
31
32/**
33 * The pci address cache subsystem. This subsystem places
34 * PCI device address resources into a red-black tree, sorted
35 * according to the address range, so that given only an i/o
36 * address, the corresponding PCI device can be **quickly**
37 * found. It is safe to perform an address lookup in an interrupt
38 * context; this ability is an important feature.
39 *
40 * Currently, the only customer of this code is the EEH subsystem;
41 * thus, this code has been somewhat tailored to suit EEH better.
42 * In particular, the cache does *not* hold the addresses of devices
43 * for which EEH is not enabled.
44 *
45 * (Implementation Note: The RB tree seems to be better/faster
46 * than any hash algo I could think of for this problem, even
47 * with the penalty of slow pointer chases for d-cache misses).
48 */
49struct pci_io_addr_range {
50 struct rb_node rb_node;
51 unsigned long addr_lo;
52 unsigned long addr_hi;
53 struct eeh_dev *edev;
54 struct pci_dev *pcidev;
55 unsigned int flags;
56};
57
58static struct pci_io_addr_cache {
59 struct rb_root rb_root;
60 spinlock_t piar_lock;
61} pci_io_addr_cache_root;
62
63static inline struct eeh_dev *__eeh_addr_cache_get_device(unsigned long addr)
64{
65 struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
66
67 while (n) {
68 struct pci_io_addr_range *piar;
69 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
70
71 if (addr < piar->addr_lo) {
72 n = n->rb_left;
73 } else {
74 if (addr > piar->addr_hi) {
75 n = n->rb_right;
76 } else {
77 pci_dev_get(piar->pcidev);
78 return piar->edev;
79 }
80 }
81 }
82
83 return NULL;
84}
85
86/**
87 * eeh_addr_cache_get_dev - Get device, given only address
88 * @addr: mmio (PIO) phys address or i/o port number
89 *
90 * Given an mmio phys address, or a port number, find a pci device
91 * that implements this address. Be sure to pci_dev_put the device
92 * when finished. I/O port numbers are assumed to be offset
93 * from zero (that is, they do *not* have pci_io_addr added in).
94 * It is safe to call this function within an interrupt.
95 */
96struct eeh_dev *eeh_addr_cache_get_dev(unsigned long addr)
97{
98 struct eeh_dev *edev;
99 unsigned long flags;
100
101 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
102 edev = __eeh_addr_cache_get_device(addr);
103 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
104 return edev;
105}
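Since the lookup takes a reference on the underlying PCI device, callers must drop it when done. A minimal usage sketch; the faulting address and surrounding context are hypothetical:

	/* Sketch: map a faulting MMIO address back to its device, safely
	 * from interrupt context, then drop the reference the lookup took.
	 */
	struct eeh_dev *edev = eeh_addr_cache_get_dev(addr);
	if (edev) {
		/* ... inspect edev / kick off EEH processing ... */
		pci_dev_put(eeh_dev_to_pci_dev(edev));
	}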
106
107#ifdef DEBUG
108/*
109 * Handy-dandy debug print routine, does nothing more
110 * than print out the contents of our addr cache.
111 */
112static void eeh_addr_cache_print(struct pci_io_addr_cache *cache)
113{
114 struct rb_node *n;
115 int cnt = 0;
116
117 n = rb_first(&cache->rb_root);
118 while (n) {
119 struct pci_io_addr_range *piar;
120 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
121 pr_debug("PCI: %s addr range %d [%lx-%lx]: %s\n",
122 (piar->flags & IORESOURCE_IO) ? "i/o" : "mem", cnt,
123 piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev));
124 cnt++;
125 n = rb_next(n);
126 }
127}
128#endif
129
130/* Insert address range into the rb tree. */
131static struct pci_io_addr_range *
132eeh_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
133 unsigned long ahi, unsigned int flags)
134{
135 struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
136 struct rb_node *parent = NULL;
137 struct pci_io_addr_range *piar;
138
139 /* Walk tree, find a place to insert into tree */
140 while (*p) {
141 parent = *p;
142 piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
143 if (ahi < piar->addr_lo) {
144 p = &parent->rb_left;
145 } else if (alo > piar->addr_hi) {
146 p = &parent->rb_right;
147 } else {
148 if (dev != piar->pcidev ||
149 alo != piar->addr_lo || ahi != piar->addr_hi) {
150 pr_warning("PIAR: overlapping address range\n");
151 }
152 return piar;
153 }
154 }
155 piar = kzalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
156 if (!piar)
157 return NULL;
158
159 pci_dev_get(dev);
160 piar->addr_lo = alo;
161 piar->addr_hi = ahi;
162 piar->edev = pci_dev_to_eeh_dev(dev);
163 piar->pcidev = dev;
164 piar->flags = flags;
165
166#ifdef DEBUG
167 pr_debug("PIAR: insert range=[%lx:%lx] dev=%s\n",
168 alo, ahi, pci_name(dev));
169#endif
170
171 rb_link_node(&piar->rb_node, parent, p);
172 rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
173
174 return piar;
175}
176
177static void __eeh_addr_cache_insert_dev(struct pci_dev *dev)
178{
179 struct device_node *dn;
180 struct eeh_dev *edev;
181 int i;
182
183 dn = pci_device_to_OF_node(dev);
184 if (!dn) {
185 pr_warning("PCI: no pci dn found for dev=%s\n", pci_name(dev));
186 return;
187 }
188
189 edev = of_node_to_eeh_dev(dn);
190 if (!edev) {
191 pr_warning("PCI: no EEH dev found for dn=%s\n",
192 dn->full_name);
193 return;
194 }
195
196 /* Skip any devices for which EEH is not enabled. */
197 if (!edev->pe) {
198#ifdef DEBUG
199 pr_info("PCI: skip building address cache for=%s - %s\n",
200 pci_name(dev), dn->full_name);
201#endif
202 return;
203 }
204
205 /* Walk resources on this device, poke them into the tree */
206 for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
207 unsigned long start = pci_resource_start(dev,i);
208 unsigned long end = pci_resource_end(dev,i);
209 unsigned int flags = pci_resource_flags(dev,i);
210
211 /* We are interested only in bus addresses, not DMA or other stuff */
212 if (0 == (flags & (IORESOURCE_IO | IORESOURCE_MEM)))
213 continue;
214 if (start == 0 || ~start == 0 || end == 0 || ~end == 0)
215 continue;
216 eeh_addr_cache_insert(dev, start, end, flags);
217 }
218}
219
220/**
221 * eeh_addr_cache_insert_dev - Add a device to the address cache
222 * @dev: PCI device whose I/O addresses we are interested in.
223 *
224 * In order to support the fast lookup of devices based on addresses,
225 * we maintain a cache of devices that can be quickly searched.
226 * This routine adds a device to that cache.
227 */
228void eeh_addr_cache_insert_dev(struct pci_dev *dev)
229{
230 unsigned long flags;
231
232 /* Ignore PCI bridges */
233 if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE)
234 return;
235
236 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
237 __eeh_addr_cache_insert_dev(dev);
238 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
239}
240
241static inline void __eeh_addr_cache_rmv_dev(struct pci_dev *dev)
242{
243 struct rb_node *n;
244
245restart:
246 n = rb_first(&pci_io_addr_cache_root.rb_root);
247 while (n) {
248 struct pci_io_addr_range *piar;
249 piar = rb_entry(n, struct pci_io_addr_range, rb_node);
250
251 if (piar->pcidev == dev) {
252 rb_erase(n, &pci_io_addr_cache_root.rb_root);
253 pci_dev_put(piar->pcidev);
254 kfree(piar);
255 goto restart;
256 }
257 n = rb_next(n);
258 }
259}
260
261/**
262 * eeh_addr_cache_rmv_dev - remove pci device from addr cache
263 * @dev: device to remove
264 *
265 * Remove a device from the addr-cache tree.
266 * This is potentially expensive, since it will walk
267 * the tree multiple times (once per resource).
268 * But so what; device removal doesn't need to be that fast.
269 */
270void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
271{
272 unsigned long flags;
273
274 spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
275 __eeh_addr_cache_rmv_dev(dev);
276 spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
277}
278
279/**
280 * eeh_addr_cache_build - Build a cache of I/O addresses
281 *
282 * Build a cache of pci i/o addresses. This cache will be used to
283 * find the pci device that corresponds to a given address.
284 * This routine scans all pci busses to build the cache.
285 * Must be run late in boot process, after the pci controllers
286 * have been scanned for devices (after all device resources are known).
287 */
288void __init eeh_addr_cache_build(void)
289{
290 struct device_node *dn;
291 struct eeh_dev *edev;
292 struct pci_dev *dev = NULL;
293
294 spin_lock_init(&pci_io_addr_cache_root.piar_lock);
295
296 for_each_pci_dev(dev) {
297 dn = pci_device_to_OF_node(dev);
298 if (!dn)
299 continue;
300
301 edev = of_node_to_eeh_dev(dn);
302 if (!edev)
303 continue;
304
305 pci_dev_get(dev); /* matching put is in eeh_remove_device() */
306 dev->dev.archdata.edev = edev;
307 edev->pdev = dev;
308
309 eeh_addr_cache_insert_dev(dev);
310
311 eeh_sysfs_add_device(dev);
312 }
313
314#ifdef DEBUG
315 /* Verify tree built up above, echo back the list of addrs. */
316 eeh_addr_cache_print(&pci_io_addr_cache_root);
317#endif
318}
319
diff --git a/arch/powerpc/platforms/pseries/eeh_dev.c b/arch/powerpc/platforms/pseries/eeh_dev.c
deleted file mode 100644
index 1efa28f5fc54..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_dev.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * This file implements dynamic creation of EEH devices, which are
3 * bound to OF nodes and PCI devices simultaneously. The EEH devices
4 * provide fundamental information for the EEH core components to work
5 * properly. We have to support multiple situations where dynamic
6 * creation of an EEH device is required:
7 *
8 * 1) Before PCI enumeration starts, we need to create EEH devices
9 * according to the PCI-sensitive OF nodes.
10 * 2) When PCI enumeration is done, we need to do the binding between
11 * the PCI device and the associated EEH device.
12 * 3) DR (Dynamic Reconfiguration) can create a PCI-sensitive OF node.
13 * An EEH device will be created when such a node is detected from DR.
14 * 4) PCI hotplug needs to redo the binding between PCI device and EEH
15 * device. If a PHB is newly inserted, we also need to create EEH devices.
16 *
17 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
18 *
19 * This program is free software; you can redistribute it and/or modify
20 * it under the terms of the GNU General Public License as published by
21 * the Free Software Foundation; either version 2 of the License, or
22 * (at your option) any later version.
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
32 */
33
34#include <linux/export.h>
35#include <linux/gfp.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/pci.h>
39#include <linux/string.h>
40
41#include <asm/pci-bridge.h>
42#include <asm/ppc-pci.h>
43
44/**
45 * eeh_dev_init - Create EEH device according to OF node
46 * @dn: device node
47 * @data: PHB
48 *
49 * It will create an EEH device according to the given OF node. The function
50 * might be called by PCI enumeration, DR, or PHB hotplug.
51 */
52void *eeh_dev_init(struct device_node *dn, void *data)
53{
54 struct pci_controller *phb = data;
55 struct eeh_dev *edev;
56
57 /* Allocate EEH device */
58 edev = kzalloc(sizeof(*edev), GFP_KERNEL);
59 if (!edev) {
60 pr_warning("%s: out of memory\n", __func__);
61 return NULL;
62 }
63
64 /* Associate EEH device with OF node */
65 PCI_DN(dn)->edev = edev;
66 edev->dn = dn;
67 edev->phb = phb;
68 INIT_LIST_HEAD(&edev->list);
69
70 return NULL;
71}
72
73/**
74 * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
75 * @phb: PHB
76 *
77 * Scan the PHB OF node and its children, then create the
78 * EEH devices accordingly.
79 */
80void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
81{
82 struct device_node *dn = phb->dn;
83
84 /* EEH PE for PHB */
85 eeh_phb_pe_create(phb);
86
87 /* EEH device for PHB */
88 eeh_dev_init(dn, phb);
89
90 /* EEH devices for children OF nodes */
91 traverse_pci_devices(dn, eeh_dev_init, phb);
92}
93
94/**
95 * eeh_dev_phb_init - Create EEH devices for devices included in existing PHBs
96 *
97 * Scan all the existing PHBs and create EEH devices for their OF
98 * nodes and their child OF nodes.
99 */
100static int __init eeh_dev_phb_init(void)
101{
102 struct pci_controller *phb, *tmp;
103
104 list_for_each_entry_safe(phb, tmp, &hose_list, list_node)
105 eeh_dev_phb_init_dynamic(phb);
106
107 pr_info("EEH: devices created\n");
108
109 return 0;
110}
111
112core_initcall(eeh_dev_phb_init);
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
deleted file mode 100644
index a3fefb61097c..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ /dev/null
@@ -1,552 +0,0 @@
1/*
2 * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
3 * Copyright IBM Corp. 2004 2005
4 * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24 */
25#include <linux/delay.h>
26#include <linux/interrupt.h>
27#include <linux/irq.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <asm/eeh.h>
31#include <asm/eeh_event.h>
32#include <asm/ppc-pci.h>
33#include <asm/pci-bridge.h>
34#include <asm/prom.h>
35#include <asm/rtas.h>
36
37/**
38 * eeh_pcid_name - Retrieve name of PCI device driver
39 * @pdev: PCI device
40 *
41 * This routine is used to retrieve the name of the PCI device
42 * driver, if valid.
43 */
44static inline const char *eeh_pcid_name(struct pci_dev *pdev)
45{
46 if (pdev && pdev->dev.driver)
47 return pdev->dev.driver->name;
48 return "";
49}
50
51/**
52 * eeh_pcid_get - Get the PCI device driver
53 * @pdev: PCI device
54 *
55 * The function is used to retrieve the PCI device driver for
56 * the indicated PCI device. Besides, we increase the reference
57 * count of the PCI device driver to prevent it from being unloaded
58 * on the fly; otherwise, a kernel crash could result.
59 */
60static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
61{
62 if (!pdev || !pdev->driver)
63 return NULL;
64
65 if (!try_module_get(pdev->driver->driver.owner))
66 return NULL;
67
68 return pdev->driver;
69}
70
71/**
72 * eeh_pcid_put - Drop reference on the PCI device driver
73 * @pdev: PCI device
74 *
75 * The function is called to drop the reference on the PCI
76 * device driver of the indicated PCI device.
77 */
78static inline void eeh_pcid_put(struct pci_dev *pdev)
79{
80 if (!pdev || !pdev->driver)
81 return;
82
83 module_put(pdev->driver->driver.owner);
84}
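Every report callback in this file brackets driver access with this get/put pair; a condensed sketch of the pattern:

	/* Sketch: pin the driver while invoking its error handlers */
	struct pci_driver *driver = eeh_pcid_get(dev);
	if (driver) {
		/* ... call driver->err_handler callbacks ... */
		eeh_pcid_put(dev);
	}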
85
86#if 0
87static void print_device_node_tree(struct pci_dn *pdn, int dent)
88{
89 int i;
90 struct device_node *pc;
91
92 if (!pdn)
93 return;
94 for (i = 0; i < dent; i++)
95 printk(" ");
96 printk("dn=%s mode=%x \tcfg_addr=%x pe_addr=%x \tfull=%s\n",
97 pdn->node->name, pdn->eeh_mode, pdn->eeh_config_addr,
98 pdn->eeh_pe_config_addr, pdn->node->full_name);
99 dent += 3;
100 pc = pdn->node->child;
101 while (pc) {
102 print_device_node_tree(PCI_DN(pc), dent);
103 pc = pc->sibling;
104 }
105}
106#endif
107
108/**
109 * eeh_disable_irq - Disable interrupt for the recovering device
110 * @dev: PCI device
111 *
112 * This routine must be called when reporting a temporary or permanent
113 * error to a particular PCI device, to disable the interrupt of that
114 * device. If the device has MSI or MSI-X interrupts enabled, we needn't
115 * do any real work because EEH should freeze DMA transfers for PCI
116 * devices encountering EEH errors, which covers MSI and MSI-X.
117 */
118static void eeh_disable_irq(struct pci_dev *dev)
119{
120 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
121
122 /* Don't disable MSI and MSI-X interrupts. They are
123 * effectively disabled by the DMA Stopped state
124 * when an EEH error occurs.
125 */
126 if (dev->msi_enabled || dev->msix_enabled)
127 return;
128
129 if (!irq_has_action(dev->irq))
130 return;
131
132 edev->mode |= EEH_DEV_IRQ_DISABLED;
133 disable_irq_nosync(dev->irq);
134}
135
136/**
137 * eeh_enable_irq - Enable interrupt for the recovering device
138 * @dev: PCI device
139 *
140 * This routine must be called to re-enable the interrupt when the
141 * failed device is about to be resumed.
142 */
143static void eeh_enable_irq(struct pci_dev *dev)
144{
145 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
146
147 if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
148 edev->mode &= ~EEH_DEV_IRQ_DISABLED;
149 enable_irq(dev->irq);
150 }
151}
152
153/**
154 * eeh_report_error - Report pci error to each device driver
155 * @data: eeh device
156 * @userdata: return value
157 *
158 * Report an EEH error to each device driver, collect up and
159 * merge the device driver responses. Cumulative response
160 * passed back in "userdata".
161 */
162static void *eeh_report_error(void *data, void *userdata)
163{
164 struct eeh_dev *edev = (struct eeh_dev *)data;
165 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
166 enum pci_ers_result rc, *res = userdata;
167 struct pci_driver *driver;
168
169 /* We might not have the associated PCI device;
170 * if so, continue to the next one.
171 */
172 if (!dev) return NULL;
173 dev->error_state = pci_channel_io_frozen;
174
175 driver = eeh_pcid_get(dev);
176 if (!driver) return NULL;
177
178 eeh_disable_irq(dev);
179
180 if (!driver->err_handler ||
181 !driver->err_handler->error_detected) {
182 eeh_pcid_put(dev);
183 return NULL;
184 }
185
186 rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
187
188 /* A driver that needs a reset trumps all others */
189 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
190 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
191
192 eeh_pcid_put(dev);
193 return NULL;
194}
195
196/**
197 * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
198 * @data: eeh device
199 * @userdata: return value
200 *
201 * Tells each device driver that IO ports, MMIO and config space I/O
202 * are now enabled. Collects up and merges the device driver responses.
203 * Cumulative response passed back in "userdata".
204 */
205static void *eeh_report_mmio_enabled(void *data, void *userdata)
206{
207 struct eeh_dev *edev = (struct eeh_dev *)data;
208 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
209 enum pci_ers_result rc, *res = userdata;
210 struct pci_driver *driver;
211
212 driver = eeh_pcid_get(dev);
213 if (!driver) return NULL;
214
215 if (!driver->err_handler ||
216 !driver->err_handler->mmio_enabled) {
217 eeh_pcid_put(dev);
218 return NULL;
219 }
220
221 rc = driver->err_handler->mmio_enabled(dev);
222
223 /* A driver that needs a reset trumps all others */
224 if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
225 if (*res == PCI_ERS_RESULT_NONE) *res = rc;
226
227 eeh_pcid_put(dev);
228 return NULL;
229}
230
231/**
232 * eeh_report_reset - Tell device that slot has been reset
233 * @data: eeh device
234 * @userdata: return value
235 *
236 * This routine must be called while EEH tries to reset a particular
237 * PCI device so that the associated PCI device driver can take
238 * some action, usually saving the data the driver needs so that
239 * it can work again once the device is recovered.
240 */
241static void *eeh_report_reset(void *data, void *userdata)
242{
243 struct eeh_dev *edev = (struct eeh_dev *)data;
244 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
245 enum pci_ers_result rc, *res = userdata;
246 struct pci_driver *driver;
247
248 if (!dev) return NULL;
249 dev->error_state = pci_channel_io_normal;
250
251 driver = eeh_pcid_get(dev);
252 if (!driver) return NULL;
253
254 eeh_enable_irq(dev);
255
256 if (!driver->err_handler ||
257 !driver->err_handler->slot_reset) {
258 eeh_pcid_put(dev);
259 return NULL;
260 }
261
262 rc = driver->err_handler->slot_reset(dev);
263 if ((*res == PCI_ERS_RESULT_NONE) ||
264 (*res == PCI_ERS_RESULT_RECOVERED)) *res = rc;
265 if (*res == PCI_ERS_RESULT_DISCONNECT &&
266 rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
267
268 eeh_pcid_put(dev);
269 return NULL;
270}
271
272/**
273 * eeh_report_resume - Tell device to resume normal operations
274 * @data: eeh device
275 * @userdata: return value
276 *
277 * This routine must be called to notify the device driver that it
278 * could resume so that the device driver can do some initialization
279 * to make the recovered device work again.
280 */
281static void *eeh_report_resume(void *data, void *userdata)
282{
283 struct eeh_dev *edev = (struct eeh_dev *)data;
284 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
285 struct pci_driver *driver;
286
287 if (!dev) return NULL;
288 dev->error_state = pci_channel_io_normal;
289
290 driver = eeh_pcid_get(dev);
291 if (!driver) return NULL;
292
293 eeh_enable_irq(dev);
294
295 if (!driver->err_handler ||
296 !driver->err_handler->resume) {
297 eeh_pcid_put(dev);
298 return NULL;
299 }
300
301 driver->err_handler->resume(dev);
302
303 eeh_pcid_put(dev);
304 return NULL;
305}
306
307/**
308 * eeh_report_failure - Tell device driver that device is dead.
309 * @data: eeh device
310 * @userdata: return value
311 *
312 * This informs the device driver that the device is permanently
313 * dead, and that no further recovery attempts will be made on it.
314 */
315static void *eeh_report_failure(void *data, void *userdata)
316{
317 struct eeh_dev *edev = (struct eeh_dev *)data;
318 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
319 struct pci_driver *driver;
320
321 if (!dev) return NULL;
322 dev->error_state = pci_channel_io_perm_failure;
323
324 driver = eeh_pcid_get(dev);
325 if (!driver) return NULL;
326
327 eeh_disable_irq(dev);
328
329 if (!driver->err_handler ||
330 !driver->err_handler->error_detected) {
331 eeh_pcid_put(dev);
332 return NULL;
333 }
334
335 driver->err_handler->error_detected(dev, pci_channel_io_perm_failure);
336
337 eeh_pcid_put(dev);
338 return NULL;
339}
340
341/**
342 * eeh_reset_device - Perform actual reset of a pci slot
343 * @pe: EEH PE
344 * @bus: PCI bus corresponding to the isolated slot
345 *
346 * This routine must be called to do reset on the indicated PE.
347 * During the reset, udev might be invoked because those affected
348 * PCI devices will be removed and then added.
349 */
350static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
351{
352 int cnt, rc;
353
354 /* pcibios will clear the counter; save the value */
355 cnt = pe->freeze_count;
356
357 /*
358 * We don't remove the corresponding PE instances because
359 * we need the information afterwards. The attached EEH
360 * devices are expected to be re-attached soon when calling
361 * into pcibios_add_pci_devices().
362 */
363 if (bus)
364 __pcibios_remove_pci_devices(bus, 0);
365
366 /* Reset the pci controller. (Asserts RST#; resets config space).
367 * Reconfigure bridges and devices. Don't try to bring the system
368 * up if the reset failed for some reason.
369 */
370 rc = eeh_reset_pe(pe);
371 if (rc)
372 return rc;
373
374 /* Restore PE */
375 eeh_ops->configure_bridge(pe);
376 eeh_pe_restore_bars(pe);
377
378 /* Give the system 5 seconds to finish running the user-space
379 * hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
380 * this is a hack, but if we don't do this, and try to bring
381 * the device up before the scripts have taken it down,
382 * potentially weird things happen.
383 */
384 if (bus) {
385 ssleep(5);
386 pcibios_add_pci_devices(bus);
387 }
388 pe->freeze_count = cnt;
389
390 return 0;
391}
392
393/* The longest amount of time to wait for a pci device
394 * to come back on line, in seconds.
395 */
396#define MAX_WAIT_FOR_RECOVERY 150
397
398/**
399 * eeh_handle_event - Reset a PCI device after hard lockup.
400 * @pe: EEH PE
401 *
402 * When the PHB detects address or data parity errors on a particular
403 * PCI slot, the associated PE will be frozen. Besides, DMAs occurring
404 * to wild addresses (which usually happen due to bugs in device
405 * drivers or in PCI adapter firmware) can cause EEH errors. #SERR,
406 * #PERR or other misc PCI-related errors can also trigger EEH errors.
407 *
408 * The recovery process consists of unplugging the device driver (which
409 * generates hotplug events to userspace), then issuing a PCI #RST to
410 * the device, then reconfiguring the PCI config space for all bridges
411 * & devices under this slot, and then finally restarting the device
412 * drivers (which causes a second set of hotplug events to go out to
413 * userspace).
414 */
415void eeh_handle_event(struct eeh_pe *pe)
416{
417 struct pci_bus *frozen_bus;
418 int rc = 0;
419 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
420
421 frozen_bus = eeh_pe_bus_get(pe);
422 if (!frozen_bus) {
423 pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
424 __func__, pe->phb->global_number, pe->addr);
425 return;
426 }
427
428 pe->freeze_count++;
429 if (pe->freeze_count > EEH_MAX_ALLOWED_FREEZES)
430 goto excess_failures;
431 pr_warning("EEH: This PCI device has failed %d times in the last hour\n",
432 pe->freeze_count);
433
434 /* Walk the various device drivers attached to this slot through
435 * a reset sequence, giving each an opportunity to do what it needs
436 * to accomplish the reset. Each child gets a report of the
437 * status ... if any child can't handle the reset, then the entire
438 * slot is dlpar removed and added.
439 */
440 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
441
442 /* Get the current PCI slot state. This can take a long time,
443 * sometimes over 3 seconds for certain systems.
444 */
445 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
446 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
447 printk(KERN_WARNING "EEH: Permanent failure\n");
448 goto hard_fail;
449 }
450
451 /* Since rtas may enable MMIO when posting the error log,
452 * don't post the error log until after all dev drivers
453 * have been informed.
454 */
455 eeh_slot_error_detail(pe, EEH_LOG_TEMP);
456
457 /* If all device drivers were EEH-unaware, then shut
458 * down all of the device drivers, and hope they
459 * go down willingly, without panicking the system.
460 */
461 if (result == PCI_ERS_RESULT_NONE) {
462 rc = eeh_reset_device(pe, frozen_bus);
463 if (rc) {
464 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
465 goto hard_fail;
466 }
467 }
468
469 /* If all devices reported they can proceed, then re-enable MMIO */
470 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
471 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
472
473 if (rc < 0)
474 goto hard_fail;
475 if (rc) {
476 result = PCI_ERS_RESULT_NEED_RESET;
477 } else {
478 result = PCI_ERS_RESULT_NONE;
479 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
480 }
481 }
482
483 /* If all devices reported they can proceed, then re-enable DMA */
484 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
485 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
486
487 if (rc < 0)
488 goto hard_fail;
489 if (rc)
490 result = PCI_ERS_RESULT_NEED_RESET;
491 else
492 result = PCI_ERS_RESULT_RECOVERED;
493 }
494
495 /* If any device has a hard failure, then shut off everything. */
496 if (result == PCI_ERS_RESULT_DISCONNECT) {
497 printk(KERN_WARNING "EEH: Device driver gave up\n");
498 goto hard_fail;
499 }
500
501 /* If any device called out for a reset, then reset the slot */
502 if (result == PCI_ERS_RESULT_NEED_RESET) {
503 rc = eeh_reset_device(pe, NULL);
504 if (rc) {
505 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
506 goto hard_fail;
507 }
508 result = PCI_ERS_RESULT_NONE;
509 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
510 }
511
512 /* All devices should claim they have recovered by now. */
513 if ((result != PCI_ERS_RESULT_RECOVERED) &&
514 (result != PCI_ERS_RESULT_NONE)) {
515 printk(KERN_WARNING "EEH: Not recovered\n");
516 goto hard_fail;
517 }
518
519 /* Tell all device drivers that they can resume operations */
520 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
521
522 return;
523
524excess_failures:
525 /*
526 * About 90% of all real-life EEH failures in the field
527 * are due to poorly seated PCI cards. Only 10% or so are
528 * due to actual, failed cards.
529 */
530 pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
531 "last hour and has been permanently disabled.\n"
532 "Please try reseating or replacing it.\n",
533 pe->phb->global_number, pe->addr,
534 pe->freeze_count);
535 goto perm_error;
536
537hard_fail:
538 pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
539 "Please try reseating or replacing it\n",
540 pe->phb->global_number, pe->addr);
541
542perm_error:
543 eeh_slot_error_detail(pe, EEH_LOG_PERM);
544
545 /* Notify all devices that they're about to go down. */
546 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
547
548 /* Shut down the device drivers for good. */
549 if (frozen_bus)
550 pcibios_remove_pci_devices(frozen_bus);
551}
552
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
deleted file mode 100644
index 185bedd926df..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ /dev/null
@@ -1,142 +0,0 @@
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 *
16 * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
17 */
18
19#include <linux/delay.h>
20#include <linux/list.h>
21#include <linux/mutex.h>
22#include <linux/sched.h>
23#include <linux/pci.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
26#include <linux/kthread.h>
27#include <asm/eeh_event.h>
28#include <asm/ppc-pci.h>
29
30/** Overview:
31 * EEH error states may be detected within exception handlers;
32 * however, the recovery processing needs to occur asynchronously
33 * in a normal kernel context and not an interrupt context.
34 * This pair of routines creates an event and queues it onto a
35 * work-queue, where a worker thread can drive recovery.
36 */
37
38/* EEH event workqueue setup. */
39static DEFINE_SPINLOCK(eeh_eventlist_lock);
40LIST_HEAD(eeh_eventlist);
41static void eeh_thread_launcher(struct work_struct *);
42DECLARE_WORK(eeh_event_wq, eeh_thread_launcher);
43
44/* Serialize reset sequences for a given pci device */
45DEFINE_MUTEX(eeh_event_mutex);
46
47/**
48 * eeh_event_handler - Dispatch EEH events.
49 * @dummy: unused
50 *
51 * The detection of a frozen slot can occur inside an interrupt,
52 * where it can be hard to do anything about it. The goal of this
53 * routine is to pull these detection events out of the context
54 * of the interrupt handler, and re-dispatch them for processing
55 * at a later time in a normal context.
56 */
57static int eeh_event_handler(void * dummy)
58{
59 unsigned long flags;
60 struct eeh_event *event;
61 struct eeh_pe *pe;
62
63 spin_lock_irqsave(&eeh_eventlist_lock, flags);
64 event = NULL;
65
66 /* Unqueue the event, get ready to process. */
67 if (!list_empty(&eeh_eventlist)) {
68 event = list_entry(eeh_eventlist.next, struct eeh_event, list);
69 list_del(&event->list);
70 }
71 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
72
73 if (event == NULL)
74 return 0;
75
76 /* Serialize processing of EEH events */
77 mutex_lock(&eeh_event_mutex);
78 pe = event->pe;
79 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
80 pr_info("EEH: Detected PCI bus error on PHB#%d-PE#%x\n",
81 pe->phb->global_number, pe->addr);
82
83 set_current_state(TASK_INTERRUPTIBLE); /* Don't add to load average */
84 eeh_handle_event(pe);
85 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
86
87 kfree(event);
88 mutex_unlock(&eeh_event_mutex);
89
90 /* If there are no new errors after an hour, clear the counter. */
91 if (pe && pe->freeze_count > 0) {
92 msleep_interruptible(3600*1000);
93 if (pe->freeze_count > 0)
94 pe->freeze_count--;
95
96 }
97
98 return 0;
99}
100
101/**
102 * eeh_thread_launcher - Start kernel thread to handle EEH events
103 * @dummy: unused
104 *
105 * This routine is called to start the kernel thread for processing
106 * EEH event.
107 */
108static void eeh_thread_launcher(struct work_struct *dummy)
109{
110 if (IS_ERR(kthread_run(eeh_event_handler, NULL, "eehd")))
111 printk(KERN_ERR "Failed to start EEH daemon\n");
112}
113
114/**
115 * eeh_send_failure_event - Generate a PCI error event
116 * @pe: EEH PE
117 *
118 * This routine can be called within an interrupt context;
119 * the actual event will be delivered in a normal context
120 * (from a workqueue).
121 */
122int eeh_send_failure_event(struct eeh_pe *pe)
123{
124 unsigned long flags;
125 struct eeh_event *event;
126
127 event = kzalloc(sizeof(*event), GFP_ATOMIC);
128 if (!event) {
129 pr_err("EEH: out of memory, event not handled\n");
130 return -ENOMEM;
131 }
132 event->pe = pe;
133
134 /* We may or may not be called in an interrupt context */
135 spin_lock_irqsave(&eeh_eventlist_lock, flags);
136 list_add(&event->list, &eeh_eventlist);
137 spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
138
139 schedule_work(&eeh_event_wq);
140
141 return 0;
142}
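A detection path, possibly running in interrupt context, hands a frozen PE to the worker with one call. A minimal sketch; the surrounding error handling is illustrative:

	/* Sketch: queue a frozen PE for asynchronous recovery; "pe" is
	 * assumed to have been identified by the caller's detection logic.
	 */
	if (eeh_send_failure_event(pe))
		pr_err("EEH: failed to queue event for PHB#%d-PE#%x\n",
		       pe->phb->global_number, pe->addr);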
diff --git a/arch/powerpc/platforms/pseries/eeh_pe.c b/arch/powerpc/platforms/pseries/eeh_pe.c
deleted file mode 100644
index 9d4a9e8562b2..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_pe.c
+++ /dev/null
@@ -1,653 +0,0 @@
1/*
2 * This file implements PEs based on the information from the
3 * platforms. Basically, there are 3 types of PEs: PHB/Bus/Device.
4 * All PEs are organized as a hierarchy tree. The first level
5 * of the tree is associated with the existing PHBs, since a particular
6 * PE is only meaningful in one PHB domain.
7 *
8 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 */
24
25#include <linux/export.h>
26#include <linux/gfp.h>
27#include <linux/init.h>
28#include <linux/kernel.h>
29#include <linux/pci.h>
30#include <linux/string.h>
31
32#include <asm/pci-bridge.h>
33#include <asm/ppc-pci.h>
34
35static LIST_HEAD(eeh_phb_pe);
36
37/**
38 * eeh_pe_alloc - Allocate PE
39 * @phb: PCI controller
40 * @type: PE type
41 *
42 * Allocate PE instance dynamically.
43 */
44static struct eeh_pe *eeh_pe_alloc(struct pci_controller *phb, int type)
45{
46 struct eeh_pe *pe;
47
48 /* Allocate PHB PE */
49 pe = kzalloc(sizeof(struct eeh_pe), GFP_KERNEL);
50 if (!pe) return NULL;
51
52 /* Initialize PHB PE */
53 pe->type = type;
54 pe->phb = phb;
55 INIT_LIST_HEAD(&pe->child_list);
56 INIT_LIST_HEAD(&pe->child);
57 INIT_LIST_HEAD(&pe->edevs);
58
59 return pe;
60}
61
62/**
63 * eeh_phb_pe_create - Create PHB PE
64 * @phb: PCI controller
65 *
66 * The function should be called while the PHB is detected during
67 * system boot or PCI hotplug in order to create PHB PE.
68 */
69int eeh_phb_pe_create(struct pci_controller *phb)
70{
71 struct eeh_pe *pe;
72
73 /* Allocate PHB PE */
74 pe = eeh_pe_alloc(phb, EEH_PE_PHB);
75 if (!pe) {
76 pr_err("%s: out of memory!\n", __func__);
77 return -ENOMEM;
78 }
79
80 /* Put it into the list */
81 eeh_lock();
82 list_add_tail(&pe->child, &eeh_phb_pe);
83 eeh_unlock();
84
85 pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
86
87 return 0;
88}
89
90/**
91 * eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
92 * @phb: PCI controller
93 *
94 * The overall PEs form hierarchy tree. The first layer of the
95 * hierarchy tree is composed of PHB PEs. The function is used
96 * to retrieve the corresponding PHB PE according to the given PHB.
97 */
98static struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb)
99{
100 struct eeh_pe *pe;
101
102 list_for_each_entry(pe, &eeh_phb_pe, child) {
103 /*
104 * Actually, we needn't check the type since
104 * the PE for the PHB was determined when it
106 * was created.
107 */
108 if ((pe->type & EEH_PE_PHB) && pe->phb == phb)
109 return pe;
110 }
111
112 return NULL;
113}
114
115/**
116 * eeh_pe_next - Retrieve the next PE in the tree
117 * @pe: current PE
118 * @root: root PE
119 *
120 * The function is used to retrieve the next PE in the
121 * hierarchy PE tree.
122 */
123static struct eeh_pe *eeh_pe_next(struct eeh_pe *pe,
124 struct eeh_pe *root)
125{
126 struct list_head *next = pe->child_list.next;
127
128 if (next == &pe->child_list) {
129 while (1) {
130 if (pe == root)
131 return NULL;
132 next = pe->child.next;
133 if (next != &pe->parent->child_list)
134 break;
135 pe = pe->parent;
136 }
137 }
138
139 return list_entry(next, struct eeh_pe, child);
140}
141
142/**
143 * eeh_pe_traverse - Traverse PEs in the specified PHB
144 * @root: root PE
145 * @fn: callback
146 * @flag: extra parameter to callback
147 *
148 * The function is used to traverse the specified PE and its
149 * child PEs. The traversal is terminated once the
150 * callback returns something other than NULL, or when there
151 * are no more PEs to traverse.
152 */
153static void *eeh_pe_traverse(struct eeh_pe *root,
154 eeh_traverse_func fn, void *flag)
155{
156 struct eeh_pe *pe;
157 void *ret;
158
159 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
160 ret = fn(pe, flag);
161 if (ret) return ret;
162 }
163
164 return NULL;
165}
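A traversal callback returns NULL to keep walking and non-NULL to stop early, as __eeh_pe_get below shows. A minimal sketch of a custom visitor; the demo_* name and its counting purpose are invented:

	/* Sketch: count the PEs under a root PE via eeh_pe_traverse() */
	static void *demo_count_pe(void *data, void *flag)
	{
		int *count = flag;

		(*count)++;
		return NULL;	/* returning non-NULL would stop the walk */
	}

	/* Usage: int n = 0; eeh_pe_traverse(root, demo_count_pe, &n); */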
166
167/**
168 * eeh_pe_dev_traverse - Traverse the devices from the PE
169 * @root: EEH PE
170 * @fn: function callback
171 * @flag: extra parameter to callback
172 *
173 * The function is used to traverse the devices of the specified
174 * PE and its child PEs.
175 */
176void *eeh_pe_dev_traverse(struct eeh_pe *root,
177 eeh_traverse_func fn, void *flag)
178{
179 struct eeh_pe *pe;
180 struct eeh_dev *edev;
181 void *ret;
182
183 if (!root) {
184 pr_warning("%s: Invalid PE %p\n", __func__, root);
185 return NULL;
186 }
187
188 eeh_lock();
189
190 /* Traverse root PE */
191 for (pe = root; pe; pe = eeh_pe_next(pe, root)) {
192 eeh_pe_for_each_dev(pe, edev) {
193 ret = fn(edev, flag);
194 if (ret) {
195 eeh_unlock();
196 return ret;
197 }
198 }
199 }
200
201 eeh_unlock();
202
203 return NULL;
204}
205
206/**
207 * __eeh_pe_get - Check the PE address
208 * @data: EEH PE
209 * @flag: EEH device
210 *
211 * One particular PE can be identified by PE address or by
212 * traditional BDF address. The BDF address is composed of
213 * Bus/Device/Function numbers. The extra data referred to by
214 * @flag indicates which type of address should be used.
215 */
216static void *__eeh_pe_get(void *data, void *flag)
217{
218 struct eeh_pe *pe = (struct eeh_pe *)data;
219 struct eeh_dev *edev = (struct eeh_dev *)flag;
220
221 /* Unexpected PHB PE */
222 if (pe->type & EEH_PE_PHB)
223 return NULL;
224
225 /* We prefer PE address */
226 if (edev->pe_config_addr &&
227 (edev->pe_config_addr == pe->addr))
228 return pe;
229
230 /* Try BDF address */
231 if (edev->config_addr &&
232 (edev->config_addr == pe->config_addr))
233 return pe;
234
235 return NULL;
236}
237
238/**
239 * eeh_pe_get - Search PE based on the given address
240 * @edev: EEH device
241 *
242 * Search for the corresponding PE based on the specified address, which
243 * is included in the EEH device. The function is used to check if
244 * the associated PE has already been created for the PE address. Note
245 * that the PE address has 2 formats: the traditional PE address,
246 * which is composed of PCI bus/device/function numbers, or the unified
247 * PE address.
248 */
249static struct eeh_pe *eeh_pe_get(struct eeh_dev *edev)
250{
251 struct eeh_pe *root = eeh_phb_pe_get(edev->phb);
252 struct eeh_pe *pe;
253
254 pe = eeh_pe_traverse(root, __eeh_pe_get, edev);
255
256 return pe;
257}
258
259/**
260 * eeh_pe_get_parent - Retrieve the parent PE
261 * @edev: EEH device
262 *
263 * All the PEs existing in the system are organized as a hierarchy
264 * tree. The function is used to retrieve the parent PE according
265 * to the parent EEH device.
266 */
267static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
268{
269 struct device_node *dn;
270 struct eeh_dev *parent;
271
272 /*
273 * It might be the case that an indirect parent
274 * EEH device already has an associated PE, while
275 * the direct parent EEH device doesn't have one yet.
276 */
277 dn = edev->dn->parent;
278 while (dn) {
279 /* We're poking out of PCI territory */
280 if (!PCI_DN(dn)) return NULL;
281
282 parent = of_node_to_eeh_dev(dn);
283 /* We're poking out of PCI territory */
284 if (!parent) return NULL;
285
286 if (parent->pe)
287 return parent->pe;
288
289 dn = dn->parent;
290 }
291
292 return NULL;
293}
294
295/**
296 * eeh_add_to_parent_pe - Add EEH device to parent PE
297 * @edev: EEH device
298 *
299 * Add EEH device to the parent PE. If the parent PE already
300 * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
301 * we have to create new PE to hold the EEH device and the new
302 * PE will be linked to its parent PE as well.
303 */
304int eeh_add_to_parent_pe(struct eeh_dev *edev)
305{
306 struct eeh_pe *pe, *parent;
307
308 eeh_lock();
309
310 /*
311 * Search whether the PE already exists according
312 * to the PE address. If it already exists, the
313 * PE should be composed of a PCI bus and its subordinate
314 * components.
315 */
316 pe = eeh_pe_get(edev);
317 if (pe && !(pe->type & EEH_PE_INVALID)) {
318 if (!edev->pe_config_addr) {
319 eeh_unlock();
320 pr_err("%s: PE with addr 0x%x already exists\n",
321 __func__, edev->config_addr);
322 return -EEXIST;
323 }
324
325 /* Mark the PE as type of PCI bus */
326 pe->type = EEH_PE_BUS;
327 edev->pe = pe;
328
329 /* Put the edev to PE */
330 list_add_tail(&edev->list, &pe->edevs);
331 eeh_unlock();
332 pr_debug("EEH: Add %s to Bus PE#%x\n",
333 edev->dn->full_name, pe->addr);
334
335 return 0;
336 } else if (pe && (pe->type & EEH_PE_INVALID)) {
337 list_add_tail(&edev->list, &pe->edevs);
338 edev->pe = pe;
339 /*
340 * We get here because of the PCI hotplug caused by
341 * EEH recovery. We need to clear EEH_PE_INVALID all the way up.
342 */
343 parent = pe;
344 while (parent) {
345 if (!(parent->type & EEH_PE_INVALID))
346 break;
347 parent->type &= ~EEH_PE_INVALID;
348 parent = parent->parent;
349 }
350 eeh_unlock();
351 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
352 edev->dn->full_name, pe->addr, pe->parent->addr);
353
354 return 0;
355 }
356
357 /* Create a new EEH PE */
358 pe = eeh_pe_alloc(edev->phb, EEH_PE_DEVICE);
359 if (!pe) {
360 eeh_unlock();
361 pr_err("%s: out of memory!\n", __func__);
362 return -ENOMEM;
363 }
364 pe->addr = edev->pe_config_addr;
365 pe->config_addr = edev->config_addr;
366
367 /*
368 * Put the new EEH PE into hierarchy tree. If the parent
369 * can't be found, the newly created PE will be attached
370 * to PHB directly. Otherwise, we have to associate the
371 * PE with its parent.
372 */
373 parent = eeh_pe_get_parent(edev);
374 if (!parent) {
375 parent = eeh_phb_pe_get(edev->phb);
376 if (!parent) {
377 eeh_unlock();
378 pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
379 __func__, edev->phb->global_number);
380 edev->pe = NULL;
381 kfree(pe);
382 return -EEXIST;
383 }
384 }
385 pe->parent = parent;
386
387 /*
388 * Put the newly created PE into the child list and
389 * link the EEH device accordingly.
390 */
391 list_add_tail(&pe->child, &parent->child_list);
392 list_add_tail(&edev->list, &pe->edevs);
393 edev->pe = pe;
394 eeh_unlock();
395 pr_debug("EEH: Add %s to Device PE#%x, Parent PE#%x\n",
396 edev->dn->full_name, pe->addr, pe->parent->addr);
397
398 return 0;
399}
400
401/**
402 * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
403 * @edev: EEH device
404 * @purge_pe: remove PE or not
405 *
406 * The PE hierarchy tree might be changed when doing PCI hotplug.
407 * Also, the PCI devices or buses could be removed from the system
408 * during EEH recovery. So we have to call this function to remove
409 * the corresponding PE accordingly when necessary.
410 */
411int eeh_rmv_from_parent_pe(struct eeh_dev *edev, int purge_pe)
412{
413 struct eeh_pe *pe, *parent, *child;
414 int cnt;
415
416 if (!edev->pe) {
417 pr_warning("%s: No PE found for EEH device %s\n",
418 __func__, edev->dn->full_name);
419 return -EEXIST;
420 }
421
422 eeh_lock();
423
424 /* Remove the EEH device */
425 pe = edev->pe;
426 edev->pe = NULL;
427 list_del(&edev->list);
428
429 /*
430 * Check if the parent PE includes any EEH devices.
431	 * If not, we should delete it. Also, the parent PE
432	 * should be deleted if it has no associated child
433	 * PEs or EEH devices.
434 */
435 while (1) {
436 parent = pe->parent;
437 if (pe->type & EEH_PE_PHB)
438 break;
439
440 if (purge_pe) {
441 if (list_empty(&pe->edevs) &&
442 list_empty(&pe->child_list)) {
443 list_del(&pe->child);
444 kfree(pe);
445 } else {
446 break;
447 }
448 } else {
449 if (list_empty(&pe->edevs)) {
450 cnt = 0;
451 list_for_each_entry(child, &pe->child_list, child) {
452 if (!(child->type & EEH_PE_INVALID)) {
453 cnt++;
454 break;
455 }
456 }
457
458 if (!cnt)
459 pe->type |= EEH_PE_INVALID;
460 else
461 break;
462 }
463 }
464
465 pe = parent;
466 }
467
468 eeh_unlock();
469
470 return 0;
471}
472
473/**
474 * __eeh_pe_state_mark - Mark the state for the PE
475 * @data: EEH PE
476 * @flag: state
477 *
478 * The function is used to mark the indicated state for the given
 479 * PE. The associated PCI devices will be put into the I/O frozen
 480 * state as well.
481 */
482static void *__eeh_pe_state_mark(void *data, void *flag)
483{
484 struct eeh_pe *pe = (struct eeh_pe *)data;
485 int state = *((int *)flag);
486 struct eeh_dev *tmp;
487 struct pci_dev *pdev;
488
489 /*
490 * Mark the PE with the indicated state. Also,
491 * the associated PCI device will be put into
492 * I/O frozen state to avoid I/O accesses from
493 * the PCI device driver.
494 */
495 pe->state |= state;
496 eeh_pe_for_each_dev(pe, tmp) {
497 pdev = eeh_dev_to_pci_dev(tmp);
498 if (pdev)
499 pdev->error_state = pci_channel_io_frozen;
500 }
501
502 return NULL;
503}
504
505/**
506 * eeh_pe_state_mark - Mark specified state for PE and its associated device
507 * @pe: EEH PE
 508 * @state: state to be marked
509 * EEH error affects the current PE and its child PEs. The function
510 * is used to mark appropriate state for the affected PEs and the
511 * associated devices.
512 */
513void eeh_pe_state_mark(struct eeh_pe *pe, int state)
514{
515 eeh_lock();
516 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
517 eeh_unlock();
518}
519
520/**
521 * __eeh_pe_state_clear - Clear state for the PE
522 * @data: EEH PE
523 * @flag: state
524 *
525 * The function is used to clear the indicated state from the
 526 * given PE. The check count of the PE is cleared
 527 * as well.
528 */
529static void *__eeh_pe_state_clear(void *data, void *flag)
530{
531 struct eeh_pe *pe = (struct eeh_pe *)data;
532 int state = *((int *)flag);
533
534 pe->state &= ~state;
535 pe->check_count = 0;
536
537 return NULL;
538}
539
540/**
541 * eeh_pe_state_clear - Clear state for the PE and its children
542 * @pe: PE
543 * @state: state to be cleared
544 *
 545 * When the PE and its children have been recovered from an error,
 546 * we need to clear the error state. This function is used
 547 * for that purpose.
548 */
549void eeh_pe_state_clear(struct eeh_pe *pe, int state)
550{
551 eeh_lock();
552 eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
553 eeh_unlock();
554}
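/*
 * For example, a recovery path would typically bracket a reset with
 * these two helpers; EEH_PE_ISOLATED is shown as a representative
 * state bit, the exact flags depend on the caller:
 *
 *	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
 *	... reset and re-probe the PE ...
 *	eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
 */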
555
556/**
557 * eeh_restore_one_device_bars - Restore the Base Address Registers for one device
558 * @data: EEH device
559 * @flag: Unused
560 *
561 * Loads the PCI configuration space base address registers,
 562 * the expansion ROM base address, the latency timer, etc.
563 * from the saved values in the device node.
564 */
565static void *eeh_restore_one_device_bars(void *data, void *flag)
566{
567 int i;
568 u32 cmd;
569 struct eeh_dev *edev = (struct eeh_dev *)data;
570 struct device_node *dn = eeh_dev_to_of_node(edev);
571
572 for (i = 4; i < 10; i++)
573 eeh_ops->write_config(dn, i*4, 4, edev->config_space[i]);
574 /* 12 == Expansion ROM Address */
575 eeh_ops->write_config(dn, 12*4, 4, edev->config_space[12]);
576
577#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
578#define SAVED_BYTE(OFF) (((u8 *)(edev->config_space))[BYTE_SWAP(OFF)])
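/*
 * BYTE_SWAP() maps a config-space byte offset OFF to byte (3 - OFF%4)
 * of word OFF/4 in the saved u32 array. For example, for
 * PCI_CACHE_LINE_SIZE (offset 0x0c): 8*(12/4) + 3 - 12 = 15, i.e. the
 * least significant byte of config_space[3] on this big-endian host.
 */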
579
580 eeh_ops->write_config(dn, PCI_CACHE_LINE_SIZE, 1,
581 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
582 eeh_ops->write_config(dn, PCI_LATENCY_TIMER, 1,
583 SAVED_BYTE(PCI_LATENCY_TIMER));
584
585 /* max latency, min grant, interrupt pin and line */
586 eeh_ops->write_config(dn, 15*4, 4, edev->config_space[15]);
587
588 /*
589 * Restore PERR & SERR bits, some devices require it,
590 * don't touch the other command bits
591 */
592 eeh_ops->read_config(dn, PCI_COMMAND, 4, &cmd);
593 if (edev->config_space[1] & PCI_COMMAND_PARITY)
594 cmd |= PCI_COMMAND_PARITY;
595 else
596 cmd &= ~PCI_COMMAND_PARITY;
597 if (edev->config_space[1] & PCI_COMMAND_SERR)
598 cmd |= PCI_COMMAND_SERR;
599 else
600 cmd &= ~PCI_COMMAND_SERR;
601 eeh_ops->write_config(dn, PCI_COMMAND, 4, cmd);
602
603 return NULL;
604}
605
606/**
607 * eeh_pe_restore_bars - Restore the PCI config space info
608 * @pe: EEH PE
609 *
610 * This routine performs a recursive walk to the children
611 * of this device as well.
612 */
613void eeh_pe_restore_bars(struct eeh_pe *pe)
614{
615 /*
616 * We needn't take the EEH lock since eeh_pe_dev_traverse()
617 * will take that.
618 */
619 eeh_pe_dev_traverse(pe, eeh_restore_one_device_bars, NULL);
620}
621
622/**
623 * eeh_pe_bus_get - Retrieve PCI bus according to the given PE
624 * @pe: EEH PE
625 *
626 * Retrieve the PCI bus according to the given PE. Basically,
 627 * there are 3 types of PEs: PHB/Bus/Device. For a PHB PE, the
 628 * primary PCI bus will be retrieved. For BUS and DEVICE PEs,
 629 * the bus of the first associated EEH device is returned, if
 630 * there is one.
631 */
632struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
633{
634 struct pci_bus *bus = NULL;
635 struct eeh_dev *edev;
636 struct pci_dev *pdev;
637
638 eeh_lock();
639
640 if (pe->type & EEH_PE_PHB) {
641 bus = pe->phb->bus;
642 } else if (pe->type & EEH_PE_BUS ||
643 pe->type & EEH_PE_DEVICE) {
644 edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
645 pdev = eeh_dev_to_pci_dev(edev);
646 if (pdev)
647 bus = pdev->bus;
648 }
649
650 eeh_unlock();
651
652 return bus;
653}
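/*
 * As a hypothetical usage sketch, a recovery caller could locate the
 * bus to tear down along these lines:
 *
 *	struct pci_bus *bus = eeh_pe_bus_get(pe);
 *	if (bus)
 *		pcibios_remove_pci_devices(bus);
 */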
diff --git a/arch/powerpc/platforms/pseries/eeh_sysfs.c b/arch/powerpc/platforms/pseries/eeh_sysfs.c
deleted file mode 100644
index d37708360f2e..000000000000
--- a/arch/powerpc/platforms/pseries/eeh_sysfs.c
+++ /dev/null
@@ -1,75 +0,0 @@
1/*
2 * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
3 * Copyright IBM Corporation 2007
4 * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24 */
25#include <linux/pci.h>
26#include <linux/stat.h>
27#include <asm/ppc-pci.h>
28#include <asm/pci-bridge.h>
29
30/**
 31 * EEH_SHOW_ATTR -- Create sysfs entry for an EEH statistic
32 * @_name: name of file in sysfs directory
33 * @_memb: name of member in struct pci_dn to access
34 * @_format: printf format for display
35 *
36 * All of the attributes look very similar, so just
37 * auto-gen a cut-n-paste routine to display them.
38 */
39#define EEH_SHOW_ATTR(_name,_memb,_format) \
40static ssize_t eeh_show_##_name(struct device *dev, \
41 struct device_attribute *attr, char *buf) \
42{ \
43 struct pci_dev *pdev = to_pci_dev(dev); \
44 struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev); \
45 \
46 if (!edev) \
47 return 0; \
48 \
49 return sprintf(buf, _format "\n", edev->_memb); \
50} \
51static DEVICE_ATTR(_name, S_IRUGO, eeh_show_##_name, NULL);
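/*
 * For example, EEH_SHOW_ATTR(eeh_mode, mode, "0x%x") expands roughly to:
 *
 *	static ssize_t eeh_show_eeh_mode(struct device *dev,
 *			struct device_attribute *attr, char *buf)
 *	{
 *		struct pci_dev *pdev = to_pci_dev(dev);
 *		struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 *
 *		if (!edev)
 *			return 0;
 *
 *		return sprintf(buf, "0x%x\n", edev->mode);
 *	}
 *	static DEVICE_ATTR(eeh_mode, S_IRUGO, eeh_show_eeh_mode, NULL);
 */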
52
53EEH_SHOW_ATTR(eeh_mode, mode, "0x%x");
54EEH_SHOW_ATTR(eeh_config_addr, config_addr, "0x%x");
55EEH_SHOW_ATTR(eeh_pe_config_addr, pe_config_addr, "0x%x");
56
57void eeh_sysfs_add_device(struct pci_dev *pdev)
58{
59 int rc=0;
60
61 rc += device_create_file(&pdev->dev, &dev_attr_eeh_mode);
62 rc += device_create_file(&pdev->dev, &dev_attr_eeh_config_addr);
63 rc += device_create_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
64
65 if (rc)
66 printk(KERN_WARNING "EEH: Unable to create sysfs entries\n");
67}
68
69void eeh_sysfs_remove_device(struct pci_dev *pdev)
70{
71 device_remove_file(&pdev->dev, &dev_attr_eeh_mode);
72 device_remove_file(&pdev->dev, &dev_attr_eeh_config_addr);
73 device_remove_file(&pdev->dev, &dev_attr_eeh_pe_config_addr);
74}
75
diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c
index ef9d9d84c7d5..5ea88d1541f7 100644
--- a/arch/powerpc/platforms/pseries/io_event_irq.c
+++ b/arch/powerpc/platforms/pseries/io_event_irq.c
@@ -115,7 +115,7 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog)
115 * by scope or event type alone. For example, Torrent ISR route change 115 * by scope or event type alone. For example, Torrent ISR route change
 116 * event is reported with scope 0x00 (Not Applicable) rather than 116 * event is reported with scope 0x00 (Not Applicable) rather than
 117 * 0x3B (Torrent-hub). It is better to let the clients identify 117 * 0x3B (Torrent-hub). It is better to let the clients identify
118 * who owns the the event. 118 * who owns the event.
119 */ 119 */
120 120
121static irqreturn_t ioei_interrupt(int irq, void *dev_id) 121static irqreturn_t ioei_interrupt(int irq, void *dev_id)
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 86ae364900d6..23fc1dcf4434 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -614,6 +614,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus)
614 614
615 iommu_table_setparms(pci->phb, dn, tbl); 615 iommu_table_setparms(pci->phb, dn, tbl);
616 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 616 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
617 iommu_register_group(tbl, pci_domain_nr(bus), 0);
617 618
618 /* Divide the rest (1.75GB) among the children */ 619 /* Divide the rest (1.75GB) among the children */
619 pci->phb->dma_window_size = 0x80000000ul; 620 pci->phb->dma_window_size = 0x80000000ul;
@@ -658,6 +659,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus)
658 ppci->phb->node); 659 ppci->phb->node);
659 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window); 660 iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window);
660 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); 661 ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node);
662 iommu_register_group(tbl, pci_domain_nr(bus), 0);
661 pr_debug(" created table: %p\n", ppci->iommu_table); 663 pr_debug(" created table: %p\n", ppci->iommu_table);
662 } 664 }
663} 665}
@@ -684,6 +686,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev)
684 phb->node); 686 phb->node);
685 iommu_table_setparms(phb, dn, tbl); 687 iommu_table_setparms(phb, dn, tbl);
686 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node); 688 PCI_DN(dn)->iommu_table = iommu_init_table(tbl, phb->node);
689 iommu_register_group(tbl, pci_domain_nr(phb->bus), 0);
687 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table); 690 set_iommu_table_base(&dev->dev, PCI_DN(dn)->iommu_table);
688 return; 691 return;
689 } 692 }
@@ -1184,6 +1187,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
1184 pci->phb->node); 1187 pci->phb->node);
1185 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window); 1188 iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window);
1186 pci->iommu_table = iommu_init_table(tbl, pci->phb->node); 1189 pci->iommu_table = iommu_init_table(tbl, pci->phb->node);
1190 iommu_register_group(tbl, pci_domain_nr(pci->phb->bus), 0);
1187 pr_debug(" created table: %p\n", pci->iommu_table); 1191 pr_debug(" created table: %p\n", pci->iommu_table);
1188 } else { 1192 } else {
1189 pr_debug(" found DMA window, table: %p\n", pci->iommu_table); 1193 pr_debug(" found DMA window, table: %p\n", pci->iommu_table);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6d62072a7d5a..02d6e21619bb 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -45,6 +45,13 @@
45#include "plpar_wrappers.h" 45#include "plpar_wrappers.h"
46#include "pseries.h" 46#include "pseries.h"
47 47
48/* Flag bits for H_BULK_REMOVE */
49#define HBR_REQUEST 0x4000000000000000UL
50#define HBR_RESPONSE 0x8000000000000000UL
51#define HBR_END 0xc000000000000000UL
52#define HBR_AVPN 0x0200000000000000UL
53#define HBR_ANDCOND 0x0100000000000000UL
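/*
 * For example, H_BULK_REMOVE takes (control, avpn) pairs, four pairs
 * per plpar_hcall9(), so a partially filled two-entry batch would be
 * packed roughly as (slot0/vpn0 etc. are placeholders):
 *
 *	param[0] = HBR_REQUEST | HBR_AVPN | slot0;
 *	param[1] = hpte_encode_avpn(vpn0, psize, ssize);
 *	param[2] = HBR_REQUEST | HBR_AVPN | slot1;
 *	param[3] = hpte_encode_avpn(vpn1, psize, ssize);
 *	param[4] = HBR_END;	(terminates the partial batch)
 */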
54
48 55
49/* in hvCall.S */ 56/* in hvCall.S */
50EXPORT_SYMBOL(plpar_hcall); 57EXPORT_SYMBOL(plpar_hcall);
@@ -64,6 +71,9 @@ void vpa_init(int cpu)
64 if (cpu_has_feature(CPU_FTR_ALTIVEC)) 71 if (cpu_has_feature(CPU_FTR_ALTIVEC))
65 lppaca_of(cpu).vmxregs_in_use = 1; 72 lppaca_of(cpu).vmxregs_in_use = 1;
66 73
74 if (cpu_has_feature(CPU_FTR_ARCH_207S))
75 lppaca_of(cpu).ebb_regs_in_use = 1;
76
67 addr = __pa(&lppaca_of(cpu)); 77 addr = __pa(&lppaca_of(cpu));
68 ret = register_vpa(hwcpu, addr); 78 ret = register_vpa(hwcpu, addr);
69 79
@@ -240,7 +250,8 @@ static void pSeries_lpar_hptab_clear(void)
240static long pSeries_lpar_hpte_updatepp(unsigned long slot, 250static long pSeries_lpar_hpte_updatepp(unsigned long slot,
241 unsigned long newpp, 251 unsigned long newpp,
242 unsigned long vpn, 252 unsigned long vpn,
243 int psize, int ssize, int local) 253 int psize, int apsize,
254 int ssize, int local)
244{ 255{
245 unsigned long lpar_rc; 256 unsigned long lpar_rc;
246 unsigned long flags = (newpp & 7) | H_AVPN; 257 unsigned long flags = (newpp & 7) | H_AVPN;
@@ -328,7 +339,8 @@ static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
328} 339}
329 340
330static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn, 341static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
331 int psize, int ssize, int local) 342 int psize, int apsize,
343 int ssize, int local)
332{ 344{
333 unsigned long want_v; 345 unsigned long want_v;
334 unsigned long lpar_rc; 346 unsigned long lpar_rc;
@@ -345,6 +357,113 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
345 BUG_ON(lpar_rc != H_SUCCESS); 357 BUG_ON(lpar_rc != H_SUCCESS);
346} 358}
347 359
360/*
361 * Limit iterations holding pSeries_lpar_tlbie_lock to 3 H_BULK_REMOVE
362 * hcalls (4 HPTEs each); this also avoids bouncing the hypervisor tlbie lock.
363 */
364#define PPC64_HUGE_HPTE_BATCH 12
365
366static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
367 unsigned long *vpn, int count,
368 int psize, int ssize)
369{
370 unsigned long param[8];
371 int i = 0, pix = 0, rc;
372 unsigned long flags = 0;
373 int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
374
375 if (lock_tlbie)
376 spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
377
378 for (i = 0; i < count; i++) {
379
380 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
381 pSeries_lpar_hpte_invalidate(slot[i], vpn[i], psize, 0,
382 ssize, 0);
383 } else {
384 param[pix] = HBR_REQUEST | HBR_AVPN | slot[i];
385 param[pix+1] = hpte_encode_avpn(vpn[i], psize, ssize);
386 pix += 2;
387 if (pix == 8) {
388 rc = plpar_hcall9(H_BULK_REMOVE, param,
389 param[0], param[1], param[2],
390 param[3], param[4], param[5],
391 param[6], param[7]);
392 BUG_ON(rc != H_SUCCESS);
393 pix = 0;
394 }
395 }
396 }
397 if (pix) {
398 param[pix] = HBR_END;
399 rc = plpar_hcall9(H_BULK_REMOVE, param, param[0], param[1],
400 param[2], param[3], param[4], param[5],
401 param[6], param[7]);
402 BUG_ON(rc != H_SUCCESS);
403 }
404
405 if (lock_tlbie)
406 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
407}
408
409static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
410 unsigned char *hpte_slot_array,
411 unsigned long addr, int psize)
412{
413 int ssize = 0, i, index = 0;
414 unsigned long s_addr = addr;
415 unsigned int max_hpte_count, valid;
416 unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
417 unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
418 unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
419
420 shift = mmu_psize_defs[psize].shift;
421 max_hpte_count = 1U << (PMD_SHIFT - shift);
422
423 for (i = 0; i < max_hpte_count; i++) {
424 valid = hpte_valid(hpte_slot_array, i);
425 if (!valid)
426 continue;
427 hidx = hpte_hash_index(hpte_slot_array, i);
428
429 /* get the vpn */
430 addr = s_addr + (i * (1ul << shift));
431 if (!is_kernel_addr(addr)) {
432 ssize = user_segment_size(addr);
433 vsid = get_vsid(mm->context.id, addr, ssize);
434 WARN_ON(vsid == 0);
435 } else {
436 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
437 ssize = mmu_kernel_ssize;
438 }
439
440 vpn = hpt_vpn(addr, vsid, ssize);
441 hash = hpt_hash(vpn, shift, ssize);
442 if (hidx & _PTEIDX_SECONDARY)
443 hash = ~hash;
444
445 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
446 slot += hidx & _PTEIDX_GROUP_IX;
447
448 slot_array[index] = slot;
449 vpn_array[index] = vpn;
450 if (index == PPC64_HUGE_HPTE_BATCH - 1) {
451 /*
452			 * Now do a bulk invalidate
453 */
454 __pSeries_lpar_hugepage_invalidate(slot_array,
455 vpn_array,
456 PPC64_HUGE_HPTE_BATCH,
457 psize, ssize);
458 index = 0;
459 } else
460 index++;
461 }
462 if (index)
463 __pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
464 index, psize, ssize);
465}
466
348static void pSeries_lpar_hpte_removebolted(unsigned long ea, 467static void pSeries_lpar_hpte_removebolted(unsigned long ea,
349 int psize, int ssize) 468 int psize, int ssize)
350{ 469{
@@ -356,17 +475,12 @@ static void pSeries_lpar_hpte_removebolted(unsigned long ea,
356 475
357 slot = pSeries_lpar_hpte_find(vpn, psize, ssize); 476 slot = pSeries_lpar_hpte_find(vpn, psize, ssize);
358 BUG_ON(slot == -1); 477 BUG_ON(slot == -1);
359 478 /*
360 pSeries_lpar_hpte_invalidate(slot, vpn, psize, ssize, 0); 479 * lpar doesn't use the passed actual page size
480 */
481 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 0, ssize, 0);
361} 482}
362 483
363/* Flag bits for H_BULK_REMOVE */
364#define HBR_REQUEST 0x4000000000000000UL
365#define HBR_RESPONSE 0x8000000000000000UL
366#define HBR_END 0xc000000000000000UL
367#define HBR_AVPN 0x0200000000000000UL
368#define HBR_ANDCOND 0x0100000000000000UL
369
370/* 484/*
371 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie 485 * Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
372 * lock. 486 * lock.
@@ -400,8 +514,11 @@ static void pSeries_lpar_flush_hash_range(unsigned long number, int local)
400 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 514 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
401 slot += hidx & _PTEIDX_GROUP_IX; 515 slot += hidx & _PTEIDX_GROUP_IX;
402 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) { 516 if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
517 /*
518 * lpar doesn't use the passed actual page size
519 */
403 pSeries_lpar_hpte_invalidate(slot, vpn, psize, 520 pSeries_lpar_hpte_invalidate(slot, vpn, psize,
404 ssize, local); 521 0, ssize, local);
405 } else { 522 } else {
406 param[pix] = HBR_REQUEST | HBR_AVPN | slot; 523 param[pix] = HBR_REQUEST | HBR_AVPN | slot;
407 param[pix+1] = hpte_encode_avpn(vpn, psize, 524 param[pix+1] = hpte_encode_avpn(vpn, psize,
@@ -452,6 +569,7 @@ void __init hpte_init_lpar(void)
452 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted; 569 ppc_md.hpte_removebolted = pSeries_lpar_hpte_removebolted;
453 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; 570 ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range;
454 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; 571 ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear;
572 ppc_md.hugepage_invalidate = pSeries_lpar_hugepage_invalidate;
455} 573}
456 574
457#ifdef CONFIG_PPC_SMLPAR 575#ifdef CONFIG_PPC_SMLPAR
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 8733a86ad52e..9f8671a44551 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -18,6 +18,7 @@
18#include <linux/spinlock.h> 18#include <linux/spinlock.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include <linux/kmsg_dump.h> 20#include <linux/kmsg_dump.h>
21#include <linux/pstore.h>
21#include <linux/ctype.h> 22#include <linux/ctype.h>
22#include <linux/zlib.h> 23#include <linux/zlib.h>
23#include <asm/uaccess.h> 24#include <asm/uaccess.h>
@@ -29,6 +30,13 @@
29/* Max bytes to read/write in one go */ 30/* Max bytes to read/write in one go */
30#define NVRW_CNT 0x20 31#define NVRW_CNT 0x20
31 32
33/*
34 * Set the oops header version to distinguish between old and new format
35 * headers. The lnx,oops-log partition max size is 4000, so a header
36 * version > 4000 helps in identifying the new header.
37 */
38#define OOPS_HDR_VERSION 5000
39
32static unsigned int nvram_size; 40static unsigned int nvram_size;
33static int nvram_fetch, nvram_store; 41static int nvram_fetch, nvram_store;
34static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */ 42static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
@@ -45,20 +53,23 @@ struct nvram_os_partition {
45 int min_size; /* minimum acceptable size (0 means req_size) */ 53 int min_size; /* minimum acceptable size (0 means req_size) */
46 long size; /* size of data portion (excluding err_log_info) */ 54 long size; /* size of data portion (excluding err_log_info) */
47 long index; /* offset of data portion of partition */ 55 long index; /* offset of data portion of partition */
56 bool os_partition; /* partition initialized by OS, not FW */
48}; 57};
49 58
50static struct nvram_os_partition rtas_log_partition = { 59static struct nvram_os_partition rtas_log_partition = {
51 .name = "ibm,rtas-log", 60 .name = "ibm,rtas-log",
52 .req_size = 2079, 61 .req_size = 2079,
53 .min_size = 1055, 62 .min_size = 1055,
54 .index = -1 63 .index = -1,
64 .os_partition = true
55}; 65};
56 66
57static struct nvram_os_partition oops_log_partition = { 67static struct nvram_os_partition oops_log_partition = {
58 .name = "lnx,oops-log", 68 .name = "lnx,oops-log",
59 .req_size = 4000, 69 .req_size = 4000,
60 .min_size = 2000, 70 .min_size = 2000,
61 .index = -1 71 .index = -1,
72 .os_partition = true
62}; 73};
63 74
64static const char *pseries_nvram_os_partitions[] = { 75static const char *pseries_nvram_os_partitions[] = {
@@ -67,6 +78,12 @@ static const char *pseries_nvram_os_partitions[] = {
67 NULL 78 NULL
68}; 79};
69 80
81struct oops_log_info {
82 u16 version;
83 u16 report_length;
84 u64 timestamp;
85} __attribute__((packed));
86
70static void oops_to_nvram(struct kmsg_dumper *dumper, 87static void oops_to_nvram(struct kmsg_dumper *dumper,
71 enum kmsg_dump_reason reason); 88 enum kmsg_dump_reason reason);
72 89
@@ -83,28 +100,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */
83 100
84 * big_oops_buf[] holds the uncompressed text we're capturing. 101 * big_oops_buf[] holds the uncompressed text we're capturing.
85 * 102 *
 86 * oops_buf[] holds the compressed text, preceded by a prefix. 103 * oops_buf[] holds the compressed text, preceded by an oops header.
 87 * The prefix is just a u16 holding the length of the compressed* text. 104 * The oops header has a u16 holding the header version (to differentiate
 88 * (*Or uncompressed, if compression fails.) oops_buf[] gets written 105 * between old and new format headers), followed by a u16 holding the length
 89 * to NVRAM. 106 * of the compressed* text (*or uncompressed, if compression fails) and a u64
 107 * holding the timestamp. oops_buf[] gets written to NVRAM.
90 * 108 *
91 * oops_len points to the prefix. oops_data points to the compressed text. 109 * oops_log_info points to the header. oops_data points to the compressed text.
92 * 110 *
93 * +- oops_buf 111 * +- oops_buf
94 * | +- oops_data 112 * | +- oops_data
95 * v v 113 * v v
96 * +------------+-----------------------------------------------+ 114 * +-----------+-----------+-----------+------------------------+
97 * | length | text | 115 * | version | length | timestamp | text |
98 * | (2 bytes) | (oops_data_sz bytes) | 116 * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
99 * +------------+-----------------------------------------------+ 117 * +-----------+-----------+-----------+------------------------+
100 * ^ 118 * ^
101 * +- oops_len 119 * +- oops_log_info
102 * 120 *
103 * We preallocate these buffers during init to avoid kmalloc during oops/panic. 121 * We preallocate these buffers during init to avoid kmalloc during oops/panic.
104 */ 122 */
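/*
 * As a worked example, assuming the lnx,oops-log partition comes up at
 * its requested 4000 bytes and the packed oops_log_info header is 12
 * bytes (2 + 2 + 8), oops_data_sz works out to 4000 - 12 = 3988 bytes.
 */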
105static size_t big_oops_buf_sz; 123static size_t big_oops_buf_sz;
106static char *big_oops_buf, *oops_buf; 124static char *big_oops_buf, *oops_buf;
107static u16 *oops_len;
108static char *oops_data; 125static char *oops_data;
109static size_t oops_data_sz; 126static size_t oops_data_sz;
110 127
@@ -114,6 +131,30 @@ static size_t oops_data_sz;
114#define MEM_LEVEL 4 131#define MEM_LEVEL 4
115static struct z_stream_s stream; 132static struct z_stream_s stream;
116 133
134#ifdef CONFIG_PSTORE
135static struct nvram_os_partition of_config_partition = {
136 .name = "of-config",
137 .index = -1,
138 .os_partition = false
139};
140
141static struct nvram_os_partition common_partition = {
142 .name = "common",
143 .index = -1,
144 .os_partition = false
145};
146
147static enum pstore_type_id nvram_type_ids[] = {
148 PSTORE_TYPE_DMESG,
149 PSTORE_TYPE_PPC_RTAS,
150 PSTORE_TYPE_PPC_OF,
151 PSTORE_TYPE_PPC_COMMON,
152 -1
153};
154static int read_type;
155static unsigned long last_rtas_event;
156#endif
157
117static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) 158static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index)
118{ 159{
119 unsigned int i; 160 unsigned int i;
@@ -275,48 +316,72 @@ int nvram_write_error_log(char * buff, int length,
275{ 316{
276 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, 317 int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
277 err_type, error_log_cnt); 318 err_type, error_log_cnt);
278 if (!rc) 319 if (!rc) {
279 last_unread_rtas_event = get_seconds(); 320 last_unread_rtas_event = get_seconds();
321#ifdef CONFIG_PSTORE
322 last_rtas_event = get_seconds();
323#endif
324 }
325
280 return rc; 326 return rc;
281} 327}
282 328
283/* nvram_read_error_log 329/* nvram_read_partition
284 * 330 *
285 * Reads nvram for error log for at most 'length' 331 * Reads nvram partition for at most 'length'
286 */ 332 */
287int nvram_read_error_log(char * buff, int length, 333int nvram_read_partition(struct nvram_os_partition *part, char *buff,
288 unsigned int * err_type, unsigned int * error_log_cnt) 334 int length, unsigned int *err_type,
335 unsigned int *error_log_cnt)
289{ 336{
290 int rc; 337 int rc;
291 loff_t tmp_index; 338 loff_t tmp_index;
292 struct err_log_info info; 339 struct err_log_info info;
293 340
294 if (rtas_log_partition.index == -1) 341 if (part->index == -1)
295 return -1; 342 return -1;
296 343
297 if (length > rtas_log_partition.size) 344 if (length > part->size)
298 length = rtas_log_partition.size; 345 length = part->size;
299 346
300 tmp_index = rtas_log_partition.index; 347 tmp_index = part->index;
301 348
302 rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); 349 if (part->os_partition) {
303 if (rc <= 0) { 350 rc = ppc_md.nvram_read((char *)&info,
304 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 351 sizeof(struct err_log_info),
305 return rc; 352 &tmp_index);
353 if (rc <= 0) {
354 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__,
355 rc);
356 return rc;
357 }
306 } 358 }
307 359
308 rc = ppc_md.nvram_read(buff, length, &tmp_index); 360 rc = ppc_md.nvram_read(buff, length, &tmp_index);
309 if (rc <= 0) { 361 if (rc <= 0) {
310 printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc); 362 pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc);
311 return rc; 363 return rc;
312 } 364 }
313 365
314 *error_log_cnt = info.seq_num; 366 if (part->os_partition) {
315 *err_type = info.error_type; 367 *error_log_cnt = info.seq_num;
368 *err_type = info.error_type;
369 }
316 370
317 return 0; 371 return 0;
318} 372}
319 373
374/* nvram_read_error_log
375 *
376 * Reads nvram for error log for at most 'length'
377 */
378int nvram_read_error_log(char *buff, int length,
379 unsigned int *err_type, unsigned int *error_log_cnt)
380{
381 return nvram_read_partition(&rtas_log_partition, buff, length,
382 err_type, error_log_cnt);
383}
384
320/* This doesn't actually zero anything, but it sets the event_logged 385/* This doesn't actually zero anything, but it sets the event_logged
321 * word to tell that this event is safely in syslog. 386 * word to tell that this event is safely in syslog.
322 */ 387 */
@@ -405,6 +470,349 @@ static int __init pseries_nvram_init_os_partition(struct nvram_os_partition
405 return 0; 470 return 0;
406} 471}
407 472
473/*
474 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
475 * would logging this oops/panic overwrite an RTAS event that rtas_errd
476 * hasn't had a chance to read and process? Return 1 if so, else 0.
477 *
478 * We assume that if rtas_errd hasn't read the RTAS event in
479 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
480 */
481static int clobbering_unread_rtas_event(void)
482{
483 return (oops_log_partition.index == rtas_log_partition.index
484 && last_unread_rtas_event
485 && get_seconds() - last_unread_rtas_event <=
486 NVRAM_RTAS_READ_TIMEOUT);
487}
488
489/* Derived from logfs_compress() */
490static int nvram_compress(const void *in, void *out, size_t inlen,
491 size_t outlen)
492{
493 int err, ret;
494
495 ret = -EIO;
496 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
497 MEM_LEVEL, Z_DEFAULT_STRATEGY);
498 if (err != Z_OK)
499 goto error;
500
501 stream.next_in = in;
502 stream.avail_in = inlen;
503 stream.total_in = 0;
504 stream.next_out = out;
505 stream.avail_out = outlen;
506 stream.total_out = 0;
507
508 err = zlib_deflate(&stream, Z_FINISH);
509 if (err != Z_STREAM_END)
510 goto error;
511
512 err = zlib_deflateEnd(&stream);
513 if (err != Z_OK)
514 goto error;
515
516 if (stream.total_out >= stream.total_in)
517 goto error;
518
519 ret = stream.total_out;
520error:
521 return ret;
522}
523
524/* Compress the text from big_oops_buf into oops_buf. */
525static int zip_oops(size_t text_len)
526{
527 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
528 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
529 oops_data_sz);
530 if (zipped_len < 0) {
531 pr_err("nvram: compression failed; returned %d\n", zipped_len);
532 pr_err("nvram: logging uncompressed oops/panic report\n");
533 return -1;
534 }
535 oops_hdr->version = OOPS_HDR_VERSION;
536 oops_hdr->report_length = (u16) zipped_len;
537 oops_hdr->timestamp = get_seconds();
538 return 0;
539}
540
541#ifdef CONFIG_PSTORE
542/* Derived from logfs_uncompress */
543int nvram_decompress(void *in, void *out, size_t inlen, size_t outlen)
544{
545 int err, ret;
546
547 ret = -EIO;
548 err = zlib_inflateInit(&stream);
549 if (err != Z_OK)
550 goto error;
551
552 stream.next_in = in;
553 stream.avail_in = inlen;
554 stream.total_in = 0;
555 stream.next_out = out;
556 stream.avail_out = outlen;
557 stream.total_out = 0;
558
559 err = zlib_inflate(&stream, Z_FINISH);
560 if (err != Z_STREAM_END)
561 goto error;
562
563 err = zlib_inflateEnd(&stream);
564 if (err != Z_OK)
565 goto error;
566
567 ret = stream.total_out;
568error:
569 return ret;
570}
571
572static int unzip_oops(char *oops_buf, char *big_buf)
573{
574 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
575 u64 timestamp = oops_hdr->timestamp;
576 char *big_oops_data = NULL;
577 char *oops_data_buf = NULL;
578 size_t big_oops_data_sz;
579 int unzipped_len;
580
581 big_oops_data = big_buf + sizeof(struct oops_log_info);
582 big_oops_data_sz = big_oops_buf_sz - sizeof(struct oops_log_info);
583 oops_data_buf = oops_buf + sizeof(struct oops_log_info);
584
585 unzipped_len = nvram_decompress(oops_data_buf, big_oops_data,
586 oops_hdr->report_length,
587 big_oops_data_sz);
588
589 if (unzipped_len < 0) {
590 pr_err("nvram: decompression failed; returned %d\n",
591 unzipped_len);
592 return -1;
593 }
594 oops_hdr = (struct oops_log_info *)big_buf;
595 oops_hdr->version = OOPS_HDR_VERSION;
596 oops_hdr->report_length = (u16) unzipped_len;
597 oops_hdr->timestamp = timestamp;
598 return 0;
599}
600
601static int nvram_pstore_open(struct pstore_info *psi)
602{
603 /* Reset the iterator to start reading partitions again */
604 read_type = -1;
605 return 0;
606}
607
608/**
609 * nvram_pstore_write - pstore write callback for nvram
610 * @type: Type of message logged
611 * @reason: reason behind dump (oops/panic)
612 * @id: identifier to indicate the write performed
613 * @part: pstore writes data to the registered buffer in parts;
614 * this is the part number.
615 * @count: Indicates oops count
616 * @hsize: Size of header added by pstore
617 * @size: number of bytes written to the registered buffer
618 * @psi: registered pstore_info structure
619 *
620 * Called by pstore_dump() when an oops or panic report is logged in the
621 * printk buffer.
622 * Returns 0 on successful write.
623 */
624static int nvram_pstore_write(enum pstore_type_id type,
625 enum kmsg_dump_reason reason,
626 u64 *id, unsigned int part, int count,
627 size_t hsize, size_t size,
628 struct pstore_info *psi)
629{
630 int rc;
631 unsigned int err_type = ERR_TYPE_KERNEL_PANIC;
632 struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
633
634 /* part 1 has the recent messages from printk buffer */
635 if (part > 1 || type != PSTORE_TYPE_DMESG ||
636 clobbering_unread_rtas_event())
637 return -1;
638
639 oops_hdr->version = OOPS_HDR_VERSION;
640 oops_hdr->report_length = (u16) size;
641 oops_hdr->timestamp = get_seconds();
642
643 if (big_oops_buf) {
644 rc = zip_oops(size);
645 /*
646		 * If compression fails, copy recent log messages from
647 * big_oops_buf to oops_data.
648 */
649 if (rc != 0) {
650 size_t diff = size - oops_data_sz + hsize;
651
652 if (size > oops_data_sz) {
653 memcpy(oops_data, big_oops_buf, hsize);
654 memcpy(oops_data + hsize, big_oops_buf + diff,
655 oops_data_sz - hsize);
656
657 oops_hdr->report_length = (u16) oops_data_sz;
658 } else
659 memcpy(oops_data, big_oops_buf, size);
660 } else
661 err_type = ERR_TYPE_KERNEL_PANIC_GZ;
662 }
663
664 rc = nvram_write_os_partition(&oops_log_partition, oops_buf,
665 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
666 count);
667
668 if (rc != 0)
669 return rc;
670
671 *id = part;
672 return 0;
673}
674
675/*
676 * Reads the oops/panic report, rtas, of-config and common partitions.
677 * Returns the length of the data we read from each partition.
678 * Returns 0 once all partitions have been read.
679 */
680static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
681 int *count, struct timespec *time, char **buf,
682 struct pstore_info *psi)
683{
684 struct oops_log_info *oops_hdr;
685 unsigned int err_type, id_no, size = 0;
686 struct nvram_os_partition *part = NULL;
687 char *buff = NULL, *big_buff = NULL;
688 int rc, sig = 0;
689 loff_t p;
690
691read_partition:
692 read_type++;
693
694 switch (nvram_type_ids[read_type]) {
695 case PSTORE_TYPE_DMESG:
696 part = &oops_log_partition;
697 *type = PSTORE_TYPE_DMESG;
698 break;
699 case PSTORE_TYPE_PPC_RTAS:
700 part = &rtas_log_partition;
701 *type = PSTORE_TYPE_PPC_RTAS;
702 time->tv_sec = last_rtas_event;
703 time->tv_nsec = 0;
704 break;
705 case PSTORE_TYPE_PPC_OF:
706 sig = NVRAM_SIG_OF;
707 part = &of_config_partition;
708 *type = PSTORE_TYPE_PPC_OF;
709 *id = PSTORE_TYPE_PPC_OF;
710 time->tv_sec = 0;
711 time->tv_nsec = 0;
712 break;
713 case PSTORE_TYPE_PPC_COMMON:
714 sig = NVRAM_SIG_SYS;
715 part = &common_partition;
716 *type = PSTORE_TYPE_PPC_COMMON;
717 *id = PSTORE_TYPE_PPC_COMMON;
718 time->tv_sec = 0;
719 time->tv_nsec = 0;
720 break;
721 default:
722 return 0;
723 }
724
725 if (!part->os_partition) {
726 p = nvram_find_partition(part->name, sig, &size);
727 if (p <= 0) {
728 pr_err("nvram: Failed to find partition %s, "
729 "err %d\n", part->name, (int)p);
730 return 0;
731 }
732 part->index = p;
733 part->size = size;
734 }
735
736 buff = kmalloc(part->size, GFP_KERNEL);
737
738 if (!buff)
739 return -ENOMEM;
740
741 if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
742 kfree(buff);
743 return 0;
744 }
745
746 *count = 0;
747
748 if (part->os_partition)
749 *id = id_no;
750
751 if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
752 oops_hdr = (struct oops_log_info *)buff;
753 *buf = buff + sizeof(*oops_hdr);
754
755 if (err_type == ERR_TYPE_KERNEL_PANIC_GZ) {
756 big_buff = kmalloc(big_oops_buf_sz, GFP_KERNEL);
757 if (!big_buff)
758 return -ENOMEM;
759
760 rc = unzip_oops(buff, big_buff);
761
762 if (rc != 0) {
763 kfree(buff);
764 kfree(big_buff);
765 goto read_partition;
766 }
767
768 oops_hdr = (struct oops_log_info *)big_buff;
769 *buf = big_buff + sizeof(*oops_hdr);
770 kfree(buff);
771 }
772
773 time->tv_sec = oops_hdr->timestamp;
774 time->tv_nsec = 0;
775 return oops_hdr->report_length;
776 }
777
778 *buf = buff;
779 return part->size;
780}
781
782static struct pstore_info nvram_pstore_info = {
783 .owner = THIS_MODULE,
784 .name = "nvram",
785 .open = nvram_pstore_open,
786 .read = nvram_pstore_read,
787 .write = nvram_pstore_write,
788};
789
790static int nvram_pstore_init(void)
791{
792 int rc = 0;
793
794 if (big_oops_buf) {
795 nvram_pstore_info.buf = big_oops_buf;
796 nvram_pstore_info.bufsize = big_oops_buf_sz;
797 } else {
798 nvram_pstore_info.buf = oops_data;
799 nvram_pstore_info.bufsize = oops_data_sz;
800 }
801
802 rc = pstore_register(&nvram_pstore_info);
803 if (rc != 0)
804 pr_err("nvram: pstore_register() failed, defaults to "
805 "kmsg_dump; returned %d\n", rc);
806
807 return rc;
808}
809#else
810static int nvram_pstore_init(void)
811{
812 return -1;
813}
814#endif
815
408static void __init nvram_init_oops_partition(int rtas_partition_exists) 816static void __init nvram_init_oops_partition(int rtas_partition_exists)
409{ 817{
410 int rc; 818 int rc;
@@ -425,9 +833,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
425 oops_log_partition.name); 833 oops_log_partition.name);
426 return; 834 return;
427 } 835 }
428 oops_len = (u16*) oops_buf; 836 oops_data = oops_buf + sizeof(struct oops_log_info);
429 oops_data = oops_buf + sizeof(u16); 837 oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
430 oops_data_sz = oops_log_partition.size - sizeof(u16);
431 838
432 /* 839 /*
433 * Figure compression (preceded by elimination of each line's <n> 840 * Figure compression (preceded by elimination of each line's <n>
@@ -452,6 +859,11 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
452 stream.workspace = NULL; 859 stream.workspace = NULL;
453 } 860 }
454 861
862 rc = nvram_pstore_init();
863
864 if (!rc)
865 return;
866
455 rc = kmsg_dump_register(&nvram_kmsg_dumper); 867 rc = kmsg_dump_register(&nvram_kmsg_dumper);
456 if (rc != 0) { 868 if (rc != 0) {
457 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc); 869 pr_err("nvram: kmsg_dump_register() failed; returned %d\n", rc);
@@ -501,70 +913,6 @@ int __init pSeries_nvram_init(void)
501 return 0; 913 return 0;
502} 914}
503 915
504/*
505 * Are we using the ibm,rtas-log for oops/panic reports? And if so,
506 * would logging this oops/panic overwrite an RTAS event that rtas_errd
507 * hasn't had a chance to read and process? Return 1 if so, else 0.
508 *
509 * We assume that if rtas_errd hasn't read the RTAS event in
510 * NVRAM_RTAS_READ_TIMEOUT seconds, it's probably not going to.
511 */
512static int clobbering_unread_rtas_event(void)
513{
514 return (oops_log_partition.index == rtas_log_partition.index
515 && last_unread_rtas_event
516 && get_seconds() - last_unread_rtas_event <=
517 NVRAM_RTAS_READ_TIMEOUT);
518}
519
520/* Derived from logfs_compress() */
521static int nvram_compress(const void *in, void *out, size_t inlen,
522 size_t outlen)
523{
524 int err, ret;
525
526 ret = -EIO;
527 err = zlib_deflateInit2(&stream, COMPR_LEVEL, Z_DEFLATED, WINDOW_BITS,
528 MEM_LEVEL, Z_DEFAULT_STRATEGY);
529 if (err != Z_OK)
530 goto error;
531
532 stream.next_in = in;
533 stream.avail_in = inlen;
534 stream.total_in = 0;
535 stream.next_out = out;
536 stream.avail_out = outlen;
537 stream.total_out = 0;
538
539 err = zlib_deflate(&stream, Z_FINISH);
540 if (err != Z_STREAM_END)
541 goto error;
542
543 err = zlib_deflateEnd(&stream);
544 if (err != Z_OK)
545 goto error;
546
547 if (stream.total_out >= stream.total_in)
548 goto error;
549
550 ret = stream.total_out;
551error:
552 return ret;
553}
554
555/* Compress the text from big_oops_buf into oops_buf. */
556static int zip_oops(size_t text_len)
557{
558 int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
559 oops_data_sz);
560 if (zipped_len < 0) {
561 pr_err("nvram: compression failed; returned %d\n", zipped_len);
562 pr_err("nvram: logging uncompressed oops/panic report\n");
563 return -1;
564 }
565 *oops_len = (u16) zipped_len;
566 return 0;
567}
568 916
569/* 917/*
570 * This is our kmsg_dump callback, called after an oops or panic report 918 * This is our kmsg_dump callback, called after an oops or panic report
@@ -576,6 +924,7 @@ static int zip_oops(size_t text_len)
576static void oops_to_nvram(struct kmsg_dumper *dumper, 924static void oops_to_nvram(struct kmsg_dumper *dumper,
577 enum kmsg_dump_reason reason) 925 enum kmsg_dump_reason reason)
578{ 926{
927 struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
579 static unsigned int oops_count = 0; 928 static unsigned int oops_count = 0;
580 static bool panicking = false; 929 static bool panicking = false;
581 static DEFINE_SPINLOCK(lock); 930 static DEFINE_SPINLOCK(lock);
@@ -619,14 +968,17 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
619 } 968 }
620 if (rc != 0) { 969 if (rc != 0) {
621 kmsg_dump_rewind(dumper); 970 kmsg_dump_rewind(dumper);
622 kmsg_dump_get_buffer(dumper, true, 971 kmsg_dump_get_buffer(dumper, false,
623 oops_data, oops_data_sz, &text_len); 972 oops_data, oops_data_sz, &text_len);
624 err_type = ERR_TYPE_KERNEL_PANIC; 973 err_type = ERR_TYPE_KERNEL_PANIC;
625 *oops_len = (u16) text_len; 974 oops_hdr->version = OOPS_HDR_VERSION;
975 oops_hdr->report_length = (u16) text_len;
976 oops_hdr->timestamp = get_seconds();
626 } 977 }
627 978
628 (void) nvram_write_os_partition(&oops_log_partition, oops_buf, 979 (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
629 (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count); 980 (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
981 ++oops_count);
630 982
631 spin_unlock_irqrestore(&lock, flags); 983 spin_unlock_irqrestore(&lock, flags);
632} 984}
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index c91b22be9288..efe61374f6ea 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -64,91 +64,6 @@ pcibios_find_pci_bus(struct device_node *dn)
64} 64}
65EXPORT_SYMBOL_GPL(pcibios_find_pci_bus); 65EXPORT_SYMBOL_GPL(pcibios_find_pci_bus);
66 66
67/**
68 * __pcibios_remove_pci_devices - remove all devices under this bus
69 * @bus: the indicated PCI bus
70 * @purge_pe: destroy the PE on removal of PCI devices
71 *
72 * Remove all of the PCI devices under this bus both from the
73 * linux pci device tree, and from the powerpc EEH address cache.
 74 * By default, the corresponding PE will be destroyed during the
 75 * normal PCI hotplug path. For PCI hotplug during EEH recovery,
 76 * the corresponding PE won't be destroyed and deallocated.
77 */
78void __pcibios_remove_pci_devices(struct pci_bus *bus, int purge_pe)
79{
80 struct pci_dev *dev, *tmp;
81 struct pci_bus *child_bus;
82
83 /* First go down child busses */
84 list_for_each_entry(child_bus, &bus->children, node)
85 __pcibios_remove_pci_devices(child_bus, purge_pe);
86
87 pr_debug("PCI: Removing devices on bus %04x:%02x\n",
88 pci_domain_nr(bus), bus->number);
89 list_for_each_entry_safe(dev, tmp, &bus->devices, bus_list) {
90 pr_debug(" * Removing %s...\n", pci_name(dev));
91 eeh_remove_bus_device(dev, purge_pe);
92 pci_stop_and_remove_bus_device(dev);
93 }
94}
95
96/**
97 * pcibios_remove_pci_devices - remove all devices under this bus
98 *
99 * Remove all of the PCI devices under this bus both from the
100 * linux pci device tree, and from the powerpc EEH address cache.
101 */
102void pcibios_remove_pci_devices(struct pci_bus *bus)
103{
104 __pcibios_remove_pci_devices(bus, 1);
105}
106EXPORT_SYMBOL_GPL(pcibios_remove_pci_devices);
107
108/**
109 * pcibios_add_pci_devices - adds new pci devices to bus
110 *
111 * This routine will find and fixup new pci devices under
112 * the indicated bus. This routine presumes that there
113 * might already be some devices under this bridge, so
114 * it carefully tries to add only new devices. (And that
115 * is how this routine differs from other, similar pcibios
116 * routines.)
117 */
118void pcibios_add_pci_devices(struct pci_bus * bus)
119{
120 int slotno, num, mode, pass, max;
121 struct pci_dev *dev;
122 struct device_node *dn = pci_bus_to_OF_node(bus);
123
124 eeh_add_device_tree_early(dn);
125
126 mode = PCI_PROBE_NORMAL;
127 if (ppc_md.pci_probe_mode)
128 mode = ppc_md.pci_probe_mode(bus);
129
130 if (mode == PCI_PROBE_DEVTREE) {
131 /* use ofdt-based probe */
132 of_rescan_bus(dn, bus);
133 } else if (mode == PCI_PROBE_NORMAL) {
134 /* use legacy probe */
135 slotno = PCI_SLOT(PCI_DN(dn->child)->devfn);
136 num = pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
137 if (!num)
138 return;
139 pcibios_setup_bus_devices(bus);
140 max = bus->busn_res.start;
141 for (pass=0; pass < 2; pass++)
142 list_for_each_entry(dev, &bus->devices, bus_list) {
143 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE ||
144 dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)
145 max = pci_scan_bridge(bus, dev, max, pass);
146 }
147 }
148 pcibios_finish_adding_to_bus(bus);
149}
150EXPORT_SYMBOL_GPL(pcibios_add_pci_devices);
151
152struct pci_controller *init_phb_dynamic(struct device_node *dn) 67struct pci_controller *init_phb_dynamic(struct device_node *dn)
153{ 68{
154 struct pci_controller *phb; 69 struct pci_controller *phb;
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c4dfccd3a3d9..7b3cbde8c783 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier)
83 switch (event_modifier) { 83 switch (event_modifier) {
84 case EPOW_SHUTDOWN_NORMAL: 84 case EPOW_SHUTDOWN_NORMAL:
85 pr_emerg("Firmware initiated power off"); 85 pr_emerg("Firmware initiated power off");
86 orderly_poweroff(1); 86 orderly_poweroff(true);
87 break; 87 break;
88 88
89 case EPOW_SHUTDOWN_ON_UPS: 89 case EPOW_SHUTDOWN_ON_UPS:
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier)
95 pr_emerg("Loss of system critical functions reported by " 95 pr_emerg("Loss of system critical functions reported by "
96 "firmware"); 96 "firmware");
97 pr_emerg("Check RTAS error log for details"); 97 pr_emerg("Check RTAS error log for details");
98 orderly_poweroff(1); 98 orderly_poweroff(true);
99 break; 99 break;
100 100
101 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: 101 case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
102 pr_emerg("Ambient temperature too high reported by firmware"); 102 pr_emerg("Ambient temperature too high reported by firmware");
103 pr_emerg("Check RTAS error log for details"); 103 pr_emerg("Check RTAS error log for details");
104 orderly_poweroff(1); 104 orderly_poweroff(true);
105 break; 105 break;
106 106
107 default: 107 default:
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
162 162
163 case EPOW_SYSTEM_HALT: 163 case EPOW_SYSTEM_HALT:
164 pr_emerg("Firmware initiated power off"); 164 pr_emerg("Firmware initiated power off");
165 orderly_poweroff(1); 165 orderly_poweroff(true);
166 break; 166 break;
167 167
168 case EPOW_MAIN_ENCLOSURE: 168 case EPOW_MAIN_ENCLOSURE:
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 12bc8c3663ad..306643cc9dbc 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -192,7 +192,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
192 /* Special case - we inhibit secondary thread startup 192 /* Special case - we inhibit secondary thread startup
193 * during boot if the user requests it. 193 * during boot if the user requests it.
194 */ 194 */
195 if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) { 195 if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
196 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0) 196 if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
197 return 0; 197 return 0;
198 if (smt_enabled_at_boot 198 if (smt_enabled_at_boot