author		Michael Ellerman <mpe@ellerman.id.au>	2015-04-06 23:24:55 -0400
committer	Michael Ellerman <mpe@ellerman.id.au>	2015-04-06 23:24:55 -0400
commit		428d4d6520a0b8683fe9eac6df3077001e13d00b
tree		8afa1af0babc8f2c375acc244aae969846dfe199 /arch/powerpc/platforms/powernv
parent		28ea605caac49497e5e34a73ee4f4682fc035f1d
parent		027fa02f84e851e21daffdf8900d6117071890f8

Merge branch 'next-eeh' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc into next
Diffstat (limited to 'arch/powerpc/platforms/powernv')
 arch/powerpc/platforms/powernv/Makefile      |    2
 arch/powerpc/platforms/powernv/eeh-ioda.c    | 1149
 arch/powerpc/platforms/powernv/eeh-powernv.c | 1300
 arch/powerpc/platforms/powernv/pci-ioda.c    |   10
 arch/powerpc/platforms/powernv/pci.c         |   69
 arch/powerpc/platforms/powernv/pci.h         |   29
 6 files changed, 1189 insertions(+), 1370 deletions(-)
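
Both the deleted eeh-ioda.c and the reworked eeh-powernv.c below expose an error-injection file in debugfs ("err_injct") whose write handler parses a colon-separated pe_no:type:func:addr:mask string; see the sscanf() calls in the diff. A minimal user-space sketch of driving it follows. The debugfs path here is an assumption (per-PHB directory naming is platform specific); the field order must match the kernel's "%x:%x:%x:%lx:%lx" format.

	/*
	 * Hedged sketch: inject one error into PE 0x2 through the err_injct
	 * debugfs file shown in the diff below.  The path is hypothetical.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		/* Assumed layout: <debugfs>/powerpc/<phb>/err_injct */
		const char *path = "/sys/kernel/debug/powerpc/PCI0000/err_injct";
		const char *cmd = "2:0:0:0x0:0x0";	/* pe_no=2, 32-bit bus error */
		int fd = open(path, O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, cmd, strlen(cmd)) < 0)
			perror("write");
		close(fd);
		return 0;
	}

The five fields map directly onto the arguments of the err_inject() backend shown in the diff below.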
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 6f3c5d33c3af..33e44f37212f 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -5,7 +5,7 @@ obj-y			+= opal-msglog.o opal-hmi.o opal-power.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
-obj-$(CONFIG_EEH)	+= eeh-ioda.o eeh-powernv.o
+obj-$(CONFIG_EEH)	+= eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
 obj-$(CONFIG_TRACEPOINTS)	+= opal-tracepoints.o
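
The build change above is the heart of the merge: eeh-ioda.o disappears because its IODA-specific callbacks move into eeh-powernv.c, dropping the per-PHB struct pnv_eeh_ops dispatch table visible at the bottom of the deleted file below. A stripped-down sketch of the double dispatch being removed; every name in it is hypothetical, chosen only to illustrate the pattern:

	#include <errno.h>
	#include <stdio.h>

	/* Per-backend ops table, the shape this merge removes */
	struct eeh_backend_ops {
		int (*set_option)(int pe_addr, int option);
	};

	/* The only implementation left; in the kernel this calls OPAL firmware */
	static int opal_set_option(int pe_addr, int option)
	{
		return 0;
	}

	static const struct eeh_backend_ops ioda_ops = {
		.set_option = opal_set_option,
	};

	/* Pre-merge shape: every call pays one extra indirection */
	static int eeh_set_option(const struct eeh_backend_ops *ops,
				  int pe_addr, int option)
	{
		return (ops && ops->set_option) ? ops->set_option(pe_addr, option)
						: -ENXIO;
	}

	int main(void)
	{
		/* Post-merge, opal_set_option() would simply be called directly */
		printf("%d\n", eeh_set_option(&ioda_ops, 2, 0));
		return 0;
	}

With OPAL as the only backend left, the extra hop buys nothing, which is why the new code below calls opal_pci_eeh_freeze_set()/opal_pci_eeh_freeze_clear() directly.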
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
deleted file mode 100644
index 2809c9895288..000000000000
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ /dev/null
@@ -1,1149 +0,0 @@
-/*
- * This file implements the functions needed by EEH on IODA-compliant
- * chips. Most of the EEH support here is built on top of the OPAL
- * APIs.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <linux/debugfs.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-#include <linux/irq.h>
-#include <linux/kernel.h>
-#include <linux/msi.h>
-#include <linux/notifier.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/eeh.h>
-#include <asm/eeh_event.h>
-#include <asm/io.h>
-#include <asm/iommu.h>
-#include <asm/msi_bitmap.h>
-#include <asm/opal.h>
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-#include <asm/tce.h>
-
-#include "powernv.h"
-#include "pci.h"
-
-static int ioda_eeh_nb_init = 0;
-
-static int ioda_eeh_event(struct notifier_block *nb,
-			  unsigned long events, void *change)
-{
-	uint64_t changed_evts = (uint64_t)change;
-
-	/*
-	 * If EEH has been enabled, send a special EEH event;
-	 * otherwise, clear the pending events so they don't
-	 * fire once EEH is enabled later.
-	 */
-	if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
-	    !(events & OPAL_EVENT_PCI_ERROR))
-		return 0;
-
-	if (eeh_enabled())
-		eeh_send_failure_event(NULL);
-	else
-		opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
-
-	return 0;
-}
-
-static struct notifier_block ioda_eeh_nb = {
-	.notifier_call	= ioda_eeh_event,
-	.next		= NULL,
-	.priority	= 0
-};
-
-#ifdef CONFIG_DEBUG_FS
-static ssize_t ioda_eeh_ei_write(struct file *filp,
-				 const char __user *user_buf,
-				 size_t count, loff_t *ppos)
-{
-	struct pci_controller *hose = filp->private_data;
-	struct pnv_phb *phb = hose->private_data;
-	struct eeh_dev *edev;
-	struct eeh_pe *pe;
-	int pe_no, type, func;
-	unsigned long addr, mask;
-	char buf[50];
-	int ret;
-
-	if (!phb->eeh_ops || !phb->eeh_ops->err_inject)
-		return -ENXIO;
-
-	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
-	if (!ret)
-		return -EFAULT;
-
-	/* Retrieve parameters */
-	ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
-		     &pe_no, &type, &func, &addr, &mask);
-	if (ret != 5)
-		return -EINVAL;
-
-	/* Retrieve PE */
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev)
-		return -ENOMEM;
-	edev->phb = hose;
-	edev->pe_config_addr = pe_no;
-	pe = eeh_pe_get(edev);
-	kfree(edev);
-	if (!pe)
-		return -ENODEV;
-
-	/* Do error injection */
-	ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask);
-	return ret < 0 ? ret : count;
-}
-
-static const struct file_operations ioda_eeh_ei_fops = {
-	.open	= simple_open,
-	.llseek	= no_llseek,
-	.write	= ioda_eeh_ei_write,
-};
-
-static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val)
-{
-	struct pci_controller *hose = data;
-	struct pnv_phb *phb = hose->private_data;
-
-	out_be64(phb->regs + offset, val);
-	return 0;
-}
-
-static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val)
-{
-	struct pci_controller *hose = data;
-	struct pnv_phb *phb = hose->private_data;
-
-	*val = in_be64(phb->regs + offset);
-	return 0;
-}
-
-static int ioda_eeh_outb_dbgfs_set(void *data, u64 val)
-{
-	return ioda_eeh_dbgfs_set(data, 0xD10, val);
-}
-
-static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val)
-{
-	return ioda_eeh_dbgfs_get(data, 0xD10, val);
-}
-
-static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val)
-{
-	return ioda_eeh_dbgfs_set(data, 0xD90, val);
-}
-
-static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val)
-{
-	return ioda_eeh_dbgfs_get(data, 0xD90, val);
-}
-
-static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val)
-{
-	return ioda_eeh_dbgfs_set(data, 0xE10, val);
-}
-
-static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val)
-{
-	return ioda_eeh_dbgfs_get(data, 0xE10, val);
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get,
-			ioda_eeh_outb_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get,
-			ioda_eeh_inbA_dbgfs_set, "0x%llx\n");
-DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
-			ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
-#endif /* CONFIG_DEBUG_FS */
-
-
-/**
- * ioda_eeh_post_init - Chip dependent post initialization
- * @hose: PCI controller
- *
- * The function is called after EEH PEs and devices have been
- * built, which means EEH is ready to supply service with its
- * I/O cache.
- */
-static int ioda_eeh_post_init(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	int ret;
-
-	/* Register OPAL event notifier */
-	if (!ioda_eeh_nb_init) {
-		ret = opal_notifier_register(&ioda_eeh_nb);
-		if (ret) {
-			pr_err("%s: Can't register OPAL event notifier (%d)\n",
-			       __func__, ret);
-			return ret;
-		}
-
-		ioda_eeh_nb_init = 1;
-	}
-
-#ifdef CONFIG_DEBUG_FS
-	if (!phb->has_dbgfs && phb->dbgfs) {
-		phb->has_dbgfs = 1;
-
-		debugfs_create_file("err_injct", 0200,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_ei_fops);
-
-		debugfs_create_file("err_injct_outbound", 0600,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_outb_dbgfs_ops);
-		debugfs_create_file("err_injct_inboundA", 0600,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_inbA_dbgfs_ops);
-		debugfs_create_file("err_injct_inboundB", 0600,
-				    phb->dbgfs, hose,
-				    &ioda_eeh_inbB_dbgfs_ops);
-	}
-#endif
-
-	/* If EEH is enabled, we're going to rely on that.
-	 * Otherwise, we fall back to the conventional mechanism
-	 * of clearing frozen PEs during PCI config accesses.
-	 */
-	if (eeh_enabled())
-		phb->flags |= PNV_PHB_FLAG_EEH;
-	else
-		phb->flags &= ~PNV_PHB_FLAG_EEH;
-
-	return 0;
-}
-
-/**
- * ioda_eeh_set_option - Set EEH operation or I/O setting
- * @pe: EEH PE
- * @option: options
- *
- * Enable or disable EEH option for the indicated PE. The
- * function also can be used to enable I/O or DMA for the
- * PE.
- */
-static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
-{
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	bool freeze_pe = false;
-	int enable, ret = 0;
-	s64 rc;
-
-	/* Check on PE number */
-	if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) {
-		pr_err("%s: PE address %x out of range [0, %x] "
-		       "on PHB#%x\n",
-		       __func__, pe->addr, phb->ioda.total_pe,
-		       hose->global_number);
-		return -EINVAL;
-	}
-
-	switch (option) {
-	case EEH_OPT_DISABLE:
-		return -EPERM;
-	case EEH_OPT_ENABLE:
-		return 0;
-	case EEH_OPT_THAW_MMIO:
-		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
-		break;
-	case EEH_OPT_THAW_DMA:
-		enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
-		break;
-	case EEH_OPT_FREEZE_PE:
-		freeze_pe = true;
-		enable = OPAL_EEH_ACTION_SET_FREEZE_ALL;
-		break;
-	default:
-		pr_warn("%s: Invalid option %d\n",
-			__func__, option);
-		return -EINVAL;
-	}
-
-	/* If the PHB supports compound PEs, let it handle this */
-	if (freeze_pe) {
-		if (phb->freeze_pe) {
-			phb->freeze_pe(phb, pe->addr);
-		} else {
-			rc = opal_pci_eeh_freeze_set(phb->opal_id,
-						     pe->addr,
-						     enable);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld freezing "
-					"PHB#%x-PE#%x\n",
-					__func__, rc,
-					phb->hose->global_number, pe->addr);
-				ret = -EIO;
-			}
-		}
-	} else {
-		if (phb->unfreeze_pe) {
-			ret = phb->unfreeze_pe(phb, pe->addr, enable);
-		} else {
-			rc = opal_pci_eeh_freeze_clear(phb->opal_id,
-						       pe->addr,
-						       enable);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld enable %d "
-					"for PHB#%x-PE#%x\n",
-					__func__, rc, option,
-					phb->hose->global_number, pe->addr);
-				ret = -EIO;
-			}
-		}
-	}
-
-	return ret;
-}
-
-static void ioda_eeh_phb_diag(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-	long rc;
-
-	rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
-					 PNV_PCI_DIAG_BUF_SIZE);
-	if (rc != OPAL_SUCCESS)
-		pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n",
-			__func__, pe->phb->global_number, rc);
-}
-
-static int ioda_eeh_get_phb_state(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-	u8 fstate;
-	__be16 pcierr;
-	s64 rc;
-	int result = 0;
-
-	rc = opal_pci_eeh_freeze_status(phb->opal_id,
-					pe->addr,
-					&fstate,
-					&pcierr,
-					NULL);
-	if (rc != OPAL_SUCCESS) {
-		pr_warn("%s: Failure %lld getting PHB#%x state\n",
-			__func__, rc, phb->hose->global_number);
-		return EEH_STATE_NOT_SUPPORT;
-	}
-
-	/*
-	 * Check the PHB state. If the PHB is frozen for the
-	 * first time, dump the PHB diag-data.
-	 */
-	if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_DMA_ACTIVE |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-	} else if (!(pe->state & EEH_PE_ISOLATED)) {
-		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-		ioda_eeh_phb_diag(pe);
-
-		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
-			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
-	}
-
-	return result;
-}
-
-static int ioda_eeh_get_pe_state(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-	u8 fstate;
-	__be16 pcierr;
-	s64 rc;
-	int result;
-
-	/*
-	 * We don't clobber the hardware frozen state until PE
-	 * reset is completed. In order to keep the EEH core
-	 * moving forward, we have to return an operational
-	 * state during PE reset.
-	 */
-	if (pe->state & EEH_PE_RESET) {
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_DMA_ACTIVE |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-		return result;
-	}
-
-	/*
-	 * Fetch PE state from hardware. If the PHB
-	 * supports compound PEs, let it handle that.
-	 */
-	if (phb->get_pe_state) {
-		fstate = phb->get_pe_state(phb, pe->addr);
-	} else {
-		rc = opal_pci_eeh_freeze_status(phb->opal_id,
-						pe->addr,
-						&fstate,
-						&pcierr,
-						NULL);
-		if (rc != OPAL_SUCCESS) {
-			pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
-				__func__, rc, phb->hose->global_number, pe->addr);
-			return EEH_STATE_NOT_SUPPORT;
-		}
-	}
-
-	/* Figure out state */
-	switch (fstate) {
-	case OPAL_EEH_STOPPED_NOT_FROZEN:
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_DMA_ACTIVE |
-			  EEH_STATE_MMIO_ENABLED |
-			  EEH_STATE_DMA_ENABLED);
-		break;
-	case OPAL_EEH_STOPPED_MMIO_FREEZE:
-		result = (EEH_STATE_DMA_ACTIVE |
-			  EEH_STATE_DMA_ENABLED);
-		break;
-	case OPAL_EEH_STOPPED_DMA_FREEZE:
-		result = (EEH_STATE_MMIO_ACTIVE |
-			  EEH_STATE_MMIO_ENABLED);
-		break;
-	case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
-		result = 0;
-		break;
-	case OPAL_EEH_STOPPED_RESET:
-		result = EEH_STATE_RESET_ACTIVE;
-		break;
-	case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
-		result = EEH_STATE_UNAVAILABLE;
-		break;
-	case OPAL_EEH_STOPPED_PERM_UNAVAIL:
-		result = EEH_STATE_NOT_SUPPORT;
-		break;
-	default:
-		result = EEH_STATE_NOT_SUPPORT;
-		pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
-			__func__, phb->hose->global_number,
-			pe->addr, fstate);
-	}
-
-	/*
-	 * If the PHB supports compound PEs, freeze all slave
-	 * PEs for consistency.
-	 *
-	 * If the PE is switching to the frozen state for the
-	 * first time, dump the PHB diag-data.
-	 */
-	if (!(result & EEH_STATE_NOT_SUPPORT) &&
-	    !(result & EEH_STATE_UNAVAILABLE) &&
-	    !(result & EEH_STATE_MMIO_ACTIVE) &&
-	    !(result & EEH_STATE_DMA_ACTIVE) &&
-	    !(pe->state & EEH_PE_ISOLATED)) {
-		if (phb->freeze_pe)
-			phb->freeze_pe(phb, pe->addr);
-
-		eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
-		ioda_eeh_phb_diag(pe);
-
-		if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
-			pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
-	}
-
-	return result;
-}
-
-/**
- * ioda_eeh_get_state - Retrieve the state of PE
- * @pe: EEH PE
- *
- * The PE's state should be retrieved from the PEEV and PEST
- * IODA tables. Since OPAL exports a function to do that,
- * it's best to use it.
- */
-static int ioda_eeh_get_state(struct eeh_pe *pe)
-{
-	struct pnv_phb *phb = pe->phb->private_data;
-
-	/* Sanity check on PE number. PHB PE should have 0 */
-	if (pe->addr < 0 ||
-	    pe->addr >= phb->ioda.total_pe) {
-		pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n",
-			__func__, phb->hose->global_number,
-			pe->addr, phb->ioda.total_pe);
-		return EEH_STATE_NOT_SUPPORT;
-	}
-
-	if (pe->type & EEH_PE_PHB)
-		return ioda_eeh_get_phb_state(pe);
-
-	return ioda_eeh_get_pe_state(pe);
-}
-
-static s64 ioda_eeh_phb_poll(struct pnv_phb *phb)
-{
-	s64 rc = OPAL_HARDWARE;
-
-	while (1) {
-		rc = opal_pci_poll(phb->opal_id);
-		if (rc <= 0)
-			break;
-
-		if (system_state < SYSTEM_RUNNING)
-			udelay(1000 * rc);
-		else
-			msleep(rc);
-	}
-
-	return rc;
-}
-
-int ioda_eeh_phb_reset(struct pci_controller *hose, int option)
-{
-	struct pnv_phb *phb = hose->private_data;
-	s64 rc = OPAL_HARDWARE;
-
-	pr_debug("%s: Reset PHB#%x, option=%d\n",
-		 __func__, hose->global_number, option);
-
-	/* Issue PHB complete reset request */
-	if (option == EEH_RESET_FUNDAMENTAL ||
-	    option == EEH_RESET_HOT)
-		rc = opal_pci_reset(phb->opal_id,
-				    OPAL_RESET_PHB_COMPLETE,
-				    OPAL_ASSERT_RESET);
-	else if (option == EEH_RESET_DEACTIVATE)
-		rc = opal_pci_reset(phb->opal_id,
-				    OPAL_RESET_PHB_COMPLETE,
-				    OPAL_DEASSERT_RESET);
-	if (rc < 0)
-		goto out;
-
-	/*
-	 * Poll the state of the PHB until the request is done
-	 * successfully. The PHB reset is usually a PHB complete
-	 * reset followed by a hot reset on the root bus, so we
-	 * also need the PCI bus settlement delay.
-	 */
-	rc = ioda_eeh_phb_poll(phb);
-	if (option == EEH_RESET_DEACTIVATE) {
-		if (system_state < SYSTEM_RUNNING)
-			udelay(1000 * EEH_PE_RST_SETTLE_TIME);
-		else
-			msleep(EEH_PE_RST_SETTLE_TIME);
-	}
-out:
-	if (rc != OPAL_SUCCESS)
-		return -EIO;
-
-	return 0;
-}
-
-static int ioda_eeh_root_reset(struct pci_controller *hose, int option)
-{
-	struct pnv_phb *phb = hose->private_data;
-	s64 rc = OPAL_SUCCESS;
-
-	pr_debug("%s: Reset PHB#%x, option=%d\n",
-		 __func__, hose->global_number, option);
-
-	/*
-	 * During reset deassert, we needn't care about the reset
-	 * scope, because the firmware does nothing for fundamental
-	 * or hot resets in the deassert phase.
-	 */
-	if (option == EEH_RESET_FUNDAMENTAL)
-		rc = opal_pci_reset(phb->opal_id,
-				    OPAL_RESET_PCI_FUNDAMENTAL,
-				    OPAL_ASSERT_RESET);
-	else if (option == EEH_RESET_HOT)
-		rc = opal_pci_reset(phb->opal_id,
-				    OPAL_RESET_PCI_HOT,
-				    OPAL_ASSERT_RESET);
-	else if (option == EEH_RESET_DEACTIVATE)
-		rc = opal_pci_reset(phb->opal_id,
-				    OPAL_RESET_PCI_HOT,
-				    OPAL_DEASSERT_RESET);
-	if (rc < 0)
-		goto out;
-
-	/* Poll the state of the PHB until the request is done */
-	rc = ioda_eeh_phb_poll(phb);
-	if (option == EEH_RESET_DEACTIVATE)
-		msleep(EEH_PE_RST_SETTLE_TIME);
-out:
-	if (rc != OPAL_SUCCESS)
-		return -EIO;
-
-	return 0;
-}
-
-static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option)
-
-{
-	struct device_node *dn = pci_device_to_OF_node(dev);
-	struct eeh_dev *edev = of_node_to_eeh_dev(dn);
-	int aer = edev ? edev->aer_cap : 0;
-	u32 ctrl;
-
-	pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
-		 __func__, pci_domain_nr(dev->bus),
-		 dev->bus->number, option);
-
-	switch (option) {
-	case EEH_RESET_FUNDAMENTAL:
-	case EEH_RESET_HOT:
-		/* Don't report linkDown event */
-		if (aer) {
-			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					     4, &ctrl);
-			ctrl |= PCI_ERR_UNC_SURPDN;
-			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					      4, ctrl);
-		}
-
-		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
-		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
-		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
-		msleep(EEH_PE_RST_HOLD_TIME);
-
-		break;
-	case EEH_RESET_DEACTIVATE:
-		eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl);
-		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
-		eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl);
-		msleep(EEH_PE_RST_SETTLE_TIME);
-
-		/* Continue reporting linkDown event */
-		if (aer) {
-			eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					     4, &ctrl);
-			ctrl &= ~PCI_ERR_UNC_SURPDN;
-			eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK,
-					      4, ctrl);
-		}
-
-		break;
-	}
-
-	return 0;
-}
-
-void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
-{
-	struct pci_controller *hose;
-
-	if (pci_is_root_bus(dev->bus)) {
-		hose = pci_bus_to_host(dev->bus);
-		ioda_eeh_root_reset(hose, EEH_RESET_HOT);
-		ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
-	} else {
-		ioda_eeh_bridge_reset(dev, EEH_RESET_HOT);
-		ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
-	}
-}
-
-/**
- * ioda_eeh_reset - Reset the indicated PE
- * @pe: EEH PE
- * @option: reset option
- *
- * Do a reset on the indicated PE. For a PCI-bus-sensitive PE,
- * we need to reset the parent p2p bridge. The PHB has to be
- * reinitialized if the p2p bridge is the root bridge. For a
- * PCI-device-sensitive PE, we will try to reset the device
- * through FLR. For now, we don't have OPAL APIs to do a HARD
- * reset yet, so all resets are SOFT (HOT) resets.
- */
-static int ioda_eeh_reset(struct eeh_pe *pe, int option)
-{
-	struct pci_controller *hose = pe->phb;
-	struct pci_bus *bus;
-	int ret;
-
-	/*
-	 * For PHB reset, we always have a complete reset. For those PEs whose
-	 * primary bus is derived from the root complex (root bus) or root port
-	 * (usually bus#1), we apply a hot or fundamental reset on the root
-	 * port. For other PEs, we always do a hot reset on the PE primary bus.
-	 *
-	 * Here, we have a different design from pHyp, which always clears the
-	 * frozen state during PE reset. The idea here, from benh, is to keep
-	 * the frozen state until PE reset is done completely (until BAR
-	 * restore). With the frozen state, the HW drops illegal IO or MMIO
-	 * accesses, which can otherwise incur a recursive frozen PE during
-	 * PE reset. The side effect is that the EEH core has to clear the
-	 * frozen state explicitly after BAR restore.
-	 */
-	if (pe->type & EEH_PE_PHB) {
-		ret = ioda_eeh_phb_reset(hose, option);
-	} else {
-		struct pnv_phb *phb;
-		s64 rc;
-
-		/*
-		 * The frozen PE might be caused by the PAPR error injection
-		 * registers, which are expected to be cleared after hitting
-		 * a frozen PE as stated in the hardware spec. Unfortunately,
-		 * that's not true on P7IOC. So we have to clear it manually
-		 * to avoid recursive EEH errors during recovery.
-		 */
-		phb = hose->private_data;
-		if (phb->model == PNV_PHB_MODEL_P7IOC &&
-		    (option == EEH_RESET_HOT ||
-		     option == EEH_RESET_FUNDAMENTAL)) {
-			rc = opal_pci_reset(phb->opal_id,
-					    OPAL_RESET_PHB_ERROR,
-					    OPAL_ASSERT_RESET);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("%s: Failure %lld clearing "
-					"error injection registers\n",
-					__func__, rc);
-				return -EIO;
-			}
-		}
-
-		bus = eeh_pe_bus_get(pe);
-		if (pci_is_root_bus(bus) ||
-		    pci_is_root_bus(bus->parent))
-			ret = ioda_eeh_root_reset(hose, option);
-		else
-			ret = ioda_eeh_bridge_reset(bus->self, option);
-	}
-
-	return ret;
-}
-
-/**
- * ioda_eeh_get_log - Retrieve error log
- * @pe: frozen PE
- * @severity: permanent or temporary error
- * @drv_log: device driver log
- * @len: length of device driver log
- *
- * Retrieve the error log, which contains the log from the device
- * driver and the firmware.
- */
-static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
-			    char *drv_log, unsigned long len)
-{
-	if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
-		pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
-
-	return 0;
-}
-
-/**
- * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
- * @pe: EEH PE
- *
- * A particular PE might include PCI bridges. In order to make the
- * PE work properly, those PCI bridges should be configured correctly.
- * However, we need do nothing on P7IOC, since the reset function
- * already covers everything this function would do.
- */
-static int ioda_eeh_configure_bridge(struct eeh_pe *pe)
-{
-	return 0;
-}
-
-static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func,
-			       unsigned long addr, unsigned long mask)
-{
-	struct pci_controller *hose = pe->phb;
-	struct pnv_phb *phb = hose->private_data;
-	s64 ret;
-
-	/* Sanity check on error type */
-	if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
-	    type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
-		pr_warn("%s: Invalid error type %d\n",
-			__func__, type);
-		return -ERANGE;
-	}
-
-	if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
-	    func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
-		pr_warn("%s: Invalid error function %d\n",
-			__func__, func);
-		return -ERANGE;
-	}
-
-	/* Does the firmware support error injection? */
-	if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
-		pr_warn("%s: Firmware doesn't support error injection\n",
-			__func__);
-		return -ENXIO;
-	}
-
-	/* Do error injection */
-	ret = opal_pci_err_inject(phb->opal_id, pe->addr,
-				  type, func, addr, mask);
-	if (ret != OPAL_SUCCESS) {
-		pr_warn("%s: Failure %lld injecting error "
-			"%d-%d to PHB#%x-PE#%x\n",
-			__func__, ret, type, func,
-			hose->global_number, pe->addr);
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data)
-{
-	/* GEM */
-	if (data->gemXfir || data->gemRfir ||
-	    data->gemRirqfir || data->gemMask || data->gemRwof)
-		pr_info("  GEM: %016llx %016llx %016llx %016llx %016llx\n",
-			be64_to_cpu(data->gemXfir),
-			be64_to_cpu(data->gemRfir),
-			be64_to_cpu(data->gemRirqfir),
-			be64_to_cpu(data->gemMask),
-			be64_to_cpu(data->gemRwof));
-
-	/* LEM */
-	if (data->lemFir || data->lemErrMask ||
-	    data->lemAction0 || data->lemAction1 || data->lemWof)
-		pr_info("  LEM: %016llx %016llx %016llx %016llx %016llx\n",
-			be64_to_cpu(data->lemFir),
-			be64_to_cpu(data->lemErrMask),
-			be64_to_cpu(data->lemAction0),
-			be64_to_cpu(data->lemAction1),
-			be64_to_cpu(data->lemWof));
-}
-
-static void ioda_eeh_hub_diag(struct pci_controller *hose)
-{
-	struct pnv_phb *phb = hose->private_data;
-	struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
-	long rc;
-
-	rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
-	if (rc != OPAL_SUCCESS) {
-		pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
-			__func__, phb->hub_id, rc);
-		return;
-	}
-
-	switch (data->type) {
-	case OPAL_P7IOC_DIAG_TYPE_RGC:
-		pr_info("P7IOC diag-data for RGC\n\n");
-		ioda_eeh_hub_diag_common(data);
-		if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
-			pr_info("  RGC: %016llx %016llx\n",
-				be64_to_cpu(data->rgc.rgcStatus),
-				be64_to_cpu(data->rgc.rgcLdcp));
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_BI:
-		pr_info("P7IOC diag-data for BI %s\n\n",
-			data->bi.biDownbound ? "Downbound" : "Upbound");
-		ioda_eeh_hub_diag_common(data);
-		if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
-		    data->bi.biLdcp2 || data->bi.biFenceStatus)
-			pr_info("  BI: %016llx %016llx %016llx %016llx\n",
-				be64_to_cpu(data->bi.biLdcp0),
-				be64_to_cpu(data->bi.biLdcp1),
-				be64_to_cpu(data->bi.biLdcp2),
-				be64_to_cpu(data->bi.biFenceStatus));
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_CI:
-		pr_info("P7IOC diag-data for CI Port %d\n\n",
-			data->ci.ciPort);
-		ioda_eeh_hub_diag_common(data);
-		if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
-			pr_info("  CI: %016llx %016llx\n",
-				be64_to_cpu(data->ci.ciPortStatus),
-				be64_to_cpu(data->ci.ciPortLdcp));
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_MISC:
-		pr_info("P7IOC diag-data for MISC\n\n");
-		ioda_eeh_hub_diag_common(data);
-		break;
-	case OPAL_P7IOC_DIAG_TYPE_I2C:
-		pr_info("P7IOC diag-data for I2C\n\n");
-		ioda_eeh_hub_diag_common(data);
-		break;
-	default:
-		pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
-			__func__, phb->hub_id, data->type);
-	}
-}
-
-static int ioda_eeh_get_pe(struct pci_controller *hose,
-			   u16 pe_no, struct eeh_pe **pe)
-{
-	struct pnv_phb *phb = hose->private_data;
-	struct pnv_ioda_pe *pnv_pe;
-	struct eeh_pe *dev_pe;
-	struct eeh_dev edev;
-
-	/*
-	 * If the PHB supports compound PEs, fetch the
-	 * master PE, because slave PEs are invisible
-	 * to the EEH core.
-	 */
-	pnv_pe = &phb->ioda.pe_array[pe_no];
-	if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
-		pnv_pe = pnv_pe->master;
-		WARN_ON(!pnv_pe ||
-			!(pnv_pe->flags & PNV_IODA_PE_MASTER));
-		pe_no = pnv_pe->pe_number;
-	}
-
-	/* Find the PE according to PE# */
-	memset(&edev, 0, sizeof(struct eeh_dev));
-	edev.phb = hose;
-	edev.pe_config_addr = pe_no;
-	dev_pe = eeh_pe_get(&edev);
-	if (!dev_pe)
-		return -EEXIST;
-
-	/* Freeze the (compound) PE */
-	*pe = dev_pe;
-	if (!(dev_pe->state & EEH_PE_ISOLATED))
-		phb->freeze_pe(phb, pe_no);
-
-	/*
-	 * At this point, we're sure the (compound) PE should
-	 * have been frozen. However, we still need to poke
-	 * upward until we hit the top-level frozen PE.
-	 */
-	dev_pe = dev_pe->parent;
-	while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
-		int ret;
-		int active_flags = (EEH_STATE_MMIO_ACTIVE |
-				    EEH_STATE_DMA_ACTIVE);
-
-		ret = eeh_ops->get_state(dev_pe, NULL);
-		if (ret <= 0 || (ret & active_flags) == active_flags) {
-			dev_pe = dev_pe->parent;
-			continue;
-		}
-
-		/* Frozen parent PE */
-		*pe = dev_pe;
-		if (!(dev_pe->state & EEH_PE_ISOLATED))
-			phb->freeze_pe(phb, dev_pe->addr);
-
-		/* Next one */
-		dev_pe = dev_pe->parent;
-	}
-
-	return 0;
-}
-
-/**
- * ioda_eeh_next_error - Retrieve next error for EEH core to handle
- * @pe: The affected PE
- *
- * The function is expected to be called by the EEH core when it
- * gets a special EEH event (without a bound PE). It calls OPAL
- * APIs for the next error to handle. Informational errors are
- * handled internally by the platform; however, dead IOCs, dead
- * PHBs, fenced PHBs and frozen PEs are handled by the EEH core.
- */
-static int ioda_eeh_next_error(struct eeh_pe **pe)
-{
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
-	struct eeh_pe *phb_pe, *parent_pe;
-	__be64 frozen_pe_no;
-	__be16 err_type, severity;
-	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
-	long rc;
-	int state, ret = EEH_NEXT_ERR_NONE;
-
-	/*
-	 * While running here, it's safe to purge the event queue.
-	 * And we should keep the cached OPAL notifier event
-	 * synchronized between the kernel and firmware.
-	 */
-	eeh_remove_event(NULL, false);
-	opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
-
-	list_for_each_entry(hose, &hose_list, list_node) {
-		/*
-		 * If the subordinate PCI buses of the PHB have been
-		 * removed, or the PHB is already under error recovery,
-		 * we needn't take care of it any more.
-		 */
-		phb = hose->private_data;
-		phb_pe = eeh_phb_pe_get(hose);
-		if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
-			continue;
-
-		rc = opal_pci_next_error(phb->opal_id,
-					 &frozen_pe_no, &err_type, &severity);
-
-		/* If the OPAL API returns an error, we needn't proceed */
-		if (rc != OPAL_SUCCESS) {
-			pr_devel("%s: Invalid return value on "
-				 "PHB#%x (0x%lx) from opal_pci_next_error",
-				 __func__, hose->global_number, rc);
-			continue;
-		}
-
-		/* If the PHB doesn't have an error, stop processing */
-		if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
-		    be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
-			pr_devel("%s: No error found on PHB#%x\n",
-				 __func__, hose->global_number);
-			continue;
-		}
-
-		/*
-		 * Process the error. When there are multiple errors on
-		 * a specific PHB, we expect the one with the highest
-		 * priority to be reported.
-		 */
-		pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
-			 __func__, be16_to_cpu(err_type), be16_to_cpu(severity),
-			 be64_to_cpu(frozen_pe_no), hose->global_number);
-		switch (be16_to_cpu(err_type)) {
-		case OPAL_EEH_IOC_ERROR:
-			if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
-				pr_err("EEH: dead IOC detected\n");
-				ret = EEH_NEXT_ERR_DEAD_IOC;
-			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
-				pr_info("EEH: IOC informative error "
-					"detected\n");
-				ioda_eeh_hub_diag(hose);
-				ret = EEH_NEXT_ERR_NONE;
-			}
-
-			break;
-		case OPAL_EEH_PHB_ERROR:
-			if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
-				*pe = phb_pe;
-				pr_err("EEH: dead PHB#%x detected, "
-				       "location: %s\n",
-				       hose->global_number,
-				       eeh_pe_loc_get(phb_pe));
-				ret = EEH_NEXT_ERR_DEAD_PHB;
-			} else if (be16_to_cpu(severity) ==
-				   OPAL_EEH_SEV_PHB_FENCED) {
-				*pe = phb_pe;
-				pr_err("EEH: Fenced PHB#%x detected, "
-				       "location: %s\n",
-				       hose->global_number,
-				       eeh_pe_loc_get(phb_pe));
-				ret = EEH_NEXT_ERR_FENCED_PHB;
-			} else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
-				pr_info("EEH: PHB#%x informative error "
-					"detected, location: %s\n",
-					hose->global_number,
-					eeh_pe_loc_get(phb_pe));
-				ioda_eeh_phb_diag(phb_pe);
-				pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
-				ret = EEH_NEXT_ERR_NONE;
-			}
-
-			break;
-		case OPAL_EEH_PE_ERROR:
-			/*
-			 * If we can't find the corresponding PE, we
-			 * just try to unfreeze.
-			 */
-			if (ioda_eeh_get_pe(hose,
-					    be64_to_cpu(frozen_pe_no), pe)) {
-				/* Try best to clear it */
-				pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
-					hose->global_number, frozen_pe_no);
-				pr_info("EEH: PHB location: %s\n",
-					eeh_pe_loc_get(phb_pe));
-				opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
-					OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
-				ret = EEH_NEXT_ERR_NONE;
-			} else if ((*pe)->state & EEH_PE_ISOLATED ||
-				   eeh_pe_passed(*pe)) {
-				ret = EEH_NEXT_ERR_NONE;
-			} else {
-				pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
-				       (*pe)->addr, (*pe)->phb->global_number);
-				pr_err("EEH: PE location: %s, PHB location: %s\n",
-				       eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe));
-				ret = EEH_NEXT_ERR_FROZEN_PE;
-			}
-
-			break;
-		default:
-			pr_warn("%s: Unexpected error type %d\n",
-				__func__, be16_to_cpu(err_type));
-		}
-
-		/*
-		 * The EEH core will try to recover from a fenced PHB
-		 * or frozen PE. For a frozen PE, the EEH core enables
-		 * the IO path before collecting logs, which ruins the
-		 * error site. So we have to dump the log here, in
-		 * advance.
-		 */
-		if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
-		     ret == EEH_NEXT_ERR_FENCED_PHB) &&
-		    !((*pe)->state & EEH_PE_ISOLATED)) {
-			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
-			ioda_eeh_phb_diag(*pe);
-
-			if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
-				pnv_pci_dump_phb_diag_data((*pe)->phb,
-							   (*pe)->data);
-		}
-
-		/*
-		 * A frozen parent PE is probably out there too, and
-		 * we have to handle the frozen parent PE first.
-		 */
-		if (ret == EEH_NEXT_ERR_FROZEN_PE) {
-			parent_pe = (*pe)->parent;
-			while (parent_pe) {
-				/* Hit the ceiling ? */
-				if (parent_pe->type & EEH_PE_PHB)
-					break;
-
-				/* Frozen parent PE ? */
-				state = ioda_eeh_get_state(parent_pe);
-				if (state > 0 &&
-				    (state & active_flags) != active_flags)
-					*pe = parent_pe;
-
-				/* Next parent level */
-				parent_pe = parent_pe->parent;
-			}
-
-			/* We possibly migrate to another PE */
-			eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
-		}
-
-		/*
-		 * If we have no errors on the specific PHB, or only an
-		 * informative error there, we continue poking it.
-		 * Otherwise, actions need to be taken by the upper
-		 * layer.
-		 */
-		if (ret > EEH_NEXT_ERR_INF)
-			break;
-	}
-
-	return ret;
-}
-
-struct pnv_eeh_ops ioda_eeh_ops = {
-	.post_init		= ioda_eeh_post_init,
-	.set_option		= ioda_eeh_set_option,
-	.get_state		= ioda_eeh_get_state,
-	.reset			= ioda_eeh_reset,
-	.get_log		= ioda_eeh_get_log,
-	.configure_bridge	= ioda_eeh_configure_bridge,
-	.err_inject		= ioda_eeh_err_inject,
-	.next_error		= ioda_eeh_next_error
-};
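
The replacement code in eeh-powernv.c below probes PCI capabilities with raw config-space reads (pnv_eeh_cap_start(), pnv_eeh_find_cap(), pnv_eeh_find_ecap()) rather than the pci_find_capability() helpers, because no struct pci_dev exists yet when the probe runs on a pci_dn. The walk is the classic bounded capability-list traversal; here is a self-contained sketch of the same logic, where cfg_read8()/cfg_read16() and the fake config space are assumptions of this sketch, not part of the kernel code:

	#include <stdint.h>
	#include <stdio.h>

	#define PCI_STATUS		0x06
	#define PCI_STATUS_CAP_LIST	0x10
	#define PCI_CAPABILITY_LIST	0x34

	/* A fake 256-byte config space standing in for real hardware */
	static uint8_t cfg[256];

	static uint8_t cfg_read8(uint8_t off)   { return cfg[off]; }
	static uint16_t cfg_read16(uint8_t off) { return cfg[off] | (cfg[off + 1] << 8); }

	/* Bounded capability-list walk, as in pnv_eeh_find_cap() below */
	static int find_cap(uint8_t cap_id)
	{
		int ttl = 48;			/* maximal number of capabilities */
		uint8_t pos;

		if (!(cfg_read16(PCI_STATUS) & PCI_STATUS_CAP_LIST))
			return 0;		/* device has no capability list */

		pos = cfg_read8(PCI_CAPABILITY_LIST);
		while (ttl-- && pos >= 0x40) {	/* capabilities live above 0x40 */
			pos &= ~3;		/* pointers are dword aligned */
			if (cfg_read8(pos) == cap_id)
				return pos;	/* found: capability offset */
			pos = cfg_read8(pos + 1);	/* next pointer */
		}
		return 0;			/* not found */
	}

	int main(void)
	{
		/* Two-entry list: 0x40 -> cap 0x01, 0x50 -> cap 0x10 (PCIe) */
		cfg[PCI_STATUS] = PCI_STATUS_CAP_LIST;
		cfg[PCI_CAPABILITY_LIST] = 0x40;
		cfg[0x40] = 0x01; cfg[0x41] = 0x50;
		cfg[0x50] = 0x10; cfg[0x51] = 0x00;

		printf("PCIe capability at 0x%x\n", find_cap(0x10));	/* 0x50 */
		return 0;
	}

The ttl-style bound (48 entries, matching the comment in the new code) guards against malformed lists that would otherwise loop forever.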
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index e261869adc86..ce738ab3d5a9 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -12,6 +12,7 @@
12 */ 12 */
13 13
14#include <linux/atomic.h> 14#include <linux/atomic.h>
15#include <linux/debugfs.h>
15#include <linux/delay.h> 16#include <linux/delay.h>
16#include <linux/export.h> 17#include <linux/export.h>
17#include <linux/init.h> 18#include <linux/init.h>
@@ -38,12 +39,14 @@
38#include "powernv.h" 39#include "powernv.h"
39#include "pci.h" 40#include "pci.h"
40 41
42static bool pnv_eeh_nb_init = false;
43
41/** 44/**
42 * powernv_eeh_init - EEH platform dependent initialization 45 * pnv_eeh_init - EEH platform dependent initialization
43 * 46 *
44 * EEH platform dependent initialization on powernv 47 * EEH platform dependent initialization on powernv
45 */ 48 */
46static int powernv_eeh_init(void) 49static int pnv_eeh_init(void)
47{ 50{
48 struct pci_controller *hose; 51 struct pci_controller *hose;
49 struct pnv_phb *phb; 52 struct pnv_phb *phb;
@@ -85,37 +88,280 @@ static int powernv_eeh_init(void)
85 return 0; 88 return 0;
86} 89}
87 90
91static int pnv_eeh_event(struct notifier_block *nb,
92 unsigned long events, void *change)
93{
94 uint64_t changed_evts = (uint64_t)change;
95
96 /*
97 * We simply send special EEH event if EEH has
98 * been enabled, or clear pending events in
99 * case that we enable EEH soon
100 */
101 if (!(changed_evts & OPAL_EVENT_PCI_ERROR) ||
102 !(events & OPAL_EVENT_PCI_ERROR))
103 return 0;
104
105 if (eeh_enabled())
106 eeh_send_failure_event(NULL);
107 else
108 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
109
110 return 0;
111}
112
113static struct notifier_block pnv_eeh_nb = {
114 .notifier_call = pnv_eeh_event,
115 .next = NULL,
116 .priority = 0
117};
118
119#ifdef CONFIG_DEBUG_FS
120static ssize_t pnv_eeh_ei_write(struct file *filp,
121 const char __user *user_buf,
122 size_t count, loff_t *ppos)
123{
124 struct pci_controller *hose = filp->private_data;
125 struct eeh_dev *edev;
126 struct eeh_pe *pe;
127 int pe_no, type, func;
128 unsigned long addr, mask;
129 char buf[50];
130 int ret;
131
132 if (!eeh_ops || !eeh_ops->err_inject)
133 return -ENXIO;
134
135 /* Copy over argument buffer */
136 ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
137 if (!ret)
138 return -EFAULT;
139
140 /* Retrieve parameters */
141 ret = sscanf(buf, "%x:%x:%x:%lx:%lx",
142 &pe_no, &type, &func, &addr, &mask);
143 if (ret != 5)
144 return -EINVAL;
145
146 /* Retrieve PE */
147 edev = kzalloc(sizeof(*edev), GFP_KERNEL);
148 if (!edev)
149 return -ENOMEM;
150 edev->phb = hose;
151 edev->pe_config_addr = pe_no;
152 pe = eeh_pe_get(edev);
153 kfree(edev);
154 if (!pe)
155 return -ENODEV;
156
157 /* Do error injection */
158 ret = eeh_ops->err_inject(pe, type, func, addr, mask);
159 return ret < 0 ? ret : count;
160}
161
162static const struct file_operations pnv_eeh_ei_fops = {
163 .open = simple_open,
164 .llseek = no_llseek,
165 .write = pnv_eeh_ei_write,
166};
167
168static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val)
169{
170 struct pci_controller *hose = data;
171 struct pnv_phb *phb = hose->private_data;
172
173 out_be64(phb->regs + offset, val);
174 return 0;
175}
176
177static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val)
178{
179 struct pci_controller *hose = data;
180 struct pnv_phb *phb = hose->private_data;
181
182 *val = in_be64(phb->regs + offset);
183 return 0;
184}
185
186static int pnv_eeh_outb_dbgfs_set(void *data, u64 val)
187{
188 return pnv_eeh_dbgfs_set(data, 0xD10, val);
189}
190
191static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val)
192{
193 return pnv_eeh_dbgfs_get(data, 0xD10, val);
194}
195
196static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val)
197{
198 return pnv_eeh_dbgfs_set(data, 0xD90, val);
199}
200
201static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val)
202{
203 return pnv_eeh_dbgfs_get(data, 0xD90, val);
204}
205
206static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val)
207{
208 return pnv_eeh_dbgfs_set(data, 0xE10, val);
209}
210
211static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val)
212{
213 return pnv_eeh_dbgfs_get(data, 0xE10, val);
214}
215
216DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get,
217 pnv_eeh_outb_dbgfs_set, "0x%llx\n");
218DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get,
219 pnv_eeh_inbA_dbgfs_set, "0x%llx\n");
220DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get,
221 pnv_eeh_inbB_dbgfs_set, "0x%llx\n");
222#endif /* CONFIG_DEBUG_FS */
223
88/** 224/**
89 * powernv_eeh_post_init - EEH platform dependent post initialization 225 * pnv_eeh_post_init - EEH platform dependent post initialization
90 * 226 *
91 * EEH platform dependent post initialization on powernv. When 227 * EEH platform dependent post initialization on powernv. When
92 * the function is called, the EEH PEs and devices should have 228 * the function is called, the EEH PEs and devices should have
93 * been built. If the I/O cache staff has been built, EEH is 229 * been built. If the I/O cache staff has been built, EEH is
94 * ready to supply service. 230 * ready to supply service.
95 */ 231 */
96static int powernv_eeh_post_init(void) 232static int pnv_eeh_post_init(void)
97{ 233{
98 struct pci_controller *hose; 234 struct pci_controller *hose;
99 struct pnv_phb *phb; 235 struct pnv_phb *phb;
100 int ret = 0; 236 int ret = 0;
101 237
238 /* Register OPAL event notifier */
239 if (!pnv_eeh_nb_init) {
240 ret = opal_notifier_register(&pnv_eeh_nb);
241 if (ret) {
242 pr_warn("%s: Can't register OPAL event notifier (%d)\n",
243 __func__, ret);
244 return ret;
245 }
246
247 pnv_eeh_nb_init = true;
248 }
249
102 list_for_each_entry(hose, &hose_list, list_node) { 250 list_for_each_entry(hose, &hose_list, list_node) {
103 phb = hose->private_data; 251 phb = hose->private_data;
104 252
105 if (phb->eeh_ops && phb->eeh_ops->post_init) { 253 /*
106 ret = phb->eeh_ops->post_init(hose); 254 * If EEH is enabled, we're going to rely on that.
107 if (ret) 255 * Otherwise, we restore to conventional mechanism
108 break; 256 * to clear frozen PE during PCI config access.
109 } 257 */
258 if (eeh_enabled())
259 phb->flags |= PNV_PHB_FLAG_EEH;
260 else
261 phb->flags &= ~PNV_PHB_FLAG_EEH;
262
263 /* Create debugfs entries */
264#ifdef CONFIG_DEBUG_FS
265 if (phb->has_dbgfs || !phb->dbgfs)
266 continue;
267
268 phb->has_dbgfs = 1;
269 debugfs_create_file("err_injct", 0200,
270 phb->dbgfs, hose,
271 &pnv_eeh_ei_fops);
272
273 debugfs_create_file("err_injct_outbound", 0600,
274 phb->dbgfs, hose,
275 &pnv_eeh_outb_dbgfs_ops);
276 debugfs_create_file("err_injct_inboundA", 0600,
277 phb->dbgfs, hose,
278 &pnv_eeh_inbA_dbgfs_ops);
279 debugfs_create_file("err_injct_inboundB", 0600,
280 phb->dbgfs, hose,
281 &pnv_eeh_inbB_dbgfs_ops);
282#endif /* CONFIG_DEBUG_FS */
110 } 283 }
111 284
285
112 return ret; 286 return ret;
113} 287}
114 288
289static int pnv_eeh_cap_start(struct pci_dn *pdn)
290{
291 u32 status;
292
293 if (!pdn)
294 return 0;
295
296 pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status);
297 if (!(status & PCI_STATUS_CAP_LIST))
298 return 0;
299
300 return PCI_CAPABILITY_LIST;
301}
302
303static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap)
304{
305 int pos = pnv_eeh_cap_start(pdn);
306 int cnt = 48; /* Maximal number of capabilities */
307 u32 id;
308
309 if (!pos)
310 return 0;
311
312 while (cnt--) {
313 pnv_pci_cfg_read(pdn, pos, 1, &pos);
314 if (pos < 0x40)
315 break;
316
317 pos &= ~3;
318 pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id);
319 if (id == 0xff)
320 break;
321
322 /* Found */
323 if (id == cap)
324 return pos;
325
326 /* Next one */
327 pos += PCI_CAP_LIST_NEXT;
328 }
329
330 return 0;
331}
332
333static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap)
334{
335 struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
336 u32 header;
337 int pos = 256, ttl = (4096 - 256) / 8;
338
339 if (!edev || !edev->pcie_cap)
340 return 0;
341 if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
342 return 0;
343 else if (!header)
344 return 0;
345
346 while (ttl-- > 0) {
347 if (PCI_EXT_CAP_ID(header) == cap && pos)
348 return pos;
349
350 pos = PCI_EXT_CAP_NEXT(header);
351 if (pos < 256)
352 break;
353
354 if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL)
355 break;
356 }
357
358 return 0;
359}
360
115/** 361/**
116 * powernv_eeh_dev_probe - Do probe on PCI device 362 * pnv_eeh_probe - Do probe on PCI device
117 * @dev: PCI device 363 * @pdn: PCI device node
118 * @flag: unused 364 * @data: unused
119 * 365 *
120 * When EEH module is installed during system boot, all PCI devices 366 * When EEH module is installed during system boot, all PCI devices
121 * are checked one by one to see if it supports EEH. The function 367 * are checked one by one to see if it supports EEH. The function
@@ -129,12 +375,12 @@ static int powernv_eeh_post_init(void)
129 * was possiblly triggered by EEH core, the binding between EEH device 375 * was possiblly triggered by EEH core, the binding between EEH device
130 * and the PCI device isn't built yet. 376 * and the PCI device isn't built yet.
131 */ 377 */
132static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) 378static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
133{ 379{
134 struct pci_controller *hose = pci_bus_to_host(dev->bus); 380 struct pci_controller *hose = pdn->phb;
135 struct pnv_phb *phb = hose->private_data; 381 struct pnv_phb *phb = hose->private_data;
136 struct device_node *dn = pci_device_to_OF_node(dev); 382 struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
137 struct eeh_dev *edev = of_node_to_eeh_dev(dn); 383 uint32_t pcie_flags;
138 int ret; 384 int ret;
139 385
140 /* 386 /*
@@ -143,40 +389,42 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
143 * the root bridge. So it's not reasonable to continue 389 * the root bridge. So it's not reasonable to continue
144 * the probing. 390 * the probing.
145 */ 391 */
146 if (!dn || !edev || edev->pe) 392 if (!edev || edev->pe)
147 return 0; 393 return NULL;
148 394
149 /* Skip for PCI-ISA bridge */ 395 /* Skip for PCI-ISA bridge */
150 if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) 396 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
151 return 0; 397 return NULL;
152 398
153 /* Initialize eeh device */ 399 /* Initialize eeh device */
154 edev->class_code = dev->class; 400 edev->class_code = pdn->class_code;
155 edev->mode &= 0xFFFFFF00; 401 edev->mode &= 0xFFFFFF00;
156 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) 402 edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
403 edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
404 edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
405 if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
157 edev->mode |= EEH_DEV_BRIDGE; 406 edev->mode |= EEH_DEV_BRIDGE;
158 edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); 407 if (edev->pcie_cap) {
159 if (pci_is_pcie(dev)) { 408 pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
160 edev->pcie_cap = pci_pcie_cap(dev); 409 2, &pcie_flags);
161 410 pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4;
162 if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) 411 if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT)
163 edev->mode |= EEH_DEV_ROOT_PORT; 412 edev->mode |= EEH_DEV_ROOT_PORT;
164 else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) 413 else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM)
165 edev->mode |= EEH_DEV_DS_PORT; 414 edev->mode |= EEH_DEV_DS_PORT;
166 415 }
167 edev->aer_cap = pci_find_ext_capability(dev,
168 PCI_EXT_CAP_ID_ERR);
169 } 416 }
170 417
171 edev->config_addr = ((dev->bus->number << 8) | dev->devfn); 418 edev->config_addr = (pdn->busno << 8) | (pdn->devfn);
172 edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); 419 edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr];
173 420
174 /* Create PE */ 421 /* Create PE */
175 ret = eeh_add_to_parent_pe(edev); 422 ret = eeh_add_to_parent_pe(edev);
176 if (ret) { 423 if (ret) {
177 pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n", 424 pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n",
178 __func__, pci_name(dev), ret); 425 __func__, hose->global_number, pdn->busno,
179 return ret; 426 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret);
427 return NULL;
180 } 428 }
181 429
182 /* 430 /*
@@ -195,8 +443,10 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
195 * Broadcom Austin 4-ports NICs (14e4:1657) 443 * Broadcom Austin 4-ports NICs (14e4:1657)
196 * Broadcom Shiner 2-ports 10G NICs (14e4:168e) 444 * Broadcom Shiner 2-ports 10G NICs (14e4:168e)
197 */ 445 */
198 if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) || 446 if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
199 (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e)) 447 pdn->device_id == 0x1657) ||
448 (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM &&
449 pdn->device_id == 0x168e))
200 edev->pe->state |= EEH_PE_CFG_RESTRICTED; 450 edev->pe->state |= EEH_PE_CFG_RESTRICTED;
201 451
202 /* 452 /*
@@ -206,7 +456,8 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
206 * to PE reset. 456 * to PE reset.
207 */ 457 */
208 if (!edev->pe->bus) 458 if (!edev->pe->bus)
209 edev->pe->bus = dev->bus; 459 edev->pe->bus = pci_find_bus(hose->global_number,
460 pdn->busno);
210 461
211 /* 462 /*
212 * Enable EEH explicitly so that we will do EEH check 463 * Enable EEH explicitly so that we will do EEH check
@@ -217,11 +468,11 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
217 /* Save memory bars */ 468 /* Save memory bars */
218 eeh_save_bars(edev); 469 eeh_save_bars(edev);
219 470
220 return 0; 471 return NULL;
221} 472}
222 473
223/** 474/**
224 * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable 475 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable
225 * @pe: EEH PE 476 * @pe: EEH PE
226 * @option: operation to be issued 477 * @option: operation to be issued
227 * 478 *
@@ -229,36 +480,236 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag)
229 * Currently, following options are support according to PAPR: 480 * Currently, following options are support according to PAPR:
230 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA 481 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA
231 */ 482 */
232static int powernv_eeh_set_option(struct eeh_pe *pe, int option) 483static int pnv_eeh_set_option(struct eeh_pe *pe, int option)
233{ 484{
234 struct pci_controller *hose = pe->phb; 485 struct pci_controller *hose = pe->phb;
235 struct pnv_phb *phb = hose->private_data; 486 struct pnv_phb *phb = hose->private_data;
236 int ret = -EEXIST; 487 bool freeze_pe = false;
488 int opt, ret = 0;
489 s64 rc;
490
491 /* Sanity check on option */
492 switch (option) {
493 case EEH_OPT_DISABLE:
494 return -EPERM;
495 case EEH_OPT_ENABLE:
496 return 0;
497 case EEH_OPT_THAW_MMIO:
498 opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO;
499 break;
500 case EEH_OPT_THAW_DMA:
501 opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA;
502 break;
503 case EEH_OPT_FREEZE_PE:
504 freeze_pe = true;
505 opt = OPAL_EEH_ACTION_SET_FREEZE_ALL;
506 break;
507 default:
508 pr_warn("%s: Invalid option %d\n", __func__, option);
509 return -EINVAL;
510 }
237 511
238 /* 512 /* If PHB supports compound PE, to handle it */
239 * What we need do is pass it down for hardware 513 if (freeze_pe) {
240 * implementation to handle it. 514 if (phb->freeze_pe) {
241 */ 515 phb->freeze_pe(phb, pe->addr);
242 if (phb->eeh_ops && phb->eeh_ops->set_option) 516 } else {
243 ret = phb->eeh_ops->set_option(pe, option); 517 rc = opal_pci_eeh_freeze_set(phb->opal_id,
518 pe->addr, opt);
519 if (rc != OPAL_SUCCESS) {
520 pr_warn("%s: Failure %lld freezing "
521 "PHB#%x-PE#%x\n",
522 __func__, rc,
523 phb->hose->global_number, pe->addr);
524 ret = -EIO;
525 }
526 }
527 } else {
528 if (phb->unfreeze_pe) {
529 ret = phb->unfreeze_pe(phb, pe->addr, opt);
530 } else {
531 rc = opal_pci_eeh_freeze_clear(phb->opal_id,
532 pe->addr, opt);
533 if (rc != OPAL_SUCCESS) {
534 pr_warn("%s: Failure %lld enable %d "
535 "for PHB#%x-PE#%x\n",
536 __func__, rc, option,
537 phb->hose->global_number, pe->addr);
538 ret = -EIO;
539 }
540 }
541 }
244 542
245 return ret; 543 return ret;
246} 544}
247 545
248/** 546/**
249 * powernv_eeh_get_pe_addr - Retrieve PE address 547 * pnv_eeh_get_pe_addr - Retrieve PE address
250 * @pe: EEH PE 548 * @pe: EEH PE
251 * 549 *
252 * Retrieve the PE address according to the given tranditional 550 * Retrieve the PE address according to the given tranditional
253 * PCI BDF (Bus/Device/Function) address. 551 * PCI BDF (Bus/Device/Function) address.
254 */ 552 */
255static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) 553static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
256{ 554{
257 return pe->addr; 555 return pe->addr;
258} 556}
259 557
558static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
559{
560 struct pnv_phb *phb = pe->phb->private_data;
561 s64 rc;
562
563 rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data,
564 PNV_PCI_DIAG_BUF_SIZE);
565 if (rc != OPAL_SUCCESS)
566 pr_warn("%s: Failure %lld getting PHB#%x diag-data\n",
567 __func__, rc, pe->phb->global_number);
568}
569
570static int pnv_eeh_get_phb_state(struct eeh_pe *pe)
571{
572 struct pnv_phb *phb = pe->phb->private_data;
573 u8 fstate;
574 __be16 pcierr;
575 s64 rc;
576 int result = 0;
577
578 rc = opal_pci_eeh_freeze_status(phb->opal_id,
579 pe->addr,
580 &fstate,
581 &pcierr,
582 NULL);
583 if (rc != OPAL_SUCCESS) {
584 pr_warn("%s: Failure %lld getting PHB#%x state\n",
585 __func__, rc, phb->hose->global_number);
586 return EEH_STATE_NOT_SUPPORT;
587 }
588
589 /*
590 * Check PHB state. If the PHB is frozen for the
591 * first time, to dump the PHB diag-data.
592 */
593 if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) {
594 result = (EEH_STATE_MMIO_ACTIVE |
595 EEH_STATE_DMA_ACTIVE |
596 EEH_STATE_MMIO_ENABLED |
597 EEH_STATE_DMA_ENABLED);
598 } else if (!(pe->state & EEH_PE_ISOLATED)) {
599 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
600 pnv_eeh_get_phb_diag(pe);
601
602 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
603 pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
604 }
605
606 return result;
607}
608
609static int pnv_eeh_get_pe_state(struct eeh_pe *pe)
610{
611 struct pnv_phb *phb = pe->phb->private_data;
612 u8 fstate;
613 __be16 pcierr;
614 s64 rc;
615 int result;
616
617 /*
618 * We don't clobber hardware frozen state until PE
619 * reset is completed. In order to keep EEH core
620 * moving forward, we have to return operational
621 * state during PE reset.
622 */
623 if (pe->state & EEH_PE_RESET) {
624 result = (EEH_STATE_MMIO_ACTIVE |
625 EEH_STATE_DMA_ACTIVE |
626 EEH_STATE_MMIO_ENABLED |
627 EEH_STATE_DMA_ENABLED);
628 return result;
629 }
630
631 /*
632 * Fetch PE state from hardware. If the PHB
633 * supports compound PE, let it handle that.
634 */
635 if (phb->get_pe_state) {
636 fstate = phb->get_pe_state(phb, pe->addr);
637 } else {
638 rc = opal_pci_eeh_freeze_status(phb->opal_id,
639 pe->addr,
640 &fstate,
641 &pcierr,
642 NULL);
643 if (rc != OPAL_SUCCESS) {
644 pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n",
645 __func__, rc, phb->hose->global_number,
646 pe->addr);
647 return EEH_STATE_NOT_SUPPORT;
648 }
649 }
650
651 /* Figure out state */
652 switch (fstate) {
653 case OPAL_EEH_STOPPED_NOT_FROZEN:
654 result = (EEH_STATE_MMIO_ACTIVE |
655 EEH_STATE_DMA_ACTIVE |
656 EEH_STATE_MMIO_ENABLED |
657 EEH_STATE_DMA_ENABLED);
658 break;
659 case OPAL_EEH_STOPPED_MMIO_FREEZE:
660 result = (EEH_STATE_DMA_ACTIVE |
661 EEH_STATE_DMA_ENABLED);
662 break;
663 case OPAL_EEH_STOPPED_DMA_FREEZE:
664 result = (EEH_STATE_MMIO_ACTIVE |
665 EEH_STATE_MMIO_ENABLED);
666 break;
667 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE:
668 result = 0;
669 break;
670 case OPAL_EEH_STOPPED_RESET:
671 result = EEH_STATE_RESET_ACTIVE;
672 break;
673 case OPAL_EEH_STOPPED_TEMP_UNAVAIL:
674 result = EEH_STATE_UNAVAILABLE;
675 break;
676 case OPAL_EEH_STOPPED_PERM_UNAVAIL:
677 result = EEH_STATE_NOT_SUPPORT;
678 break;
679 default:
680 result = EEH_STATE_NOT_SUPPORT;
681 pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n",
682 __func__, phb->hose->global_number,
683 pe->addr, fstate);
684 }
685
686 /*
687 * If PHB supports compound PE, to freeze all
688 * slave PEs for consistency.
689 *
690 * If the PE is switching to frozen state for the
691 * first time, to dump the PHB diag-data.
692 */
693 if (!(result & EEH_STATE_NOT_SUPPORT) &&
694 !(result & EEH_STATE_UNAVAILABLE) &&
695 !(result & EEH_STATE_MMIO_ACTIVE) &&
696 !(result & EEH_STATE_DMA_ACTIVE) &&
697 !(pe->state & EEH_PE_ISOLATED)) {
698 if (phb->freeze_pe)
699 phb->freeze_pe(phb, pe->addr);
700
701 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
702 pnv_eeh_get_phb_diag(pe);
703
704 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
705 pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
706 }
707
708 return result;
709}
710
260/** 711/**
261 * powernv_eeh_get_state - Retrieve PE state 712 * pnv_eeh_get_state - Retrieve PE state
262 * @pe: EEH PE 713 * @pe: EEH PE
263 * @delay: delay while PE state is temporarily unavailable 714 * @delay: delay while PE state is temporarily unavailable
264 * 715 *
@@ -267,64 +718,279 @@ static int powernv_eeh_get_pe_addr(struct eeh_pe *pe)
267 * we prefer passing down to hardware implementation to handle 718 * we prefer passing down to hardware implementation to handle
268 * it. 719 * it.
269 */ 720 */
270static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) 721static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay)
722{
723 int ret;
724
725 if (pe->type & EEH_PE_PHB)
726 ret = pnv_eeh_get_phb_state(pe);
727 else
728 ret = pnv_eeh_get_pe_state(pe);
729
730 if (!delay)
731 return ret;
732
733 /*
 734	 * If the PE state is temporarily unavailable,
 735	 * inform the EEH core to delay for the default
 736	 * period (1 second).
737 */
738 *delay = 0;
739 if (ret & EEH_STATE_UNAVAILABLE)
740 *delay = 1000;
741
742 return ret;
743}
744
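
A caller honours the delay hint roughly as follows (sketch; the wrapper name is an assumption, and the real loop is pnv_eeh_wait_state() further down):

	/* Sketch: consume the delay hint produced by pnv_eeh_get_state(). */
	static int wait_once(struct eeh_pe *pe)
	{
		int mwait = 0;
		int state = pnv_eeh_get_state(pe, &mwait);

		if ((state & EEH_STATE_UNAVAILABLE) && mwait > 0)
			msleep(mwait);	/* default hint is 1000ms */

		return state;
	}
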
745static s64 pnv_eeh_phb_poll(struct pnv_phb *phb)
746{
747 s64 rc = OPAL_HARDWARE;
748
749 while (1) {
750 rc = opal_pci_poll(phb->opal_id);
751 if (rc <= 0)
752 break;
753
754 if (system_state < SYSTEM_RUNNING)
755 udelay(1000 * rc);
756 else
757 msleep(rc);
758 }
759
760 return rc;
761}
762
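
opal_pci_poll() returns the number of milliseconds to wait before polling again, zero on completion, or a negative OPAL error code; the loop above simply obeys that contract, busy-waiting with udelay() while the scheduler is not yet running. A capped variant might look like this (sketch; the budget parameter and the OPAL_BUSY fallback are illustrative assumptions):

	/* Sketch: the same poll loop with an illustrative upper bound. */
	static s64 pnv_eeh_phb_poll_capped(struct pnv_phb *phb, long budget_ms)
	{
		s64 rc;

		while (budget_ms > 0) {
			rc = opal_pci_poll(phb->opal_id);
			if (rc <= 0)		/* 0: done, <0: OPAL error */
				return rc;

			msleep(rc);		/* rc: suggested delay in ms */
			budget_ms -= rc;
		}

		return OPAL_BUSY;		/* ran out of budget */
	}
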
763int pnv_eeh_phb_reset(struct pci_controller *hose, int option)
271{ 764{
272 struct pci_controller *hose = pe->phb;
273 struct pnv_phb *phb = hose->private_data; 765 struct pnv_phb *phb = hose->private_data;
274 int ret = EEH_STATE_NOT_SUPPORT; 766 s64 rc = OPAL_HARDWARE;
767
768 pr_debug("%s: Reset PHB#%x, option=%d\n",
769 __func__, hose->global_number, option);
770
771 /* Issue PHB complete reset request */
772 if (option == EEH_RESET_FUNDAMENTAL ||
773 option == EEH_RESET_HOT)
774 rc = opal_pci_reset(phb->opal_id,
775 OPAL_RESET_PHB_COMPLETE,
776 OPAL_ASSERT_RESET);
777 else if (option == EEH_RESET_DEACTIVATE)
778 rc = opal_pci_reset(phb->opal_id,
779 OPAL_RESET_PHB_COMPLETE,
780 OPAL_DEASSERT_RESET);
781 if (rc < 0)
782 goto out;
275 783
276 if (phb->eeh_ops && phb->eeh_ops->get_state) { 784 /*
277 ret = phb->eeh_ops->get_state(pe); 785 * Poll state of the PHB until the request is done
 786	 * successfully. The PHB reset is usually a PHB complete
 787	 * reset followed by a hot reset on the root bus, so we also
 788	 * need the PCI bus settlement delay.
789 */
790 rc = pnv_eeh_phb_poll(phb);
791 if (option == EEH_RESET_DEACTIVATE) {
792 if (system_state < SYSTEM_RUNNING)
793 udelay(1000 * EEH_PE_RST_SETTLE_TIME);
794 else
795 msleep(EEH_PE_RST_SETTLE_TIME);
796 }
797out:
798 if (rc != OPAL_SUCCESS)
799 return -EIO;
278 800
279 /* 801 return 0;
280 * If the PE state is temporarily unavailable, 802}
281 * to inform the EEH core delay for default 803
282 * period (1 second) 804static int pnv_eeh_root_reset(struct pci_controller *hose, int option)
283 */ 805{
284 if (delay) { 806 struct pnv_phb *phb = hose->private_data;
285 *delay = 0; 807 s64 rc = OPAL_HARDWARE;
286 if (ret & EEH_STATE_UNAVAILABLE) 808
287 *delay = 1000; 809 pr_debug("%s: Reset PHB#%x, option=%d\n",
810 __func__, hose->global_number, option);
811
812 /*
 813	 * During the reset deassert time, we needn't care
 814	 * about the reset scope, because the firmware does nothing
 815	 * for fundamental or hot reset during the deassert phase.
816 */
817 if (option == EEH_RESET_FUNDAMENTAL)
818 rc = opal_pci_reset(phb->opal_id,
819 OPAL_RESET_PCI_FUNDAMENTAL,
820 OPAL_ASSERT_RESET);
821 else if (option == EEH_RESET_HOT)
822 rc = opal_pci_reset(phb->opal_id,
823 OPAL_RESET_PCI_HOT,
824 OPAL_ASSERT_RESET);
825 else if (option == EEH_RESET_DEACTIVATE)
826 rc = opal_pci_reset(phb->opal_id,
827 OPAL_RESET_PCI_HOT,
828 OPAL_DEASSERT_RESET);
829 if (rc < 0)
830 goto out;
831
832 /* Poll state of the PHB until the request is done */
833 rc = pnv_eeh_phb_poll(phb);
834 if (option == EEH_RESET_DEACTIVATE)
835 msleep(EEH_PE_RST_SETTLE_TIME);
836out:
837 if (rc != OPAL_SUCCESS)
838 return -EIO;
839
840 return 0;
841}
842
843static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option)
844{
845 struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
846 struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
847 int aer = edev ? edev->aer_cap : 0;
848 u32 ctrl;
849
850 pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n",
851 __func__, pci_domain_nr(dev->bus),
852 dev->bus->number, option);
853
854 switch (option) {
855 case EEH_RESET_FUNDAMENTAL:
856 case EEH_RESET_HOT:
857 /* Don't report linkDown event */
858 if (aer) {
859 eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
860 4, &ctrl);
861 ctrl |= PCI_ERR_UNC_SURPDN;
862 eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
863 4, ctrl);
288 } 864 }
865
866 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
867 ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
868 eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
869
870 msleep(EEH_PE_RST_HOLD_TIME);
871 break;
872 case EEH_RESET_DEACTIVATE:
873 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
874 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
875 eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
876
877 msleep(EEH_PE_RST_SETTLE_TIME);
878
879 /* Continue reporting linkDown event */
880 if (aer) {
881 eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
882 4, &ctrl);
883 ctrl &= ~PCI_ERR_UNC_SURPDN;
884 eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
885 4, ctrl);
886 }
887
888 break;
289 } 889 }
290 890
291 return ret; 891 return 0;
892}
893
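
The register sequence above is the standard secondary bus reset: pulse PCI_BRIDGE_CTL_BUS_RESET in the bridge control register while masking PCI_ERR_UNC_SURPDN so AER does not report the resulting surprise link down. The core pattern, condensed (sketch using generic PCI accessors for comparison; the helper name and the bridge parameter are assumptions, not code from this patch):

	/* Sketch: pulse the secondary bus reset bit on a p2p bridge. */
	static void bridge_bus_reset_pulse(struct pci_dev *bridge)
	{
		u16 ctrl;

		pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &ctrl);
		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
		pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, ctrl);
		msleep(EEH_PE_RST_HOLD_TIME);	/* keep reset asserted */

		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
		pci_write_config_word(bridge, PCI_BRIDGE_CONTROL, ctrl);
		msleep(EEH_PE_RST_SETTLE_TIME);	/* let the bus settle */
	}
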
894void pnv_pci_reset_secondary_bus(struct pci_dev *dev)
895{
896 struct pci_controller *hose;
897
898 if (pci_is_root_bus(dev->bus)) {
899 hose = pci_bus_to_host(dev->bus);
900 pnv_eeh_root_reset(hose, EEH_RESET_HOT);
901 pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE);
902 } else {
903 pnv_eeh_bridge_reset(dev, EEH_RESET_HOT);
904 pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE);
905 }
292} 906}
293 907
294/** 908/**
295 * powernv_eeh_reset - Reset the specified PE 909 * pnv_eeh_reset - Reset the specified PE
296 * @pe: EEH PE 910 * @pe: EEH PE
297 * @option: reset option 911 * @option: reset option
298 * 912 *
299 * Reset the specified PE 913 * Do a reset on the indicated PE. For a PCI bus sensitive PE,
 914 * we need to reset the parent p2p bridge. The PHB has to
 915 * be reinitialized if the p2p bridge is the root bridge. For
 916 * a PCI device sensitive PE, we will try to reset the device
 917 * through FLR. For now, we don't have OPAL APIs to do a HARD
 918 * reset yet, so all resets are SOFT (HOT) resets.
300 */ 919 */
301static int powernv_eeh_reset(struct eeh_pe *pe, int option) 920static int pnv_eeh_reset(struct eeh_pe *pe, int option)
302{ 921{
303 struct pci_controller *hose = pe->phb; 922 struct pci_controller *hose = pe->phb;
304 struct pnv_phb *phb = hose->private_data; 923 struct pci_bus *bus;
305 int ret = -EEXIST; 924 int ret;
925
926 /*
 927	 * For PHB reset, we always have a complete reset. For those PEs whose
 928	 * primary bus is derived from the root complex (root bus) or a root port
 929	 * (usually bus#1), we apply hot or fundamental reset on the root port.
 930	 * For other PEs, we always do a hot reset on the PE primary bus.
 931	 *
 932	 * Here, our design differs from pHyp, which always clears the
 933	 * frozen state during PE reset. The idea here, from benh, is to
 934	 * keep the frozen state until the PE reset is done completely
 935	 * (until BAR restore). With the frozen state, HW drops illegal IO
 936	 * or MMIO accesses, which could otherwise incur a recursive frozen PE
 937	 * during the reset. The side effect is that the EEH core has to clear
 938	 * the frozen state explicitly after BAR restore.
939 */
940 if (pe->type & EEH_PE_PHB) {
941 ret = pnv_eeh_phb_reset(hose, option);
942 } else {
943 struct pnv_phb *phb;
944 s64 rc;
306 945
307 if (phb->eeh_ops && phb->eeh_ops->reset) 946 /*
308 ret = phb->eeh_ops->reset(pe, option); 947 * The frozen PE might be caused by PAPR error injection
 948	 * registers, which are expected to be cleared after hitting a
 949	 * frozen PE, as stated in the hardware spec. Unfortunately,
 950	 * that's not true on P7IOC, so we have to clear them manually
 951	 * to avoid recursive EEH errors during recovery.
952 */
953 phb = hose->private_data;
954 if (phb->model == PNV_PHB_MODEL_P7IOC &&
955 (option == EEH_RESET_HOT ||
956 option == EEH_RESET_FUNDAMENTAL)) {
957 rc = opal_pci_reset(phb->opal_id,
958 OPAL_RESET_PHB_ERROR,
959 OPAL_ASSERT_RESET);
960 if (rc != OPAL_SUCCESS) {
961 pr_warn("%s: Failure %lld clearing "
962 "error injection registers\n",
963 __func__, rc);
964 return -EIO;
965 }
966 }
967
968 bus = eeh_pe_bus_get(pe);
969 if (pci_is_root_bus(bus) ||
970 pci_is_root_bus(bus->parent))
971 ret = pnv_eeh_root_reset(hose, option);
972 else
973 ret = pnv_eeh_bridge_reset(bus->self, option);
974 }
309 975
310 return ret; 976 return ret;
311} 977}
312 978
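
As the comment in pnv_eeh_reset() notes, the frozen state is deliberately left in place across the reset; once BARs are restored, the EEH core has to thaw the PE itself. Conceptually that amounts to the following (sketch using the generic EEH thaw options; the wrapper is an assumption, and the real call site lives in the EEH core, not in this file):

	/* Sketch: explicit thaw after reset and BAR restore (EEH core side). */
	static void thaw_pe(struct eeh_pe *pe)
	{
		eeh_ops->set_option(pe, EEH_OPT_THAW_MMIO);
		eeh_ops->set_option(pe, EEH_OPT_THAW_DMA);
	}
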
313/** 979/**
314 * powernv_eeh_wait_state - Wait for PE state 980 * pnv_eeh_wait_state - Wait for PE state
315 * @pe: EEH PE 981 * @pe: EEH PE
316 * @max_wait: maximal period in milliseconds 982 * @max_wait: maximal period in milliseconds
317 * 983 *
318 * Wait for the state of associated PE. It might take some time 984 * Wait for the state of associated PE. It might take some time
319 * to retrieve the PE's state. 985 * to retrieve the PE's state.
320 */ 986 */
321static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) 987static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
322{ 988{
323 int ret; 989 int ret;
324 int mwait; 990 int mwait;
325 991
326 while (1) { 992 while (1) {
327 ret = powernv_eeh_get_state(pe, &mwait); 993 ret = pnv_eeh_get_state(pe, &mwait);
328 994
329 /* 995 /*
330 * If the PE's state is temporarily unavailable, 996 * If the PE's state is temporarily unavailable,
@@ -348,7 +1014,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
348} 1014}
349 1015
350/** 1016/**
351 * powernv_eeh_get_log - Retrieve error log 1017 * pnv_eeh_get_log - Retrieve error log
352 * @pe: EEH PE 1018 * @pe: EEH PE
353 * @severity: temporary or permanent error log 1019 * @severity: temporary or permanent error log
354 * @drv_log: driver log to be combined with retrieved error log 1020 * @drv_log: driver log to be combined with retrieved error log
@@ -356,41 +1022,30 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
356 * 1022 *
357 * Retrieve the temporary or permanent error from the PE. 1023 * Retrieve the temporary or permanent error from the PE.
358 */ 1024 */
359static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, 1025static int pnv_eeh_get_log(struct eeh_pe *pe, int severity,
360 char *drv_log, unsigned long len) 1026 char *drv_log, unsigned long len)
361{ 1027{
362 struct pci_controller *hose = pe->phb; 1028 if (!eeh_has_flag(EEH_EARLY_DUMP_LOG))
363 struct pnv_phb *phb = hose->private_data; 1029 pnv_pci_dump_phb_diag_data(pe->phb, pe->data);
364 int ret = -EEXIST;
365 1030
366 if (phb->eeh_ops && phb->eeh_ops->get_log) 1031 return 0;
367 ret = phb->eeh_ops->get_log(pe, severity, drv_log, len);
368
369 return ret;
370} 1032}
371 1033
372/** 1034/**
373 * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE 1035 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE
374 * @pe: EEH PE 1036 * @pe: EEH PE
375 * 1037 *
376 * The function will be called to reconfigure the bridges included 1038 * in the specified PE so that the malfunctioning PE would be recovered
377 * in the specified PE so that the malfunctioning PE would be recovered 1039 * again.
378 * again. 1040 * again.
379 */ 1041 */
380static int powernv_eeh_configure_bridge(struct eeh_pe *pe) 1042static int pnv_eeh_configure_bridge(struct eeh_pe *pe)
381{ 1043{
382 struct pci_controller *hose = pe->phb; 1044 return 0;
383 struct pnv_phb *phb = hose->private_data;
384 int ret = 0;
385
386 if (phb->eeh_ops && phb->eeh_ops->configure_bridge)
387 ret = phb->eeh_ops->configure_bridge(pe);
388
389 return ret;
390} 1045}
391 1046
392/** 1047/**
393 * powernv_pe_err_inject - Inject specified error to the indicated PE 1048 * pnv_pe_err_inject - Inject specified error to the indicated PE
394 * @pe: the indicated PE 1049 * @pe: the indicated PE
395 * @type: error type 1050 * @type: error type
396 * @func: specific error type 1051 * @func: specific error type
@@ -401,22 +1056,52 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe)
401 * determined by @type and @func, to the indicated PE for 1056 * determined by @type and @func, to the indicated PE for
402 * testing purposes. 1057 * testing purposes.
403 */ 1058 */
404static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, 1059static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func,
405 unsigned long addr, unsigned long mask) 1060 unsigned long addr, unsigned long mask)
406{ 1061{
407 struct pci_controller *hose = pe->phb; 1062 struct pci_controller *hose = pe->phb;
408 struct pnv_phb *phb = hose->private_data; 1063 struct pnv_phb *phb = hose->private_data;
409 int ret = -EEXIST; 1064 s64 rc;
1065
1066 /* Sanity check on error type */
1067 if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR &&
1068 type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) {
1069 pr_warn("%s: Invalid error type %d\n",
1070 __func__, type);
1071 return -ERANGE;
1072 }
410 1073
411 if (phb->eeh_ops && phb->eeh_ops->err_inject) 1074 if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR ||
412 ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); 1075 func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) {
1076 pr_warn("%s: Invalid error function %d\n",
1077 __func__, func);
1078 return -ERANGE;
1079 }
413 1080
414 return ret; 1081 /* Firmware supports error injection ? */
1082 if (!opal_check_token(OPAL_PCI_ERR_INJECT)) {
1083 pr_warn("%s: Firmware doesn't support error injection\n",
1084 __func__);
1085 return -ENXIO;
1086 }
1087
1088 /* Do error injection */
1089 rc = opal_pci_err_inject(phb->opal_id, pe->addr,
1090 type, func, addr, mask);
1091 if (rc != OPAL_SUCCESS) {
1092 pr_warn("%s: Failure %lld injecting error "
1093 "%d-%d to PHB#%x-PE#%x\n",
1094 __func__, rc, type, func,
1095 hose->global_number, pe->addr);
1096 return -EIO;
1097 }
1098
1099 return 0;
415} 1100}
416 1101
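
For illustration, a caller reaches this path through the registered ops table; a minimal sketch (the wrapper name and the all-ones mask are assumptions, not part of this patch):

	/* Sketch: inject a 64-bit IOA bus load error at @addr on @pe. */
	static int inject_ioa_ld_err64(struct eeh_pe *pe, unsigned long addr)
	{
		return eeh_ops->err_inject(pe,
					   OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64,
					   OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR,
					   addr, ~0UL);
	}
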
417static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) 1102static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn)
418{ 1103{
419 struct eeh_dev *edev = of_node_to_eeh_dev(dn); 1104 struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
420 1105
421 if (!edev || !edev->pe) 1106 if (!edev || !edev->pe)
422 return false; 1107 return false;
@@ -427,51 +1112,377 @@ static inline bool powernv_eeh_cfg_blocked(struct device_node *dn)
427 return false; 1112 return false;
428} 1113}
429 1114
430static int powernv_eeh_read_config(struct device_node *dn, 1115static int pnv_eeh_read_config(struct pci_dn *pdn,
431 int where, int size, u32 *val) 1116 int where, int size, u32 *val)
432{ 1117{
433 if (powernv_eeh_cfg_blocked(dn)) { 1118 if (!pdn)
1119 return PCIBIOS_DEVICE_NOT_FOUND;
1120
1121 if (pnv_eeh_cfg_blocked(pdn)) {
434 *val = 0xFFFFFFFF; 1122 *val = 0xFFFFFFFF;
435 return PCIBIOS_SET_FAILED; 1123 return PCIBIOS_SET_FAILED;
436 } 1124 }
437 1125
438 return pnv_pci_cfg_read(dn, where, size, val); 1126 return pnv_pci_cfg_read(pdn, where, size, val);
439} 1127}
440 1128
441static int powernv_eeh_write_config(struct device_node *dn, 1129static int pnv_eeh_write_config(struct pci_dn *pdn,
442 int where, int size, u32 val) 1130 int where, int size, u32 val)
443{ 1131{
444 if (powernv_eeh_cfg_blocked(dn)) 1132 if (!pdn)
1133 return PCIBIOS_DEVICE_NOT_FOUND;
1134
1135 if (pnv_eeh_cfg_blocked(pdn))
445 return PCIBIOS_SET_FAILED; 1136 return PCIBIOS_SET_FAILED;
446 1137
447 return pnv_pci_cfg_write(dn, where, size, val); 1138 return pnv_pci_cfg_write(pdn, where, size, val);
1139}
1140
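
While a PE has EEH_PE_CFG_BLOCKED set, reads above return all-ones and writes are dropped, which is exactly what a surprise-removed device looks like on PCI. A reader therefore cannot tell the two apart (sketch; the helper name and the PCI_VENDOR_ID probe are chosen for illustration):

	/* Sketch: a blocked PE looks like a missing device to readers. */
	static bool pnv_dev_vanished(struct pci_dn *pdn)
	{
		u32 id = 0xFFFFFFFF;

		pnv_eeh_read_config(pdn, PCI_VENDOR_ID, 4, &id);
		return id == 0xFFFFFFFF;	/* blocked during reset, or absent */
	}
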
1141static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data)
1142{
1143 /* GEM */
1144 if (data->gemXfir || data->gemRfir ||
1145 data->gemRirqfir || data->gemMask || data->gemRwof)
1146 pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n",
1147 be64_to_cpu(data->gemXfir),
1148 be64_to_cpu(data->gemRfir),
1149 be64_to_cpu(data->gemRirqfir),
1150 be64_to_cpu(data->gemMask),
1151 be64_to_cpu(data->gemRwof));
1152
1153 /* LEM */
1154 if (data->lemFir || data->lemErrMask ||
1155 data->lemAction0 || data->lemAction1 || data->lemWof)
1156 pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n",
1157 be64_to_cpu(data->lemFir),
1158 be64_to_cpu(data->lemErrMask),
1159 be64_to_cpu(data->lemAction0),
1160 be64_to_cpu(data->lemAction1),
1161 be64_to_cpu(data->lemWof));
1162}
1163
1164static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose)
1165{
1166 struct pnv_phb *phb = hose->private_data;
1167 struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag;
1168 long rc;
1169
1170 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data));
1171 if (rc != OPAL_SUCCESS) {
1172 pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n",
1173 __func__, phb->hub_id, rc);
1174 return;
1175 }
1176
1177 switch (data->type) {
1178 case OPAL_P7IOC_DIAG_TYPE_RGC:
1179 pr_info("P7IOC diag-data for RGC\n\n");
1180 pnv_eeh_dump_hub_diag_common(data);
1181 if (data->rgc.rgcStatus || data->rgc.rgcLdcp)
1182 pr_info(" RGC: %016llx %016llx\n",
1183 be64_to_cpu(data->rgc.rgcStatus),
1184 be64_to_cpu(data->rgc.rgcLdcp));
1185 break;
1186 case OPAL_P7IOC_DIAG_TYPE_BI:
1187 pr_info("P7IOC diag-data for BI %s\n\n",
1188 data->bi.biDownbound ? "Downbound" : "Upbound");
1189 pnv_eeh_dump_hub_diag_common(data);
1190 if (data->bi.biLdcp0 || data->bi.biLdcp1 ||
1191 data->bi.biLdcp2 || data->bi.biFenceStatus)
1192 pr_info(" BI: %016llx %016llx %016llx %016llx\n",
1193 be64_to_cpu(data->bi.biLdcp0),
1194 be64_to_cpu(data->bi.biLdcp1),
1195 be64_to_cpu(data->bi.biLdcp2),
1196 be64_to_cpu(data->bi.biFenceStatus));
1197 break;
1198 case OPAL_P7IOC_DIAG_TYPE_CI:
1199 pr_info("P7IOC diag-data for CI Port %d\n\n",
1200 data->ci.ciPort);
1201 pnv_eeh_dump_hub_diag_common(data);
1202 if (data->ci.ciPortStatus || data->ci.ciPortLdcp)
1203 pr_info(" CI: %016llx %016llx\n",
1204 be64_to_cpu(data->ci.ciPortStatus),
1205 be64_to_cpu(data->ci.ciPortLdcp));
1206 break;
1207 case OPAL_P7IOC_DIAG_TYPE_MISC:
1208 pr_info("P7IOC diag-data for MISC\n\n");
1209 pnv_eeh_dump_hub_diag_common(data);
1210 break;
1211 case OPAL_P7IOC_DIAG_TYPE_I2C:
1212 pr_info("P7IOC diag-data for I2C\n\n");
1213 pnv_eeh_dump_hub_diag_common(data);
1214 break;
1215 default:
1216 pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n",
1217 __func__, phb->hub_id, data->type);
1218 }
1219}
1220
1221static int pnv_eeh_get_pe(struct pci_controller *hose,
1222 u16 pe_no, struct eeh_pe **pe)
1223{
1224 struct pnv_phb *phb = hose->private_data;
1225 struct pnv_ioda_pe *pnv_pe;
1226 struct eeh_pe *dev_pe;
1227 struct eeh_dev edev;
1228
1229 /*
 1230	 * If the PHB supports compound PE, fetch
 1231	 * the master PE, because slave PEs are invisible
 1232	 * to the EEH core.
1233 */
1234 pnv_pe = &phb->ioda.pe_array[pe_no];
1235 if (pnv_pe->flags & PNV_IODA_PE_SLAVE) {
1236 pnv_pe = pnv_pe->master;
1237 WARN_ON(!pnv_pe ||
1238 !(pnv_pe->flags & PNV_IODA_PE_MASTER));
1239 pe_no = pnv_pe->pe_number;
1240 }
1241
1242 /* Find the PE according to PE# */
1243 memset(&edev, 0, sizeof(struct eeh_dev));
1244 edev.phb = hose;
1245 edev.pe_config_addr = pe_no;
1246 dev_pe = eeh_pe_get(&edev);
1247 if (!dev_pe)
1248 return -EEXIST;
1249
1250 /* Freeze the (compound) PE */
1251 *pe = dev_pe;
1252 if (!(dev_pe->state & EEH_PE_ISOLATED))
1253 phb->freeze_pe(phb, pe_no);
1254
1255 /*
1256 * At this point, we're sure the (compound) PE should
 1257	 * have been frozen. However, we still need to poke up
 1258	 * the hierarchy until hitting the topmost frozen PE.
1259 */
1260 dev_pe = dev_pe->parent;
1261 while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) {
1262 int ret;
1263 int active_flags = (EEH_STATE_MMIO_ACTIVE |
1264 EEH_STATE_DMA_ACTIVE);
1265
1266 ret = eeh_ops->get_state(dev_pe, NULL);
1267 if (ret <= 0 || (ret & active_flags) == active_flags) {
1268 dev_pe = dev_pe->parent;
1269 continue;
1270 }
1271
1272 /* Frozen parent PE */
1273 *pe = dev_pe;
1274 if (!(dev_pe->state & EEH_PE_ISOLATED))
1275 phb->freeze_pe(phb, dev_pe->addr);
1276
1277 /* Next one */
1278 dev_pe = dev_pe->parent;
1279 }
1280
1281 return 0;
448} 1282}
449 1283
450/** 1284/**
451 * powernv_eeh_next_error - Retrieve next EEH error to handle 1285 * pnv_eeh_next_error - Retrieve next EEH error to handle
452 * @pe: Affected PE 1286 * @pe: Affected PE
453 * 1287 *
454 * Using OPAL APIs, retrieve the next EEH error for the EEH core to handle 1288 * The function is expected to be called by the EEH core when it gets
 1289 * a special EEH event (without a bound PE). The function calls the
 1290 * OPAL APIs for the next error to handle. Informational errors are
 1291 * handled internally by the platform. However, dead IOC, dead PHB,
 1292 * fenced PHB and frozen PE errors should eventually be handled by the EEH core.
455 */ 1293 */
456static int powernv_eeh_next_error(struct eeh_pe **pe) 1294static int pnv_eeh_next_error(struct eeh_pe **pe)
457{ 1295{
458 struct pci_controller *hose; 1296 struct pci_controller *hose;
459 struct pnv_phb *phb = NULL; 1297 struct pnv_phb *phb;
1298 struct eeh_pe *phb_pe, *parent_pe;
1299 __be64 frozen_pe_no;
1300 __be16 err_type, severity;
1301 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
1302 long rc;
1303 int state, ret = EEH_NEXT_ERR_NONE;
1304
1305 /*
1306 * While running here, it's safe to purge the event queue.
 1307	 * We should also keep the cached OPAL notifier event synchronized
1308 * between the kernel and firmware.
1309 */
1310 eeh_remove_event(NULL, false);
1311 opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul);
460 1312
461 list_for_each_entry(hose, &hose_list, list_node) { 1313 list_for_each_entry(hose, &hose_list, list_node) {
1314 /*
 1315	 * If the subordinate PCI buses of the PHB have been
 1316	 * removed or are already under error recovery, we
 1317	 * needn't take care of them any more.
1318 */
462 phb = hose->private_data; 1319 phb = hose->private_data;
463 break; 1320 phb_pe = eeh_phb_pe_get(hose);
464 } 1321 if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED))
1322 continue;
1323
1324 rc = opal_pci_next_error(phb->opal_id,
1325 &frozen_pe_no, &err_type, &severity);
1326 if (rc != OPAL_SUCCESS) {
1327 pr_devel("%s: Invalid return value on "
 1328			 "PHB#%x (0x%lx) from opal_pci_next_error\n",
1329 __func__, hose->global_number, rc);
1330 continue;
1331 }
1332
1333 /* If the PHB doesn't have error, stop processing */
1334 if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR ||
1335 be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) {
1336 pr_devel("%s: No error found on PHB#%x\n",
1337 __func__, hose->global_number);
1338 continue;
1339 }
1340
1341 /*
 1342	 * Process the error. Upon multiple errors on a
 1343	 * specific PHB, we expect the one with the highest
 1344	 * priority to be reported.
1345 */
1346 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n",
1347 __func__, be16_to_cpu(err_type),
1348 be16_to_cpu(severity), be64_to_cpu(frozen_pe_no),
1349 hose->global_number);
1350 switch (be16_to_cpu(err_type)) {
1351 case OPAL_EEH_IOC_ERROR:
1352 if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) {
1353 pr_err("EEH: dead IOC detected\n");
1354 ret = EEH_NEXT_ERR_DEAD_IOC;
1355 } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
1356 pr_info("EEH: IOC informative error "
1357 "detected\n");
1358 pnv_eeh_get_and_dump_hub_diag(hose);
1359 ret = EEH_NEXT_ERR_NONE;
1360 }
1361
1362 break;
1363 case OPAL_EEH_PHB_ERROR:
1364 if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) {
1365 *pe = phb_pe;
1366 pr_err("EEH: dead PHB#%x detected, "
1367 "location: %s\n",
1368 hose->global_number,
1369 eeh_pe_loc_get(phb_pe));
1370 ret = EEH_NEXT_ERR_DEAD_PHB;
1371 } else if (be16_to_cpu(severity) ==
1372 OPAL_EEH_SEV_PHB_FENCED) {
1373 *pe = phb_pe;
1374 pr_err("EEH: Fenced PHB#%x detected, "
1375 "location: %s\n",
1376 hose->global_number,
1377 eeh_pe_loc_get(phb_pe));
1378 ret = EEH_NEXT_ERR_FENCED_PHB;
1379 } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) {
1380 pr_info("EEH: PHB#%x informative error "
1381 "detected, location: %s\n",
1382 hose->global_number,
1383 eeh_pe_loc_get(phb_pe));
1384 pnv_eeh_get_phb_diag(phb_pe);
1385 pnv_pci_dump_phb_diag_data(hose, phb_pe->data);
1386 ret = EEH_NEXT_ERR_NONE;
1387 }
1388
1389 break;
1390 case OPAL_EEH_PE_ERROR:
1391 /*
1392 * If we can't find the corresponding PE, we
1393 * just try to unfreeze.
1394 */
1395 if (pnv_eeh_get_pe(hose,
1396 be64_to_cpu(frozen_pe_no), pe)) {
1397 /* Try best to clear it */
1398 pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n",
 1399				 hose->global_number, be64_to_cpu(frozen_pe_no));
1400 pr_info("EEH: PHB location: %s\n",
1401 eeh_pe_loc_get(phb_pe));
1402 opal_pci_eeh_freeze_clear(phb->opal_id,
 1403					be64_to_cpu(frozen_pe_no),
1404 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
1405 ret = EEH_NEXT_ERR_NONE;
1406 } else if ((*pe)->state & EEH_PE_ISOLATED ||
1407 eeh_pe_passed(*pe)) {
1408 ret = EEH_NEXT_ERR_NONE;
1409 } else {
1410 pr_err("EEH: Frozen PE#%x "
1411 "on PHB#%x detected\n",
1412 (*pe)->addr,
1413 (*pe)->phb->global_number);
1414 pr_err("EEH: PE location: %s, "
1415 "PHB location: %s\n",
1416 eeh_pe_loc_get(*pe),
1417 eeh_pe_loc_get(phb_pe));
1418 ret = EEH_NEXT_ERR_FROZEN_PE;
1419 }
1420
1421 break;
1422 default:
1423 pr_warn("%s: Unexpected error type %d\n",
1424 __func__, be16_to_cpu(err_type));
1425 }
465 1426
466 if (phb && phb->eeh_ops->next_error) 1427 /*
467 return phb->eeh_ops->next_error(pe); 1428 * The EEH core will try to recover from a fenced PHB or
 1429	 * a frozen PE. For a frozen PE, the EEH core
 1430	 * enables the IO path before collecting logs,
 1431	 * but that ruins the error site. So we have to dump the
 1432	 * log in advance here.
1433 */
1434 if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
1435 ret == EEH_NEXT_ERR_FENCED_PHB) &&
1436 !((*pe)->state & EEH_PE_ISOLATED)) {
1437 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
1438 pnv_eeh_get_phb_diag(*pe);
1439
1440 if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
1441 pnv_pci_dump_phb_diag_data((*pe)->phb,
1442 (*pe)->data);
1443 }
468 1444
469 return -EEXIST; 1445 /*
 1446	 * We probably have a frozen parent PE out there, and
 1447	 * we have to handle the frozen parent PE first.
1448 */
1449 if (ret == EEH_NEXT_ERR_FROZEN_PE) {
1450 parent_pe = (*pe)->parent;
1451 while (parent_pe) {
1452 /* Hit the ceiling ? */
1453 if (parent_pe->type & EEH_PE_PHB)
1454 break;
1455
1456 /* Frozen parent PE ? */
1457 state = eeh_ops->get_state(parent_pe, NULL);
1458 if (state > 0 &&
1459 (state & active_flags) != active_flags)
1460 *pe = parent_pe;
1461
1462 /* Next parent level */
1463 parent_pe = parent_pe->parent;
1464 }
1465
 1466		/* We may have migrated to another PE */
1467 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
1468 }
1469
1470 /*
 1471	 * If we have no errors on the specific PHB, or only
 1472	 * informative errors there, we continue poking it.
 1473	 * Otherwise, we need actions to be taken by the upper
 1474	 * layer.
1475 */
1476 if (ret > EEH_NEXT_ERR_INF)
1477 break;
1478 }
1479
1480 return ret;
470} 1481}
471 1482
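
The EEH core consumes these return codes in a polling loop, roughly as follows (condensed sketch of the caller's dispatch; the real loop lives in the EEH core's special-event handling, and the case comments are paraphrased):

	/* Sketch: core-side dispatch on the next_error() return code. */
	static void handle_special_events(void)
	{
		struct eeh_pe *pe = NULL;
		int rc;

		do {
			rc = eeh_ops->next_error(&pe);

			switch (rc) {
			case EEH_NEXT_ERR_DEAD_IOC:	/* all PHBs on the IOC lost */
			case EEH_NEXT_ERR_DEAD_PHB:	/* remove the dead PHB */
			case EEH_NEXT_ERR_FENCED_PHB:	/* full PHB reset + recovery */
			case EEH_NEXT_ERR_FROZEN_PE:	/* normal PE recovery path */
				/* recover against *pe here */
				break;
			case EEH_NEXT_ERR_NONE:
			default:
				break;
			}
		} while (rc != EEH_NEXT_ERR_NONE);
	}
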
472static int powernv_eeh_restore_config(struct device_node *dn) 1483static int pnv_eeh_restore_config(struct pci_dn *pdn)
473{ 1484{
474 struct eeh_dev *edev = of_node_to_eeh_dev(dn); 1485 struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
475 struct pnv_phb *phb; 1486 struct pnv_phb *phb;
476 s64 ret; 1487 s64 ret;
477 1488
@@ -490,24 +1501,23 @@ static int powernv_eeh_restore_config(struct device_node *dn)
490 return 0; 1501 return 0;
491} 1502}
492 1503
493static struct eeh_ops powernv_eeh_ops = { 1504static struct eeh_ops pnv_eeh_ops = {
494 .name = "powernv", 1505 .name = "powernv",
495 .init = powernv_eeh_init, 1506 .init = pnv_eeh_init,
496 .post_init = powernv_eeh_post_init, 1507 .post_init = pnv_eeh_post_init,
497 .of_probe = NULL, 1508 .probe = pnv_eeh_probe,
498 .dev_probe = powernv_eeh_dev_probe, 1509 .set_option = pnv_eeh_set_option,
499 .set_option = powernv_eeh_set_option, 1510 .get_pe_addr = pnv_eeh_get_pe_addr,
500 .get_pe_addr = powernv_eeh_get_pe_addr, 1511 .get_state = pnv_eeh_get_state,
501 .get_state = powernv_eeh_get_state, 1512 .reset = pnv_eeh_reset,
502 .reset = powernv_eeh_reset, 1513 .wait_state = pnv_eeh_wait_state,
503 .wait_state = powernv_eeh_wait_state, 1514 .get_log = pnv_eeh_get_log,
504 .get_log = powernv_eeh_get_log, 1515 .configure_bridge = pnv_eeh_configure_bridge,
505 .configure_bridge = powernv_eeh_configure_bridge, 1516 .err_inject = pnv_eeh_err_inject,
506 .err_inject = powernv_eeh_err_inject, 1517 .read_config = pnv_eeh_read_config,
507 .read_config = powernv_eeh_read_config, 1518 .write_config = pnv_eeh_write_config,
508 .write_config = powernv_eeh_write_config, 1519 .next_error = pnv_eeh_next_error,
509 .next_error = powernv_eeh_next_error, 1520 .restore_config = pnv_eeh_restore_config
510 .restore_config = powernv_eeh_restore_config
511}; 1521};
512 1522
513/** 1523/**
@@ -521,7 +1531,7 @@ static int __init eeh_powernv_init(void)
521 int ret = -EINVAL; 1531 int ret = -EINVAL;
522 1532
523 eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); 1533 eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE);
524 ret = eeh_ops_register(&powernv_eeh_ops); 1534 ret = eeh_ops_register(&pnv_eeh_ops);
525 if (!ret) 1535 if (!ret)
526 pr_info("EEH: PowerNV platform initialized\n"); 1536 pr_info("EEH: PowerNV platform initialized\n");
527 else 1537 else
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 6c9ff2b95119..76b344125cef 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1777,7 +1777,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose,
1777 region.start += phb->ioda.io_segsize; 1777 region.start += phb->ioda.io_segsize;
1778 index++; 1778 index++;
1779 } 1779 }
1780 } else if (res->flags & IORESOURCE_MEM) { 1780 } else if ((res->flags & IORESOURCE_MEM) &&
1781 !pnv_pci_is_mem_pref_64(res->flags)) {
1781 region.start = res->start - 1782 region.start = res->start -
1782 hose->mem_offset[0] - 1783 hose->mem_offset[0] -
1783 phb->ioda.m32_pci_base; 1784 phb->ioda.m32_pci_base;
@@ -2078,9 +2079,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
2078 phb->get_pe_state = pnv_ioda_get_pe_state; 2079 phb->get_pe_state = pnv_ioda_get_pe_state;
2079 phb->freeze_pe = pnv_ioda_freeze_pe; 2080 phb->freeze_pe = pnv_ioda_freeze_pe;
2080 phb->unfreeze_pe = pnv_ioda_unfreeze_pe; 2081 phb->unfreeze_pe = pnv_ioda_unfreeze_pe;
2081#ifdef CONFIG_EEH
2082 phb->eeh_ops = &ioda_eeh_ops;
2083#endif
2084 2082
2085 /* Setup RID -> PE mapping function */ 2083 /* Setup RID -> PE mapping function */
2086 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; 2084 phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe;
@@ -2121,8 +2119,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
2121 */ 2119 */
2122 if (is_kdump_kernel()) { 2120 if (is_kdump_kernel()) {
2123 pr_info(" Issue PHB reset ...\n"); 2121 pr_info(" Issue PHB reset ...\n");
2124 ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); 2122 pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL);
2125 ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); 2123 pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE);
2126 } 2124 }
2127 2125
2128 /* Remove M64 resource if we can't configure it successfully */ 2126 /* Remove M64 resource if we can't configure it successfully */
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 54323d6b5166..946aa3d62c3c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -366,9 +366,9 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no)
366 spin_unlock_irqrestore(&phb->lock, flags); 366 spin_unlock_irqrestore(&phb->lock, flags);
367} 367}
368 368
369static void pnv_pci_config_check_eeh(struct pnv_phb *phb, 369static void pnv_pci_config_check_eeh(struct pci_dn *pdn)
370 struct device_node *dn)
371{ 370{
371 struct pnv_phb *phb = pdn->phb->private_data;
372 u8 fstate; 372 u8 fstate;
373 __be16 pcierr; 373 __be16 pcierr;
374 int pe_no; 374 int pe_no;
@@ -379,7 +379,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
379 * setup that yet. So all ER errors should be mapped to 379 * setup that yet. So all ER errors should be mapped to
380 * reserved PE. 380 * reserved PE.
381 */ 381 */
382 pe_no = PCI_DN(dn)->pe_number; 382 pe_no = pdn->pe_number;
383 if (pe_no == IODA_INVALID_PE) { 383 if (pe_no == IODA_INVALID_PE) {
384 if (phb->type == PNV_PHB_P5IOC2) 384 if (phb->type == PNV_PHB_P5IOC2)
385 pe_no = 0; 385 pe_no = 0;
@@ -407,8 +407,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
407 } 407 }
408 408
409 cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", 409 cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n",
410 (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), 410 (pdn->busno << 8) | (pdn->devfn), pe_no, fstate);
411 pe_no, fstate);
412 411
413 /* Clear the frozen state if applicable */ 412 /* Clear the frozen state if applicable */
414 if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || 413 if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE ||
@@ -425,10 +424,9 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb,
425 } 424 }
426} 425}
427 426
428int pnv_pci_cfg_read(struct device_node *dn, 427int pnv_pci_cfg_read(struct pci_dn *pdn,
429 int where, int size, u32 *val) 428 int where, int size, u32 *val)
430{ 429{
431 struct pci_dn *pdn = PCI_DN(dn);
432 struct pnv_phb *phb = pdn->phb->private_data; 430 struct pnv_phb *phb = pdn->phb->private_data;
433 u32 bdfn = (pdn->busno << 8) | pdn->devfn; 431 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
434 s64 rc; 432 s64 rc;
@@ -462,10 +460,9 @@ int pnv_pci_cfg_read(struct device_node *dn,
462 return PCIBIOS_SUCCESSFUL; 460 return PCIBIOS_SUCCESSFUL;
463} 461}
464 462
465int pnv_pci_cfg_write(struct device_node *dn, 463int pnv_pci_cfg_write(struct pci_dn *pdn,
466 int where, int size, u32 val) 464 int where, int size, u32 val)
467{ 465{
468 struct pci_dn *pdn = PCI_DN(dn);
469 struct pnv_phb *phb = pdn->phb->private_data; 466 struct pnv_phb *phb = pdn->phb->private_data;
470 u32 bdfn = (pdn->busno << 8) | pdn->devfn; 467 u32 bdfn = (pdn->busno << 8) | pdn->devfn;
471 468
@@ -489,18 +486,17 @@ int pnv_pci_cfg_write(struct device_node *dn,
489} 486}
490 487
491#ifdef CONFIG_EEH 488#ifdef CONFIG_EEH
492static bool pnv_pci_cfg_check(struct pci_controller *hose, 489static bool pnv_pci_cfg_check(struct pci_dn *pdn)
493 struct device_node *dn)
494{ 490{
495 struct eeh_dev *edev = NULL; 491 struct eeh_dev *edev = NULL;
496 struct pnv_phb *phb = hose->private_data; 492 struct pnv_phb *phb = pdn->phb->private_data;
497 493
498 /* EEH not enabled ? */ 494 /* EEH not enabled ? */
499 if (!(phb->flags & PNV_PHB_FLAG_EEH)) 495 if (!(phb->flags & PNV_PHB_FLAG_EEH))
500 return true; 496 return true;
501 497
502 /* PE reset or device removed ? */ 498 /* PE reset or device removed ? */
503 edev = of_node_to_eeh_dev(dn); 499 edev = pdn->edev;
504 if (edev) { 500 if (edev) {
505 if (edev->pe && 501 if (edev->pe &&
506 (edev->pe->state & EEH_PE_CFG_BLOCKED)) 502 (edev->pe->state & EEH_PE_CFG_BLOCKED))
@@ -513,8 +509,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose,
513 return true; 509 return true;
514} 510}
515#else 511#else
516static inline bool pnv_pci_cfg_check(struct pci_controller *hose, 512static inline bool pnv_pci_cfg_check(struct pci_dn *pdn)
517 struct device_node *dn)
518{ 513{
519 return true; 514 return true;
520} 515}
@@ -524,32 +519,26 @@ static int pnv_pci_read_config(struct pci_bus *bus,
524 unsigned int devfn, 519 unsigned int devfn,
525 int where, int size, u32 *val) 520 int where, int size, u32 *val)
526{ 521{
527 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
528 struct pci_dn *pdn; 522 struct pci_dn *pdn;
529 struct pnv_phb *phb; 523 struct pnv_phb *phb;
530 bool found = false;
531 int ret; 524 int ret;
532 525
533 *val = 0xFFFFFFFF; 526 *val = 0xFFFFFFFF;
534 for (dn = busdn->child; dn; dn = dn->sibling) { 527 pdn = pci_get_pdn_by_devfn(bus, devfn);
535 pdn = PCI_DN(dn); 528 if (!pdn)
536 if (pdn && pdn->devfn == devfn) { 529 return PCIBIOS_DEVICE_NOT_FOUND;
537 phb = pdn->phb->private_data;
538 found = true;
539 break;
540 }
541 }
542 530
543 if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) 531 if (!pnv_pci_cfg_check(pdn))
544 return PCIBIOS_DEVICE_NOT_FOUND; 532 return PCIBIOS_DEVICE_NOT_FOUND;
545 533
546 ret = pnv_pci_cfg_read(dn, where, size, val); 534 ret = pnv_pci_cfg_read(pdn, where, size, val);
547 if (phb->flags & PNV_PHB_FLAG_EEH) { 535 phb = pdn->phb->private_data;
536 if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) {
548 if (*val == EEH_IO_ERROR_VALUE(size) && 537 if (*val == EEH_IO_ERROR_VALUE(size) &&
549 eeh_dev_check_failure(of_node_to_eeh_dev(dn))) 538 eeh_dev_check_failure(pdn->edev))
550 return PCIBIOS_DEVICE_NOT_FOUND; 539 return PCIBIOS_DEVICE_NOT_FOUND;
551 } else { 540 } else {
552 pnv_pci_config_check_eeh(phb, dn); 541 pnv_pci_config_check_eeh(pdn);
553 } 542 }
554 543
555 return ret; 544 return ret;
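
The all-ones test above relies on EEH_IO_ERROR_VALUE(size) expanding to an all-ones pattern of the access width; an illustrative equivalent (sketch only, the real macro lives in asm/eeh.h):

	/* Sketch: all-ones value for a 1-, 2- or 4-byte config read. */
	#define IO_ERROR_VALUE(size)	(~0U >> ((4 - (size)) * 8))
	/* IO_ERROR_VALUE(1) == 0xFF, (2) == 0xFFFF, (4) == 0xFFFFFFFF */
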
@@ -559,27 +548,21 @@ static int pnv_pci_write_config(struct pci_bus *bus,
559 unsigned int devfn, 548 unsigned int devfn,
560 int where, int size, u32 val) 549 int where, int size, u32 val)
561{ 550{
562 struct device_node *dn, *busdn = pci_bus_to_OF_node(bus);
563 struct pci_dn *pdn; 551 struct pci_dn *pdn;
564 struct pnv_phb *phb; 552 struct pnv_phb *phb;
565 bool found = false;
566 int ret; 553 int ret;
567 554
568 for (dn = busdn->child; dn; dn = dn->sibling) { 555 pdn = pci_get_pdn_by_devfn(bus, devfn);
569 pdn = PCI_DN(dn); 556 if (!pdn)
570 if (pdn && pdn->devfn == devfn) { 557 return PCIBIOS_DEVICE_NOT_FOUND;
571 phb = pdn->phb->private_data;
572 found = true;
573 break;
574 }
575 }
576 558
577 if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) 559 if (!pnv_pci_cfg_check(pdn))
578 return PCIBIOS_DEVICE_NOT_FOUND; 560 return PCIBIOS_DEVICE_NOT_FOUND;
579 561
580 ret = pnv_pci_cfg_write(dn, where, size, val); 562 ret = pnv_pci_cfg_write(pdn, where, size, val);
563 phb = pdn->phb->private_data;
581 if (!(phb->flags & PNV_PHB_FLAG_EEH)) 564 if (!(phb->flags & PNV_PHB_FLAG_EEH))
582 pnv_pci_config_check_eeh(phb, dn); 565 pnv_pci_config_check_eeh(pdn);
583 566
584 return ret; 567 return ret;
585} 568}
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 6c02ff8dd69f..1f0cb66133a1 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -75,22 +75,6 @@ struct pnv_ioda_pe {
75 struct list_head list; 75 struct list_head list;
76}; 76};
77 77
78/* IOC dependent EEH operations */
79#ifdef CONFIG_EEH
80struct pnv_eeh_ops {
81 int (*post_init)(struct pci_controller *hose);
82 int (*set_option)(struct eeh_pe *pe, int option);
83 int (*get_state)(struct eeh_pe *pe);
84 int (*reset)(struct eeh_pe *pe, int option);
85 int (*get_log)(struct eeh_pe *pe, int severity,
86 char *drv_log, unsigned long len);
87 int (*configure_bridge)(struct eeh_pe *pe);
88 int (*err_inject)(struct eeh_pe *pe, int type, int func,
89 unsigned long addr, unsigned long mask);
90 int (*next_error)(struct eeh_pe **pe);
91};
92#endif /* CONFIG_EEH */
93
94#define PNV_PHB_FLAG_EEH (1 << 0) 78#define PNV_PHB_FLAG_EEH (1 << 0)
95 79
96struct pnv_phb { 80struct pnv_phb {
@@ -104,10 +88,6 @@ struct pnv_phb {
104 int initialized; 88 int initialized;
105 spinlock_t lock; 89 spinlock_t lock;
106 90
107#ifdef CONFIG_EEH
108 struct pnv_eeh_ops *eeh_ops;
109#endif
110
111#ifdef CONFIG_DEBUG_FS 91#ifdef CONFIG_DEBUG_FS
112 int has_dbgfs; 92 int has_dbgfs;
113 struct dentry *dbgfs; 93 struct dentry *dbgfs;
@@ -213,15 +193,12 @@ struct pnv_phb {
213}; 193};
214 194
215extern struct pci_ops pnv_pci_ops; 195extern struct pci_ops pnv_pci_ops;
216#ifdef CONFIG_EEH
217extern struct pnv_eeh_ops ioda_eeh_ops;
218#endif
219 196
220void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, 197void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
221 unsigned char *log_buff); 198 unsigned char *log_buff);
222int pnv_pci_cfg_read(struct device_node *dn, 199int pnv_pci_cfg_read(struct pci_dn *pdn,
223 int where, int size, u32 *val); 200 int where, int size, u32 *val);
224int pnv_pci_cfg_write(struct device_node *dn, 201int pnv_pci_cfg_write(struct pci_dn *pdn,
225 int where, int size, u32 val); 202 int where, int size, u32 val);
226extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, 203extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
227 void *tce_mem, u64 tce_size, 204 void *tce_mem, u64 tce_size,
@@ -232,6 +209,6 @@ extern void pnv_pci_init_ioda2_phb(struct device_node *np);
232extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, 209extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
233 __be64 *startp, __be64 *endp, bool rm); 210 __be64 *startp, __be64 *endp, bool rm);
234extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); 211extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
235extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option); 212extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
236 213
237#endif /* __POWERNV_PCI_H */ 214#endif /* __POWERNV_PCI_H */