diff options
author | Gavin Shan <gwshan@linux.vnet.ibm.com> | 2014-06-11 04:26:44 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2014-06-11 05:12:23 -0400 |
commit | 357b2f3dd9b7e220ddbaef5bcc108f0359dc0fcf (patch) | |
tree | 722fdbf7ec1406f437f5f4bb2c7073c4fb629642 | |
parent | d4e58e5928f8c6c49228451dd03e0714cbab299a (diff) |
powerpc/eeh: Dump PE location code
As Ben suggested, it's meaningful to dump the PE's location code
for site engineers when hitting EEH errors. The patch introduces
function eeh_pe_loc_get() to retrieve the location code from
dev-tree so that we can output it when hitting EEH errors.
If the primary PE bus is the root bus, the PHB's dev-node is tried
prior to the root port's dev-node. Otherwise, the upstream bridge's
dev-node of the primary PE bus will be checked for the location code
directly.
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/include/asm/eeh.h | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh.c | 13 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh_pe.c | 60 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 21 |
4 files changed, 84 insertions, 11 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index b76f58c124ca..fab7743c2640 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h | |||
@@ -254,6 +254,7 @@ void *eeh_pe_traverse(struct eeh_pe *root, | |||
254 | void *eeh_pe_dev_traverse(struct eeh_pe *root, | 254 | void *eeh_pe_dev_traverse(struct eeh_pe *root, |
255 | eeh_traverse_func fn, void *flag); | 255 | eeh_traverse_func fn, void *flag); |
256 | void eeh_pe_restore_bars(struct eeh_pe *pe); | 256 | void eeh_pe_restore_bars(struct eeh_pe *pe); |
257 | const char *eeh_pe_loc_get(struct eeh_pe *pe); | ||
257 | struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); | 258 | struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe); |
258 | 259 | ||
259 | void *eeh_dev_init(struct device_node *dn, void *data); | 260 | void *eeh_dev_init(struct device_node *dn, void *data); |
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index c25064b7d667..86e25702aaca 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c | |||
@@ -330,8 +330,8 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) | |||
330 | eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); | 330 | eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); |
331 | eeh_serialize_unlock(flags); | 331 | eeh_serialize_unlock(flags); |
332 | 332 | ||
333 | pr_err("EEH: PHB#%x failure detected\n", | 333 | pr_err("EEH: PHB#%x failure detected, location: %s\n", |
334 | phb_pe->phb->global_number); | 334 | phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); |
335 | dump_stack(); | 335 | dump_stack(); |
336 | eeh_send_failure_event(phb_pe); | 336 | eeh_send_failure_event(phb_pe); |
337 | 337 | ||
@@ -362,7 +362,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev) | |||
362 | unsigned long flags; | 362 | unsigned long flags; |
363 | struct device_node *dn; | 363 | struct device_node *dn; |
364 | struct pci_dev *dev; | 364 | struct pci_dev *dev; |
365 | struct eeh_pe *pe, *parent_pe; | 365 | struct eeh_pe *pe, *parent_pe, *phb_pe; |
366 | int rc = 0; | 366 | int rc = 0; |
367 | const char *location; | 367 | const char *location; |
368 | 368 | ||
@@ -481,8 +481,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev) | |||
481 | * a stack trace will help the device-driver authors figure | 481 | * a stack trace will help the device-driver authors figure |
482 | * out what happened. So print that out. | 482 | * out what happened. So print that out. |
483 | */ | 483 | */ |
484 | pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", | 484 | phb_pe = eeh_phb_pe_get(pe->phb); |
485 | pe->addr, pe->phb->global_number); | 485 | pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", |
486 | pe->phb->global_number, pe->addr); | ||
487 | pr_err("EEH: PE location: %s, PHB location: %s\n", | ||
488 | eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); | ||
486 | dump_stack(); | 489 | dump_stack(); |
487 | 490 | ||
488 | eeh_send_failure_event(pe); | 491 | eeh_send_failure_event(pe); |
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index 995c2a284630..fbd01eba4473 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c | |||
@@ -792,6 +792,66 @@ void eeh_pe_restore_bars(struct eeh_pe *pe) | |||
792 | } | 792 | } |
793 | 793 | ||
794 | /** | 794 | /** |
795 | * eeh_pe_loc_get - Retrieve location code binding to the given PE | ||
796 | * @pe: EEH PE | ||
797 | * | ||
798 | * Retrieve the location code of the given PE. If the primary PE bus | ||
799 | * is root bus, we will grab location code from PHB device tree node | ||
800 | * or root port. Otherwise, the upstream bridge's device tree node | ||
801 | * of the primary PE bus will be checked for the location code. | ||
802 | */ | ||
803 | const char *eeh_pe_loc_get(struct eeh_pe *pe) | ||
804 | { | ||
805 | struct pci_controller *hose; | ||
806 | struct pci_bus *bus = eeh_pe_bus_get(pe); | ||
807 | struct pci_dev *pdev; | ||
808 | struct device_node *dn; | ||
809 | const char *loc; | ||
810 | |||
811 | if (!bus) | ||
812 | return "N/A"; | ||
813 | |||
814 | /* PHB PE or root PE ? */ | ||
815 | if (pci_is_root_bus(bus)) { | ||
816 | hose = pci_bus_to_host(bus); | ||
817 | loc = of_get_property(hose->dn, | ||
818 | "ibm,loc-code", NULL); | ||
819 | if (loc) | ||
820 | return loc; | ||
821 | loc = of_get_property(hose->dn, | ||
822 | "ibm,io-base-loc-code", NULL); | ||
823 | if (loc) | ||
824 | return loc; | ||
825 | |||
826 | pdev = pci_get_slot(bus, 0x0); | ||
827 | } else { | ||
828 | pdev = bus->self; | ||
829 | } | ||
830 | |||
831 | if (!pdev) { | ||
832 | loc = "N/A"; | ||
833 | goto out; | ||
834 | } | ||
835 | |||
836 | dn = pci_device_to_OF_node(pdev); | ||
837 | if (!dn) { | ||
838 | loc = "N/A"; | ||
839 | goto out; | ||
840 | } | ||
841 | |||
842 | loc = of_get_property(dn, "ibm,loc-code", NULL); | ||
843 | if (!loc) | ||
844 | loc = of_get_property(dn, "ibm,slot-location-code", NULL); | ||
845 | if (!loc) | ||
846 | loc = "N/A"; | ||
847 | |||
848 | out: | ||
849 | if (pci_is_root_bus(bus) && pdev) | ||
850 | pci_dev_put(pdev); | ||
851 | return loc; | ||
852 | } | ||
853 | |||
854 | /** | ||
795 | * eeh_pe_bus_get - Retrieve PCI bus according to the given PE | 855 | * eeh_pe_bus_get - Retrieve PCI bus according to the given PE |
796 | * @pe: EEH PE | 856 | * @pe: EEH PE |
797 | * | 857 | * |
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 9c002099f875..8ad0c5b891f4 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c | |||
@@ -774,19 +774,24 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
774 | case OPAL_EEH_PHB_ERROR: | 774 | case OPAL_EEH_PHB_ERROR: |
775 | if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { | 775 | if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { |
776 | *pe = phb_pe; | 776 | *pe = phb_pe; |
777 | pr_err("EEH: dead PHB#%x detected\n", | 777 | pr_err("EEH: dead PHB#%x detected, " |
778 | hose->global_number); | 778 | "location: %s\n", |
779 | hose->global_number, | ||
780 | eeh_pe_loc_get(phb_pe)); | ||
779 | ret = EEH_NEXT_ERR_DEAD_PHB; | 781 | ret = EEH_NEXT_ERR_DEAD_PHB; |
780 | } else if (be16_to_cpu(severity) == | 782 | } else if (be16_to_cpu(severity) == |
781 | OPAL_EEH_SEV_PHB_FENCED) { | 783 | OPAL_EEH_SEV_PHB_FENCED) { |
782 | *pe = phb_pe; | 784 | *pe = phb_pe; |
783 | pr_err("EEH: fenced PHB#%x detected\n", | 785 | pr_err("EEH: Fenced PHB#%x detected, " |
784 | hose->global_number); | 786 | "location: %s\n", |
787 | hose->global_number, | ||
788 | eeh_pe_loc_get(phb_pe)); | ||
785 | ret = EEH_NEXT_ERR_FENCED_PHB; | 789 | ret = EEH_NEXT_ERR_FENCED_PHB; |
786 | } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { | 790 | } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { |
787 | pr_info("EEH: PHB#%x informative error " | 791 | pr_info("EEH: PHB#%x informative error " |
788 | "detected\n", | 792 | "detected, location: %s\n", |
789 | hose->global_number); | 793 | hose->global_number, |
794 | eeh_pe_loc_get(phb_pe)); | ||
790 | ioda_eeh_phb_diag(hose); | 795 | ioda_eeh_phb_diag(hose); |
791 | ret = EEH_NEXT_ERR_NONE; | 796 | ret = EEH_NEXT_ERR_NONE; |
792 | } | 797 | } |
@@ -802,6 +807,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
802 | /* Try best to clear it */ | 807 | /* Try best to clear it */ |
803 | pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", | 808 | pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", |
804 | hose->global_number, frozen_pe_no); | 809 | hose->global_number, frozen_pe_no); |
810 | pr_info("EEH: PHB location: %s\n", | ||
811 | eeh_pe_loc_get(phb_pe)); | ||
805 | opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, | 812 | opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, |
806 | OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); | 813 | OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); |
807 | ret = EEH_NEXT_ERR_NONE; | 814 | ret = EEH_NEXT_ERR_NONE; |
@@ -810,6 +817,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
810 | } else { | 817 | } else { |
811 | pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", | 818 | pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", |
812 | (*pe)->addr, (*pe)->phb->global_number); | 819 | (*pe)->addr, (*pe)->phb->global_number); |
820 | pr_err("EEH: PE location: %s, PHB location: %s\n", | ||
821 | eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); | ||
813 | ret = EEH_NEXT_ERR_FROZEN_PE; | 822 | ret = EEH_NEXT_ERR_FROZEN_PE; |
814 | } | 823 | } |
815 | 824 | ||