aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGavin Shan <gwshan@linux.vnet.ibm.com>2014-05-04 19:29:03 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-06-11 03:04:16 -0400
commit1ad7a72c5e57bc6a7a3190c580df14dc3642febf (patch)
tree37c000d1386347ccfd8cd24bdc959a99d32dcb7a
parent2c66599206938412d1781171953d565652ca3b93 (diff)
powerpc/eeh: Report frozen parent PE prior to child PE
When we have the corner case of a frozen parent PE and a frozen child PE at the same time, we have to handle the frozen parent PE prior to the child. Without clearing the frozen state on the parent PE, the child PE can't be recovered successfully. The patch searches the EEH PE hierarchy tree and returns the topmost frozen PE to be handled. This ensures the frozen parent PE will be handled prior to the child PE. Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r--arch/powerpc/kernel/eeh.c27
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c30
2 files changed, 52 insertions, 5 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 7051ea3101b9..c25064b7d667 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -358,10 +358,11 @@ out:
358int eeh_dev_check_failure(struct eeh_dev *edev) 358int eeh_dev_check_failure(struct eeh_dev *edev)
359{ 359{
360 int ret; 360 int ret;
361 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
361 unsigned long flags; 362 unsigned long flags;
362 struct device_node *dn; 363 struct device_node *dn;
363 struct pci_dev *dev; 364 struct pci_dev *dev;
364 struct eeh_pe *pe; 365 struct eeh_pe *pe, *parent_pe;
365 int rc = 0; 366 int rc = 0;
366 const char *location; 367 const char *location;
367 368
@@ -439,14 +440,34 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
439 */ 440 */
440 if ((ret < 0) || 441 if ((ret < 0) ||
441 (ret == EEH_STATE_NOT_SUPPORT) || 442 (ret == EEH_STATE_NOT_SUPPORT) ||
442 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 443 ((ret & active_flags) == active_flags)) {
443 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
444 eeh_stats.false_positives++; 444 eeh_stats.false_positives++;
445 pe->false_positives++; 445 pe->false_positives++;
446 rc = 0; 446 rc = 0;
447 goto dn_unlock; 447 goto dn_unlock;
448 } 448 }
449 449
450 /*
451 * It should be corner case that the parent PE has been
452 * put into frozen state as well. We should take care
453 * that at first.
454 */
455 parent_pe = pe->parent;
456 while (parent_pe) {
457 /* Hit the ceiling ? */
458 if (parent_pe->type & EEH_PE_PHB)
459 break;
460
461 /* Frozen parent PE ? */
462 ret = eeh_ops->get_state(parent_pe, NULL);
463 if (ret > 0 &&
464 (ret & active_flags) != active_flags)
465 pe = parent_pe;
466
467 /* Next parent level */
468 parent_pe = parent_pe->parent;
469 }
470
450 eeh_stats.slot_resets++; 471 eeh_stats.slot_resets++;
451 472
452 /* Avoid repeated reports of this failure, including problems 473 /* Avoid repeated reports of this failure, including problems
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index e0d6a3a213e2..68167cd9ea97 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -705,11 +705,12 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
705{ 705{
706 struct pci_controller *hose; 706 struct pci_controller *hose;
707 struct pnv_phb *phb; 707 struct pnv_phb *phb;
708 struct eeh_pe *phb_pe; 708 struct eeh_pe *phb_pe, *parent_pe;
709 __be64 frozen_pe_no; 709 __be64 frozen_pe_no;
710 __be16 err_type, severity; 710 __be16 err_type, severity;
711 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
711 long rc; 712 long rc;
712 int ret = EEH_NEXT_ERR_NONE; 713 int state, ret = EEH_NEXT_ERR_NONE;
713 714
714 /* 715 /*
715 * While running here, it's safe to purge the event queue. 716 * While running here, it's safe to purge the event queue.
@@ -839,6 +840,31 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
839 } 840 }
840 841
841 /* 842 /*
843 * We probably have the frozen parent PE out there and
844 * we need have to handle frozen parent PE firstly.
845 */
846 if (ret == EEH_NEXT_ERR_FROZEN_PE) {
847 parent_pe = (*pe)->parent;
848 while (parent_pe) {
849 /* Hit the ceiling ? */
850 if (parent_pe->type & EEH_PE_PHB)
851 break;
852
853 /* Frozen parent PE ? */
854 state = ioda_eeh_get_state(parent_pe);
855 if (state > 0 &&
856 (state & active_flags) != active_flags)
857 *pe = parent_pe;
858
859 /* Next parent level */
860 parent_pe = parent_pe->parent;
861 }
862
863 /* We possibly migrate to another PE */
864 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
865 }
866
867 /*
842 * If we have no errors on the specific PHB or only 868 * If we have no errors on the specific PHB or only
843 * informative error there, we continue poking it. 869 * informative error there, we continue poking it.
844 * Otherwise, we need actions to be taken by upper 870 * Otherwise, we need actions to be taken by upper