aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
author: Gavin Shan <gwshan@linux.vnet.ibm.com> 2014-06-09 21:41:55 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-08-05 01:28:47 -0400
commit: 05ec424e38fbba43829820b8f3634154f812e67e (patch)
tree: 3adea52ae78ec67819c5cde299436fa4c4fd37c7 /arch
parent: 9287b95ec9ded0a4458094ebd967502263d80112 (diff)
powerpc/eeh: Avoid event on passed PE
We must not handle EEH error on devices which are passed to somebody else. Instead, we expect that the frozen device owner detects an EEH error and recovers from it.

This avoids EEH error handling on passed through devices so the device owner gets a chance to handle them.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Acked-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r-- arch/powerpc/include/asm/eeh.h | 7
-rw-r--r-- arch/powerpc/kernel/eeh.c | 8
-rw-r--r-- arch/powerpc/platforms/powernv/eeh-ioda.c | 3
3 files changed, 17 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index fab7743c2640..9537d83b8320 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -25,6 +25,7 @@
25#include <linux/list.h> 25#include <linux/list.h>
26#include <linux/string.h> 26#include <linux/string.h>
27#include <linux/time.h> 27#include <linux/time.h>
28#include <linux/atomic.h>
28 29
29struct pci_dev; 30struct pci_dev;
30struct pci_bus; 31struct pci_bus;
@@ -84,6 +85,7 @@ struct eeh_pe {
84 int freeze_count; /* Times of froze up */ 85 int freeze_count; /* Times of froze up */
85 struct timeval tstamp; /* Time on first-time freeze */ 86 struct timeval tstamp; /* Time on first-time freeze */
86 int false_positives; /* Times of reported #ff's */ 87 int false_positives; /* Times of reported #ff's */
88 atomic_t pass_dev_cnt; /* Count of passed through devs */
87 struct eeh_pe *parent; /* Parent PE */ 89 struct eeh_pe *parent; /* Parent PE */
88 struct list_head child_list; /* Link PE to the child list */ 90 struct list_head child_list; /* Link PE to the child list */
89 struct list_head edevs; /* Link list of EEH devices */ 91 struct list_head edevs; /* Link list of EEH devices */
@@ -93,6 +95,11 @@ struct eeh_pe {
93#define eeh_pe_for_each_dev(pe, edev, tmp) \ 95#define eeh_pe_for_each_dev(pe, edev, tmp) \
94 list_for_each_entry_safe(edev, tmp, &pe->edevs, list) 96 list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
95 97
98static inline bool eeh_pe_passed(struct eeh_pe *pe)
99{
100 return pe ? !!atomic_read(&pe->pass_dev_cnt) : false;
101}
102
96/* 103/*
97 * The struct is used to trace EEH state for the associated 104 * The struct is used to trace EEH state for the associated
98 * PCI device node or PCI device. In future, it might 105 * PCI device node or PCI device. In future, it might
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 86e25702aaca..c8f1a9d2a67b 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -400,6 +400,14 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
400 if (ret > 0) 400 if (ret > 0)
401 return ret; 401 return ret;
402 402
403 /*
404 * If the PE isn't owned by us, we shouldn't check the
405 * state. Instead, let the owner handle it if the PE has
406 * been frozen.
407 */
408 if (eeh_pe_passed(pe))
409 return 0;
410
403 /* If we already have a pending isolation event for this 411 /* If we already have a pending isolation event for this
404 * slot, we know it's bad already, we don't need to check. 412 * slot, we know it's bad already, we don't need to check.
405 * Do this checking under a lock; as multiple PCI devices 413 * Do this checking under a lock; as multiple PCI devices
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 8ad0c5b891f4..f6abdb11ee84 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -812,7 +812,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
812 opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, 812 opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no,
813 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 813 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
814 ret = EEH_NEXT_ERR_NONE; 814 ret = EEH_NEXT_ERR_NONE;
815 } else if ((*pe)->state & EEH_PE_ISOLATED) { 815 } else if ((*pe)->state & EEH_PE_ISOLATED ||
816 eeh_pe_passed(*pe)) {
816 ret = EEH_NEXT_ERR_NONE; 817 ret = EEH_NEXT_ERR_NONE;
817 } else { 818 } else {
818 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", 819 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",