about summary refs log tree commit diff stats
path: root/arch/ppc64/kernel/eeh.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ppc64/kernel/eeh.c')
-rw-r--r-- arch/ppc64/kernel/eeh.c | 21
1 files changed, 13 insertions, 8 deletions
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
index e7522f6da69d..0c52c2de92e0 100644
--- a/arch/ppc64/kernel/eeh.c
+++ b/arch/ppc64/kernel/eeh.c
@@ -78,14 +78,12 @@ DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL);
78 78
79static struct notifier_block *eeh_notifier_chain; 79static struct notifier_block *eeh_notifier_chain;
80 80
81/* 81/* If a device driver keeps reading an MMIO register in an interrupt
82 * If a device driver keeps reading an MMIO register in an interrupt
83 * handler after a slot isolation event has occurred, we assume it 82 * handler after a slot isolation event has occurred, we assume it
84 * is broken and panic. This sets the threshold for how many read 83 * is broken and panic. This sets the threshold for how many read
85 * attempts we allow before panicking. 84 * attempts we allow before panicking.
86 */ 85 */
87#define EEH_MAX_FAILS 1000 86#define EEH_MAX_FAILS 100000
88static atomic_t eeh_fail_count;
89 87
90/* RTAS tokens */ 88/* RTAS tokens */
91static int ibm_set_eeh_option; 89static int ibm_set_eeh_option;
@@ -521,7 +519,6 @@ static void eeh_event_handler(void *dummy)
521 "%s\n", event->reset_state, 519 "%s\n", event->reset_state,
522 pci_name(event->dev)); 520 pci_name(event->dev));
523 521
524 atomic_set(&eeh_fail_count, 0);
525 notifier_call_chain (&eeh_notifier_chain, 522 notifier_call_chain (&eeh_notifier_chain,
526 EEH_NOTIFY_FREEZE, event); 523 EEH_NOTIFY_FREEZE, event);
527 524
@@ -657,12 +654,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
657 spin_lock_irqsave(&confirm_error_lock, flags); 654 spin_lock_irqsave(&confirm_error_lock, flags);
658 rc = 1; 655 rc = 1;
659 if (pdn->eeh_mode & EEH_MODE_ISOLATED) { 656 if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
660 atomic_inc(&eeh_fail_count); 657 pdn->eeh_check_count ++;
661 if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) { 658 if (pdn->eeh_check_count >= EEH_MAX_FAILS) {
659 printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n",
660 pdn->eeh_check_count);
661 dump_stack();
662
662 /* re-read the slot reset state */ 663 /* re-read the slot reset state */
663 if (read_slot_reset_state(pdn, rets) != 0) 664 if (read_slot_reset_state(pdn, rets) != 0)
664 rets[0] = -1; /* reset state unknown */ 665 rets[0] = -1; /* reset state unknown */
665 eeh_panic(dev, rets[0]); 666
667 /* If we are here, then we hit an infinite loop. Stop. */
668 panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
666 } 669 }
667 goto dn_unlock; 670 goto dn_unlock;
668 } 671 }
@@ -808,6 +811,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
808 struct pci_dn *pdn = PCI_DN(dn); 811 struct pci_dn *pdn = PCI_DN(dn);
809 812
810 pdn->eeh_mode = 0; 813 pdn->eeh_mode = 0;
814 pdn->eeh_check_count = 0;
815 pdn->eeh_freeze_count = 0;
811 816
812 if (status && strcmp(status, "ok") != 0) 817 if (status && strcmp(status, "ok") != 0)
813 return NULL; /* ignore devices with bad status */ 818 return NULL; /* ignore devices with bad status */