diff options
-rw-r--r-- | arch/ppc64/kernel/eeh.c | 21 |
1 files changed, 13 insertions, 8 deletions
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c index e7522f6da69d..0c52c2de92e0 100644 --- a/arch/ppc64/kernel/eeh.c +++ b/arch/ppc64/kernel/eeh.c | |||
@@ -78,14 +78,12 @@ DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); | |||
78 | 78 | ||
79 | static struct notifier_block *eeh_notifier_chain; | 79 | static struct notifier_block *eeh_notifier_chain; |
80 | 80 | ||
81 | /* | 81 | /* If a device driver keeps reading an MMIO register in an interrupt |
82 | * If a device driver keeps reading an MMIO register in an interrupt | ||
83 | * handler after a slot isolation event has occurred, we assume it | 82 | * handler after a slot isolation event has occurred, we assume it |
84 | * is broken and panic. This sets the threshold for how many read | 83 | * is broken and panic. This sets the threshold for how many read |
85 | * attempts we allow before panicking. | 84 | * attempts we allow before panicking. |
86 | */ | 85 | */ |
87 | #define EEH_MAX_FAILS 1000 | 86 | #define EEH_MAX_FAILS 100000 |
88 | static atomic_t eeh_fail_count; | ||
89 | 87 | ||
90 | /* RTAS tokens */ | 88 | /* RTAS tokens */ |
91 | static int ibm_set_eeh_option; | 89 | static int ibm_set_eeh_option; |
@@ -521,7 +519,6 @@ static void eeh_event_handler(void *dummy) | |||
521 | "%s\n", event->reset_state, | 519 | "%s\n", event->reset_state, |
522 | pci_name(event->dev)); | 520 | pci_name(event->dev)); |
523 | 521 | ||
524 | atomic_set(&eeh_fail_count, 0); | ||
525 | notifier_call_chain (&eeh_notifier_chain, | 522 | notifier_call_chain (&eeh_notifier_chain, |
526 | EEH_NOTIFY_FREEZE, event); | 523 | EEH_NOTIFY_FREEZE, event); |
527 | 524 | ||
@@ -657,12 +654,18 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
657 | spin_lock_irqsave(&confirm_error_lock, flags); | 654 | spin_lock_irqsave(&confirm_error_lock, flags); |
658 | rc = 1; | 655 | rc = 1; |
659 | if (pdn->eeh_mode & EEH_MODE_ISOLATED) { | 656 | if (pdn->eeh_mode & EEH_MODE_ISOLATED) { |
660 | atomic_inc(&eeh_fail_count); | 657 | pdn->eeh_check_count ++; |
661 | if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) { | 658 | if (pdn->eeh_check_count >= EEH_MAX_FAILS) { |
659 | printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n", | ||
660 | pdn->eeh_check_count); | ||
661 | dump_stack(); | ||
662 | |||
662 | /* re-read the slot reset state */ | 663 | /* re-read the slot reset state */ |
663 | if (read_slot_reset_state(pdn, rets) != 0) | 664 | if (read_slot_reset_state(pdn, rets) != 0) |
664 | rets[0] = -1; /* reset state unknown */ | 665 | rets[0] = -1; /* reset state unknown */ |
665 | eeh_panic(dev, rets[0]); | 666 | |
667 | /* If we are here, then we hit an infinite loop. Stop. */ | ||
668 | panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev)); | ||
666 | } | 669 | } |
667 | goto dn_unlock; | 670 | goto dn_unlock; |
668 | } | 671 | } |
@@ -808,6 +811,8 @@ static void *early_enable_eeh(struct device_node *dn, void *data) | |||
808 | struct pci_dn *pdn = PCI_DN(dn); | 811 | struct pci_dn *pdn = PCI_DN(dn); |
809 | 812 | ||
810 | pdn->eeh_mode = 0; | 813 | pdn->eeh_mode = 0; |
814 | pdn->eeh_check_count = 0; | ||
815 | pdn->eeh_freeze_count = 0; | ||
811 | 816 | ||
812 | if (status && strcmp(status, "ok") != 0) | 817 | if (status && strcmp(status, "ok") != 0) |
813 | return NULL; /* ignore devices with bad status */ | 818 | return NULL; /* ignore devices with bad status */ |