aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMike Mason <mmlnx@us.ibm.com>2008-07-21 12:40:17 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2008-07-21 20:39:37 -0400
commitf36c5227cd88b6340c40d62b05859e8213740a97 (patch)
tree4a858eb42fea4a1984bd37e0854c4d96c8c800a2 /arch
parent57a20d8fb0d2a05abe40abd6bb29e3f923721f1b (diff)
powerpc/eeh: Don't panic when EEH_MAX_FAILS is exceeded
This patch changes the EEH_MAX_FAILS action from panic to printing an error message. Panicking under under this condition is too harsh. Although performance will be affected and the device may not recover, the system is still running, which at the very least should allow for a more graceful shutdown. The patch also removes the msleep() within a spinlock, which can lead to a deadlock and is not recommended. Signed-off-by: Mike Mason <mmlnx@us.ibm.com> Acked-by: Linas Vepstas <linasvepstas@gmail.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c26
1 files changed, 12 insertions, 14 deletions
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index c027f0a70a04..54816d75b578 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -75,9 +75,9 @@
75 */ 75 */
76 76
77/* If a device driver keeps reading an MMIO register in an interrupt 77/* If a device driver keeps reading an MMIO register in an interrupt
78 * handler after a slot isolation event has occurred, we assume it 78 * handler after a slot isolation event, it might be broken.
79 * is broken and panic. This sets the threshold for how many read 79 * This sets the threshold for how many read attempts we allow
80 * attempts we allow before panicking. 80 * before printing an error message.
81 */ 81 */
82#define EEH_MAX_FAILS 2100000 82#define EEH_MAX_FAILS 2100000
83 83
@@ -470,6 +470,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
470 unsigned long flags; 470 unsigned long flags;
471 struct pci_dn *pdn; 471 struct pci_dn *pdn;
472 int rc = 0; 472 int rc = 0;
473 const char *location;
473 474
474 total_mmio_ffs++; 475 total_mmio_ffs++;
475 476
@@ -509,18 +510,15 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
509 rc = 1; 510 rc = 1;
510 if (pdn->eeh_mode & EEH_MODE_ISOLATED) { 511 if (pdn->eeh_mode & EEH_MODE_ISOLATED) {
511 pdn->eeh_check_count ++; 512 pdn->eeh_check_count ++;
512 if (pdn->eeh_check_count >= EEH_MAX_FAILS) { 513 if (pdn->eeh_check_count % EEH_MAX_FAILS == 0) {
513 printk (KERN_ERR "EEH: Device driver ignored %d bad reads, panicing\n", 514 location = of_get_property(dn, "ibm,loc-code", NULL);
514 pdn->eeh_check_count); 515 printk (KERN_ERR "EEH: %d reads ignored for recovering device at "
516 "location=%s driver=%s pci addr=%s\n",
517 pdn->eeh_check_count, location,
518 dev->driver->name, pci_name(dev));
519 printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n",
520 dev->driver->name);
515 dump_stack(); 521 dump_stack();
516 msleep(5000);
517
518 /* re-read the slot reset state */
519 if (read_slot_reset_state(pdn, rets) != 0)
520 rets[0] = -1; /* reset state unknown */
521
522 /* If we are here, then we hit an infinite loop. Stop. */
523 panic("EEH: MMIO halt (%d) on device:%s\n", rets[0], pci_name(dev));
524 } 522 }
525 goto dn_unlock; 523 goto dn_unlock;
526 } 524 }