diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/include/asm/eeh.h | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh_driver.c | 128 |
2 files changed, 112 insertions, 18 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 0c0ac93f422f..a0b11fb3237e 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h | |||
@@ -53,6 +53,7 @@ struct device_node; | |||
53 | 53 | ||
54 | #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ | 54 | #define EEH_PE_ISOLATED (1 << 0) /* Isolated PE */ |
55 | #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ | 55 | #define EEH_PE_RECOVERING (1 << 1) /* Recovering PE */ |
56 | #define EEH_PE_PHB_DEAD (1 << 2) /* Dead PHB */ | ||
56 | 57 | ||
57 | struct eeh_pe { | 58 | struct eeh_pe { |
58 | int type; /* PE type: PHB/Bus/Device */ | 59 | int type; /* PE type: PHB/Bus/Device */ |
@@ -145,6 +146,7 @@ struct eeh_ops { | |||
145 | int (*configure_bridge)(struct eeh_pe *pe); | 146 | int (*configure_bridge)(struct eeh_pe *pe); |
146 | int (*read_config)(struct device_node *dn, int where, int size, u32 *val); | 147 | int (*read_config)(struct device_node *dn, int where, int size, u32 *val); |
147 | int (*write_config)(struct device_node *dn, int where, int size, u32 val); | 148 | int (*write_config)(struct device_node *dn, int where, int size, u32 val); |
149 | int (*next_error)(struct eeh_pe **pe); | ||
148 | }; | 150 | }; |
149 | 151 | ||
150 | extern struct eeh_ops *eeh_ops; | 152 | extern struct eeh_ops *eeh_ops; |
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 678bc6cddf82..0974e1326842 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c | |||
@@ -399,24 +399,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus) | |||
399 | */ | 399 | */ |
400 | #define MAX_WAIT_FOR_RECOVERY 150 | 400 | #define MAX_WAIT_FOR_RECOVERY 150 |
401 | 401 | ||
402 | /** | 402 | static void eeh_handle_normal_event(struct eeh_pe *pe) |
403 | * eeh_handle_event - Reset a PCI device after hard lockup. | ||
404 | * @pe: EEH PE | ||
405 | * | ||
406 | * While PHB detects address or data parity errors on particular PCI | ||
407 | * slot, the associated PE will be frozen. Besides, DMA's occurring | ||
408 | * to wild addresses (which usually happen due to bugs in device | ||
409 | * drivers or in PCI adapter firmware) can cause EEH error. #SERR, | ||
410 | * #PERR or other misc PCI-related errors also can trigger EEH errors. | ||
411 | * | ||
412 | * Recovery process consists of unplugging the device driver (which | ||
413 | * generated hotplug events to userspace), then issuing a PCI #RST to | ||
414 | * the device, then reconfiguring the PCI config space for all bridges | ||
415 | * & devices under this slot, and then finally restarting the device | ||
416 | * drivers (which cause a second set of hotplug events to go out to | ||
417 | * userspace). | ||
418 | */ | ||
419 | void eeh_handle_event(struct eeh_pe *pe) | ||
420 | { | 403 | { |
421 | struct pci_bus *frozen_bus; | 404 | struct pci_bus *frozen_bus; |
422 | int rc = 0; | 405 | int rc = 0; |
@@ -554,3 +537,112 @@ perm_error: | |||
554 | if (frozen_bus) | 537 | if (frozen_bus) |
555 | pcibios_remove_pci_devices(frozen_bus); | 538 | pcibios_remove_pci_devices(frozen_bus); |
556 | } | 539 | } |
540 | |||
541 | static void eeh_handle_special_event(void) | ||
542 | { | ||
543 | struct eeh_pe *pe, *phb_pe; | ||
544 | struct pci_bus *bus; | ||
545 | struct pci_controller *hose, *tmp; | ||
546 | unsigned long flags; | ||
547 | int rc = 0; | ||
548 | |||
549 | /* | ||
550 | * The return value from next_error() has been classified as follows. | ||
551 | * It might be good to enumerate them. However, next_error() is only | ||
552 | * supported by PowerNV platform for now. So it would be fine to use | ||
553 | * integer directly: | ||
554 | * | ||
555 | * 4 - Dead IOC 3 - Dead PHB | ||
556 | * 2 - Fenced PHB 1 - Frozen PE | ||
557 | * 0 - No error found | ||
558 | * | ||
559 | */ | ||
560 | rc = eeh_ops->next_error(&pe); | ||
561 | if (rc <= 0) | ||
562 | return; | ||
563 | |||
564 | switch (rc) { | ||
565 | case 4: | ||
566 | /* Mark all PHBs in dead state */ | ||
567 | eeh_serialize_lock(&flags); | ||
568 | list_for_each_entry_safe(hose, tmp, | ||
569 | &hose_list, list_node) { | ||
570 | phb_pe = eeh_phb_pe_get(hose); | ||
571 | if (!phb_pe) continue; | ||
572 | |||
573 | eeh_pe_state_mark(phb_pe, | ||
574 | EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); | ||
575 | } | ||
576 | eeh_serialize_unlock(flags); | ||
577 | |||
578 | /* Purge all events */ | ||
579 | eeh_remove_event(NULL); | ||
580 | break; | ||
581 | case 3: | ||
582 | case 2: | ||
583 | case 1: | ||
584 | /* Mark the PE in fenced state */ | ||
585 | eeh_serialize_lock(&flags); | ||
586 | if (rc == 3) | ||
587 | eeh_pe_state_mark(pe, | ||
588 | EEH_PE_ISOLATED | EEH_PE_PHB_DEAD); | ||
589 | else | ||
590 | eeh_pe_state_mark(pe, | ||
591 | EEH_PE_ISOLATED | EEH_PE_RECOVERING); | ||
592 | eeh_serialize_unlock(flags); | ||
593 | |||
594 | /* Purge all events of the PHB */ | ||
595 | eeh_remove_event(pe); | ||
596 | break; | ||
597 | default: | ||
598 | pr_err("%s: Invalid value %d from next_error()\n", | ||
599 | __func__, rc); | ||
600 | return; | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * For fenced PHB and frozen PE, it's handled as normal | ||
605 | * event. We have to remove the affected PHBs for dead | ||
606 | * PHB and IOC | ||
607 | */ | ||
608 | if (rc == 2 || rc == 1) | ||
609 | eeh_handle_normal_event(pe); | ||
610 | else { | ||
611 | list_for_each_entry_safe(hose, tmp, | ||
612 | &hose_list, list_node) { | ||
613 | phb_pe = eeh_phb_pe_get(hose); | ||
614 | if (!phb_pe || !(phb_pe->state & EEH_PE_PHB_DEAD)) | ||
615 | continue; | ||
616 | |||
617 | bus = eeh_pe_bus_get(phb_pe); | ||
618 | /* Notify all devices that they're about to go down. */ | ||
619 | eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); | ||
620 | pcibios_remove_pci_devices(bus); | ||
621 | } | ||
622 | } | ||
623 | } | ||
624 | |||
/**
 * eeh_handle_event - Entry point for handling an EEH event.
 * @pe: EEH PE the event was raised for, or NULL for a special event
 *
 * When the PHB detects address or data parity errors on a particular
 * PCI slot, the associated PE is frozen. DMAs to wild addresses
 * (usually caused by bugs in device drivers or in PCI adapter
 * firmware) can also cause an EEH error, as can #SERR, #PERR and
 * other miscellaneous PCI-related errors.
 *
 * The recovery process consists of unplugging the device driver
 * (which generates hotplug events to userspace), issuing a PCI #RST
 * to the device, reconfiguring the PCI config space for all bridges
 * and devices under this slot, and finally restarting the device
 * drivers (which causes a second set of hotplug events to go out to
 * userspace).
 *
 * An event that comes with a PE is passed to
 * eeh_handle_normal_event(); an event without one is passed to
 * eeh_handle_special_event().
 */
void eeh_handle_event(struct eeh_pe *pe)
{
	/* No PE attached: this is a special event. */
	if (!pe) {
		eeh_handle_special_event();
		return;
	}

	eeh_handle_normal_event(pe);
}