diff options
author | Linas Vepstas <linas@linas.org> | 2005-11-03 19:50:04 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-11-09 19:38:05 -0500 |
commit | 172ca9261800bacbbc7d320d9924d9b482dff8de (patch) | |
tree | 7abd6ddf1e6b9a147a0826c374f0d1bca80806d3 /arch/powerpc/platforms | |
parent | 7f79da7accd63a6adb84f4602f66779f6a701e7b (diff) |
[PATCH] ppc64: PCI error event dispatcher
12-eeh-event-dispatcher.patch
ppc64: EEH Recovery dispatcher thread
This patch adds a mechanism to create recovery threads when an
EEH event is received. Since an EEH freeze state may be detected
within an interrupt context, we need to get out of the interrupt
context before starting recovery. This dispatcher does this in
two steps: first, it uses a workqueue to get out, and then
lanuches a kernel thread, so that the recovery routine can
sleep for exteded periods without upseting the keventd.
A kernel thread is created with each EEH event, rather than
having one long-running daemon started at boot time. This is
because it is anticipated that EEH events will be very rare
(very very rare, ideally) and so its pointless to cluter the
process tables with a daemon that will almost never run.
Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- | arch/powerpc/platforms/pseries/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh.c | 138 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_event.c | 155 |
3 files changed, 171 insertions, 124 deletions
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile index dbdffb2fe429..27515476ad6c 100644 --- a/arch/powerpc/platforms/pseries/Makefile +++ b/arch/powerpc/platforms/pseries/Makefile | |||
@@ -3,4 +3,4 @@ obj-y := pci.o lpar.o hvCall.o nvram.o reconfig.o \ | |||
3 | obj-$(CONFIG_SMP) += smp.o | 3 | obj-$(CONFIG_SMP) += smp.o |
4 | obj-$(CONFIG_IBMVIO) += vio.o | 4 | obj-$(CONFIG_IBMVIO) += vio.o |
5 | obj-$(CONFIG_XICS) += xics.o | 5 | obj-$(CONFIG_XICS) += xics.o |
6 | obj-$(CONFIG_EEH) += eeh.o | 6 | obj-$(CONFIG_EEH) += eeh.o eeh_event.o |
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 9df1d5018363..1fec99d53311 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c | |||
@@ -19,7 +19,6 @@ | |||
19 | 19 | ||
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/list.h> | 21 | #include <linux/list.h> |
22 | #include <linux/notifier.h> | ||
23 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
24 | #include <linux/proc_fs.h> | 23 | #include <linux/proc_fs.h> |
25 | #include <linux/rbtree.h> | 24 | #include <linux/rbtree.h> |
@@ -27,12 +26,12 @@ | |||
27 | #include <linux/spinlock.h> | 26 | #include <linux/spinlock.h> |
28 | #include <asm/atomic.h> | 27 | #include <asm/atomic.h> |
29 | #include <asm/eeh.h> | 28 | #include <asm/eeh.h> |
29 | #include <asm/eeh_event.h> | ||
30 | #include <asm/io.h> | 30 | #include <asm/io.h> |
31 | #include <asm/machdep.h> | 31 | #include <asm/machdep.h> |
32 | #include <asm/ppc-pci.h> | ||
32 | #include <asm/rtas.h> | 33 | #include <asm/rtas.h> |
33 | #include <asm/atomic.h> | ||
34 | #include <asm/systemcfg.h> | 34 | #include <asm/systemcfg.h> |
35 | #include <asm/ppc-pci.h> | ||
36 | 35 | ||
37 | #undef DEBUG | 36 | #undef DEBUG |
38 | 37 | ||
@@ -70,14 +69,6 @@ | |||
70 | * and sent out for processing. | 69 | * and sent out for processing. |
71 | */ | 70 | */ |
72 | 71 | ||
73 | /* EEH event workqueue setup. */ | ||
74 | static DEFINE_SPINLOCK(eeh_eventlist_lock); | ||
75 | LIST_HEAD(eeh_eventlist); | ||
76 | static void eeh_event_handler(void *); | ||
77 | DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL); | ||
78 | |||
79 | static struct notifier_block *eeh_notifier_chain; | ||
80 | |||
81 | /* If a device driver keeps reading an MMIO register in an interrupt | 72 | /* If a device driver keeps reading an MMIO register in an interrupt |
82 | * handler after a slot isolation event has occurred, we assume it | 73 | * handler after a slot isolation event has occurred, we assume it |
83 | * is broken and panic. This sets the threshold for how many read | 74 | * is broken and panic. This sets the threshold for how many read |
@@ -421,24 +412,6 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity) | |||
421 | } | 412 | } |
422 | 413 | ||
423 | /** | 414 | /** |
424 | * eeh_register_notifier - Register to find out about EEH events. | ||
425 | * @nb: notifier block to callback on events | ||
426 | */ | ||
427 | int eeh_register_notifier(struct notifier_block *nb) | ||
428 | { | ||
429 | return notifier_chain_register(&eeh_notifier_chain, nb); | ||
430 | } | ||
431 | |||
432 | /** | ||
433 | * eeh_unregister_notifier - Unregister to an EEH event notifier. | ||
434 | * @nb: notifier block to callback on events | ||
435 | */ | ||
436 | int eeh_unregister_notifier(struct notifier_block *nb) | ||
437 | { | ||
438 | return notifier_chain_unregister(&eeh_notifier_chain, nb); | ||
439 | } | ||
440 | |||
441 | /** | ||
442 | * read_slot_reset_state - Read the reset state of a device node's slot | 415 | * read_slot_reset_state - Read the reset state of a device node's slot |
443 | * @dn: device node to read | 416 | * @dn: device node to read |
444 | * @rets: array to return results in | 417 | * @rets: array to return results in |
@@ -461,73 +434,6 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) | |||
461 | } | 434 | } |
462 | 435 | ||
463 | /** | 436 | /** |
464 | * eeh_panic - call panic() for an eeh event that cannot be handled. | ||
465 | * The philosophy of this routine is that it is better to panic and | ||
466 | * halt the OS than it is to risk possible data corruption by | ||
467 | * oblivious device drivers that don't know better. | ||
468 | * | ||
469 | * @dev pci device that had an eeh event | ||
470 | * @reset_state current reset state of the device slot | ||
471 | */ | ||
472 | static void eeh_panic(struct pci_dev *dev, int reset_state) | ||
473 | { | ||
474 | /* | ||
475 | * XXX We should create a separate sysctl for this. | ||
476 | * | ||
477 | * Since the panic_on_oops sysctl is used to halt the system | ||
478 | * in light of potential corruption, we can use it here. | ||
479 | */ | ||
480 | if (panic_on_oops) { | ||
481 | struct device_node *dn = pci_device_to_OF_node(dev); | ||
482 | eeh_slot_error_detail (PCI_DN(dn), 2 /* Permanent Error */); | ||
483 | panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, | ||
484 | pci_name(dev)); | ||
485 | } | ||
486 | else { | ||
487 | __get_cpu_var(ignored_failures)++; | ||
488 | printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", | ||
489 | reset_state, pci_name(dev)); | ||
490 | } | ||
491 | } | ||
492 | |||
493 | /** | ||
494 | * eeh_event_handler - dispatch EEH events. The detection of a frozen | ||
495 | * slot can occur inside an interrupt, where it can be hard to do | ||
496 | * anything about it. The goal of this routine is to pull these | ||
497 | * detection events out of the context of the interrupt handler, and | ||
498 | * re-dispatch them for processing at a later time in a normal context. | ||
499 | * | ||
500 | * @dummy - unused | ||
501 | */ | ||
502 | static void eeh_event_handler(void *dummy) | ||
503 | { | ||
504 | unsigned long flags; | ||
505 | struct eeh_event *event; | ||
506 | |||
507 | while (1) { | ||
508 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | ||
509 | event = NULL; | ||
510 | if (!list_empty(&eeh_eventlist)) { | ||
511 | event = list_entry(eeh_eventlist.next, struct eeh_event, list); | ||
512 | list_del(&event->list); | ||
513 | } | ||
514 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | ||
515 | if (event == NULL) | ||
516 | break; | ||
517 | |||
518 | printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " | ||
519 | "%s\n", event->reset_state, | ||
520 | pci_name(event->dev)); | ||
521 | |||
522 | notifier_call_chain (&eeh_notifier_chain, | ||
523 | EEH_NOTIFY_FREEZE, event); | ||
524 | |||
525 | pci_dev_put(event->dev); | ||
526 | kfree(event); | ||
527 | } | ||
528 | } | ||
529 | |||
530 | /** | ||
531 | * eeh_token_to_phys - convert EEH address token to phys address | 437 | * eeh_token_to_phys - convert EEH address token to phys address |
532 | * @token i/o token, should be address in the form 0xA.... | 438 | * @token i/o token, should be address in the form 0xA.... |
533 | */ | 439 | */ |
@@ -613,8 +519,6 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
613 | int ret; | 519 | int ret; |
614 | int rets[3]; | 520 | int rets[3]; |
615 | unsigned long flags; | 521 | unsigned long flags; |
616 | int reset_state; | ||
617 | struct eeh_event *event; | ||
618 | struct pci_dn *pdn; | 522 | struct pci_dn *pdn; |
619 | struct device_node *pe_dn; | 523 | struct device_node *pe_dn; |
620 | int rc = 0; | 524 | int rc = 0; |
@@ -722,33 +626,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
722 | __eeh_mark_slot (pe_dn); | 626 | __eeh_mark_slot (pe_dn); |
723 | spin_unlock_irqrestore(&confirm_error_lock, flags); | 627 | spin_unlock_irqrestore(&confirm_error_lock, flags); |
724 | 628 | ||
725 | reset_state = rets[0]; | 629 | eeh_send_failure_event (dn, dev, rets[0], rets[2]); |
726 | 630 | ||
727 | eeh_slot_error_detail (pdn, 1 /* Temporary Error */); | ||
728 | |||
729 | printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n", | ||
730 | rets[0], dn->name, dn->full_name); | ||
731 | event = kmalloc(sizeof(*event), GFP_ATOMIC); | ||
732 | if (event == NULL) { | ||
733 | eeh_panic(dev, reset_state); | ||
734 | return 1; | ||
735 | } | ||
736 | |||
737 | event->dev = dev; | ||
738 | event->dn = dn; | ||
739 | event->reset_state = reset_state; | ||
740 | |||
741 | /* We may or may not be called in an interrupt context */ | ||
742 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | ||
743 | list_add(&event->list, &eeh_eventlist); | ||
744 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | ||
745 | |||
746 | /* Most EEH events are due to device driver bugs. Having | 631 | /* Most EEH events are due to device driver bugs. Having |
747 | * a stack trace will help the device-driver authors figure | 632 | * a stack trace will help the device-driver authors figure |
748 | * out what happened. So print that out. */ | 633 | * out what happened. So print that out. */ |
749 | if (rets[0] != 5) dump_stack(); | 634 | if (rets[0] != 5) dump_stack(); |
750 | schedule_work(&eeh_event_wq); | ||
751 | |||
752 | return 1; | 635 | return 1; |
753 | 636 | ||
754 | dn_unlock: | 637 | dn_unlock: |
@@ -793,6 +676,14 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon | |||
793 | 676 | ||
794 | EXPORT_SYMBOL(eeh_check_failure); | 677 | EXPORT_SYMBOL(eeh_check_failure); |
795 | 678 | ||
679 | /* ------------------------------------------------------------- */ | ||
680 | /* The code below deals with enabling EEH for devices during the | ||
681 | * early boot sequence. EEH must be enabled before any PCI probing | ||
682 | * can be done. | ||
683 | */ | ||
684 | |||
685 | #define EEH_ENABLE 1 | ||
686 | |||
796 | struct eeh_early_enable_info { | 687 | struct eeh_early_enable_info { |
797 | unsigned int buid_hi; | 688 | unsigned int buid_hi; |
798 | unsigned int buid_lo; | 689 | unsigned int buid_lo; |
@@ -850,8 +741,9 @@ static void *early_enable_eeh(struct device_node *dn, void *data) | |||
850 | /* First register entry is addr (00BBSS00) */ | 741 | /* First register entry is addr (00BBSS00) */ |
851 | /* Try to enable eeh */ | 742 | /* Try to enable eeh */ |
852 | ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, | 743 | ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL, |
853 | regs[0], info->buid_hi, info->buid_lo, | 744 | regs[0], info->buid_hi, info->buid_lo, |
854 | EEH_ENABLE); | 745 | EEH_ENABLE); |
746 | |||
855 | if (ret == 0) { | 747 | if (ret == 0) { |
856 | eeh_subsystem_enabled = 1; | 748 | eeh_subsystem_enabled = 1; |
857 | pdn->eeh_mode |= EEH_MODE_SUPPORTED; | 749 | pdn->eeh_mode |= EEH_MODE_SUPPORTED; |
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c new file mode 100644 index 000000000000..92497333c2b6 --- /dev/null +++ b/arch/powerpc/platforms/pseries/eeh_event.c | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * eeh_event.c | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2005 Linas Vepstas <linas@linas.org> | ||
19 | */ | ||
20 | |||
21 | #include <linux/list.h> | ||
22 | #include <linux/pci.h> | ||
23 | #include <asm/eeh_event.h> | ||
24 | |||
25 | /** Overview: | ||
26 | * EEH error states may be detected within exception handlers; | ||
27 | * however, the recovery processing needs to occur asynchronously | ||
28 | * in a normal kernel context and not an interrupt context. | ||
29 | * This pair of routines creates an event and queues it onto a | ||
30 | * work-queue, where a worker thread can drive recovery. | ||
31 | */ | ||
32 | |||
33 | /* EEH event workqueue setup. */ | ||
34 | static spinlock_t eeh_eventlist_lock = SPIN_LOCK_UNLOCKED; | ||
35 | LIST_HEAD(eeh_eventlist); | ||
36 | static void eeh_thread_launcher(void *); | ||
37 | DECLARE_WORK(eeh_event_wq, eeh_thread_launcher, NULL); | ||
38 | |||
39 | /** | ||
40 | * eeh_panic - call panic() for an eeh event that cannot be handled. | ||
41 | * The philosophy of this routine is that it is better to panic and | ||
42 | * halt the OS than it is to risk possible data corruption by | ||
43 | * oblivious device drivers that don't know better. | ||
44 | * | ||
45 | * @dev pci device that had an eeh event | ||
46 | * @reset_state current reset state of the device slot | ||
47 | */ | ||
48 | static void eeh_panic(struct pci_dev *dev, int reset_state) | ||
49 | { | ||
50 | /* | ||
51 | * Since the panic_on_oops sysctl is used to halt the system | ||
52 | * in light of potential corruption, we can use it here. | ||
53 | */ | ||
54 | if (panic_on_oops) { | ||
55 | panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, | ||
56 | pci_name(dev)); | ||
57 | } | ||
58 | else { | ||
59 | printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", | ||
60 | reset_state, pci_name(dev)); | ||
61 | } | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * eeh_event_handler - dispatch EEH events. The detection of a frozen | ||
66 | * slot can occur inside an interrupt, where it can be hard to do | ||
67 | * anything about it. The goal of this routine is to pull these | ||
68 | * detection events out of the context of the interrupt handler, and | ||
69 | * re-dispatch them for processing at a later time in a normal context. | ||
70 | * | ||
71 | * @dummy - unused | ||
72 | */ | ||
73 | static int eeh_event_handler(void * dummy) | ||
74 | { | ||
75 | unsigned long flags; | ||
76 | struct eeh_event *event; | ||
77 | |||
78 | daemonize ("eehd"); | ||
79 | |||
80 | while (1) { | ||
81 | set_current_state(TASK_INTERRUPTIBLE); | ||
82 | |||
83 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | ||
84 | event = NULL; | ||
85 | if (!list_empty(&eeh_eventlist)) { | ||
86 | event = list_entry(eeh_eventlist.next, struct eeh_event, list); | ||
87 | list_del(&event->list); | ||
88 | } | ||
89 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | ||
90 | if (event == NULL) | ||
91 | break; | ||
92 | |||
93 | printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", | ||
94 | pci_name(event->dev)); | ||
95 | |||
96 | eeh_panic (event->dev, event->state); | ||
97 | |||
98 | kfree(event); | ||
99 | } | ||
100 | |||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | /** | ||
105 | * eeh_thread_launcher | ||
106 | * | ||
107 | * @dummy - unused | ||
108 | */ | ||
109 | static void eeh_thread_launcher(void *dummy) | ||
110 | { | ||
111 | if (kernel_thread(eeh_event_handler, NULL, CLONE_KERNEL) < 0) | ||
112 | printk(KERN_ERR "Failed to start EEH daemon\n"); | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * eeh_send_failure_event - generate a PCI error event | ||
117 | * @dev pci device | ||
118 | * | ||
119 | * This routine can be called within an interrupt context; | ||
120 | * the actual event will be delivered in a normal context | ||
121 | * (from a workqueue). | ||
122 | */ | ||
123 | int eeh_send_failure_event (struct device_node *dn, | ||
124 | struct pci_dev *dev, | ||
125 | int state, | ||
126 | int time_unavail) | ||
127 | { | ||
128 | unsigned long flags; | ||
129 | struct eeh_event *event; | ||
130 | |||
131 | event = kmalloc(sizeof(*event), GFP_ATOMIC); | ||
132 | if (event == NULL) { | ||
133 | printk (KERN_ERR "EEH: out of memory, event not handled\n"); | ||
134 | return 1; | ||
135 | } | ||
136 | |||
137 | if (dev) | ||
138 | pci_dev_get(dev); | ||
139 | |||
140 | event->dn = dn; | ||
141 | event->dev = dev; | ||
142 | event->state = state; | ||
143 | event->time_unavail = time_unavail; | ||
144 | |||
145 | /* We may or may not be called in an interrupt context */ | ||
146 | spin_lock_irqsave(&eeh_eventlist_lock, flags); | ||
147 | list_add(&event->list, &eeh_eventlist); | ||
148 | spin_unlock_irqrestore(&eeh_eventlist_lock, flags); | ||
149 | |||
150 | schedule_work(&eeh_event_wq); | ||
151 | |||
152 | return 0; | ||
153 | } | ||
154 | |||
155 | /********************** END OF FILE ******************************/ | ||