aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinas Vepstas <linas@linas.org>2005-11-03 19:50:04 -0500
committerPaul Mackerras <paulus@samba.org>2005-11-09 19:38:05 -0500
commit172ca9261800bacbbc7d320d9924d9b482dff8de (patch)
tree7abd6ddf1e6b9a147a0826c374f0d1bca80806d3 /include
parent7f79da7accd63a6adb84f4602f66779f6a701e7b (diff)
[PATCH] ppc64: PCI error event dispatcher
12-eeh-event-dispatcher.patch ppc64: EEH Recovery dispatcher thread This patch adds a mechanism to create recovery threads when an EEH event is received. Since an EEH freeze state may be detected within an interrupt context, we need to get out of the interrupt context before starting recovery. This dispatcher does this in two steps: first, it uses a workqueue to get out, and then launches a kernel thread, so that the recovery routine can sleep for extended periods without upsetting keventd. A kernel thread is created with each EEH event, rather than having one long-running daemon started at boot time. This is because it is anticipated that EEH events will be very rare (very very rare, ideally) and so it's pointless to clutter the process tables with a daemon that will almost never run. Signed-off-by: Linas Vepstas <linas@austin.ibm.com> Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'include')
-rw-r--r--include/asm-powerpc/eeh_event.h52
-rw-r--r--include/asm-ppc64/eeh.h46
2 files changed, 65 insertions, 33 deletions
diff --git a/include/asm-powerpc/eeh_event.h b/include/asm-powerpc/eeh_event.h
new file mode 100644
index 000000000000..d168a30b3866
--- /dev/null
+++ b/include/asm-powerpc/eeh_event.h
@@ -0,0 +1,52 @@
1/*
2 * eeh_event.h
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 * Copyright (c) 2005 Linas Vepstas <linas@linas.org>
19 */
20
21#ifndef ASM_PPC64_EEH_EVENT_H
22#define ASM_PPC64_EEH_EVENT_H
23
24/** EEH event -- structure holding pci controller data that describes
25 * a change in the isolation status of a PCI slot. A pointer
26 * to this struct is passed as the data pointer in a notify callback.
27 */
28struct eeh_event {
29 struct list_head list;
30 struct device_node *dn; /* struct device node */
31 struct pci_dev *dev; /* affected device */
32 int state;
33 int time_unavail; /* milliseconds until device might be available */
34};
35
36/**
37 * eeh_send_failure_event - generate a PCI error event
38 * @dev: pci device
39 *
40 * This routine builds a PCI error event which will be delivered
41 * to all listeners on the peh_notifier_chain.
42 *
43 * This routine can be called within an interrupt context;
44 * the actual event will be delivered in a normal context
45 * (from a workqueue).
46 */
47int eeh_send_failure_event (struct device_node *dn,
48 struct pci_dev *dev,
49 int reset_state,
50 int time_unavail);
51
52#endif /* ASM_PPC64_EEH_EVENT_H */
diff --git a/include/asm-ppc64/eeh.h b/include/asm-ppc64/eeh.h
index 40c8eb57493e..89f26ab31908 100644
--- a/include/asm-ppc64/eeh.h
+++ b/include/asm-ppc64/eeh.h
@@ -1,4 +1,4 @@
1/* 1/*
2 * eeh.h 2 * eeh.h
3 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation. 3 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation.
4 * 4 *
@@ -6,12 +6,12 @@
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or 7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version. 8 * (at your option) any later version.
9 * 9 *
10 * This program is distributed in the hope that it will be useful, 10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details. 13 * GNU General Public License for more details.
14 * 14 *
15 * You should have received a copy of the GNU General Public License 15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software 16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
@@ -27,8 +27,6 @@
27 27
28struct pci_dev; 28struct pci_dev;
29struct device_node; 29struct device_node;
30struct device_node;
31struct notifier_block;
32 30
33#ifdef CONFIG_EEH 31#ifdef CONFIG_EEH
34 32
@@ -37,6 +35,10 @@ struct notifier_block;
37#define EEH_MODE_NOCHECK (1<<1) 35#define EEH_MODE_NOCHECK (1<<1)
38#define EEH_MODE_ISOLATED (1<<2) 36#define EEH_MODE_ISOLATED (1<<2)
39 37
38/* Max number of EEH freezes allowed before we consider the device
39 * to be permanently disabled. */
40#define EEH_MAX_ALLOWED_FREEZES 5
41
40void __init eeh_init(void); 42void __init eeh_init(void);
41unsigned long eeh_check_failure(const volatile void __iomem *token, 43unsigned long eeh_check_failure(const volatile void __iomem *token,
42 unsigned long val); 44 unsigned long val);
@@ -59,36 +61,14 @@ void eeh_add_device_late(struct pci_dev *);
59 * eeh_remove_device - undo EEH setup for the indicated pci device 61 * eeh_remove_device - undo EEH setup for the indicated pci device
60 * @dev: pci device to be removed 62 * @dev: pci device to be removed
61 * 63 *
62 * This routine should be when a device is removed from a running 64 * This routine should be called when a device is removed from
63 * system (e.g. by hotplug or dlpar). 65 * a running system (e.g. by hotplug or dlpar). It unregisters
66 * the PCI device from the EEH subsystem. I/O errors affecting
67 * this device will no longer be detected after this call; thus,
68 * i/o errors affecting this slot may leave this device unusable.
64 */ 69 */
65void eeh_remove_device(struct pci_dev *); 70void eeh_remove_device(struct pci_dev *);
66 71
67#define EEH_DISABLE 0
68#define EEH_ENABLE 1
69#define EEH_RELEASE_LOADSTORE 2
70#define EEH_RELEASE_DMA 3
71
72/**
73 * Notifier event flags.
74 */
75#define EEH_NOTIFY_FREEZE 1
76
77/** EEH event -- structure holding pci slot data that describes
78 * a change in the isolation status of a PCI slot. A pointer
79 * to this struct is passed as the data pointer in a notify callback.
80 */
81struct eeh_event {
82 struct list_head list;
83 struct pci_dev *dev;
84 struct device_node *dn;
85 int reset_state;
86};
87
88/** Register to find out about EEH events. */
89int eeh_register_notifier(struct notifier_block *nb);
90int eeh_unregister_notifier(struct notifier_block *nb);
91
92/** 72/**
93 * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure. 73 * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
94 * 74 *
@@ -129,7 +109,7 @@ static inline void eeh_remove_device(struct pci_dev *dev) { }
129#define EEH_IO_ERROR_VALUE(size) (-1UL) 109#define EEH_IO_ERROR_VALUE(size) (-1UL)
130#endif /* CONFIG_EEH */ 110#endif /* CONFIG_EEH */
131 111
132/* 112/*
133 * MMIO read/write operations with EEH support. 113 * MMIO read/write operations with EEH support.
134 */ 114 */
135static inline u8 eeh_readb(const volatile void __iomem *addr) 115static inline u8 eeh_readb(const volatile void __iomem *addr)