diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/ras.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/ras.c | 194 |
1 files changed, 128 insertions, 66 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 9e248ef6cc6..c4dfccd3a3d 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c | |||
@@ -16,36 +16,15 @@ | |||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | */ | 17 | */ |
18 | 18 | ||
19 | /* Change Activity: | ||
20 | * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support. | ||
21 | * End Change Activity | ||
22 | */ | ||
23 | |||
24 | #include <linux/errno.h> | ||
25 | #include <linux/threads.h> | ||
26 | #include <linux/kernel_stat.h> | ||
27 | #include <linux/signal.h> | ||
28 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
29 | #include <linux/ioport.h> | ||
30 | #include <linux/interrupt.h> | 20 | #include <linux/interrupt.h> |
31 | #include <linux/timex.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/delay.h> | ||
34 | #include <linux/irq.h> | 21 | #include <linux/irq.h> |
35 | #include <linux/random.h> | 22 | #include <linux/of.h> |
36 | #include <linux/sysrq.h> | 23 | #include <linux/fs.h> |
37 | #include <linux/bitops.h> | 24 | #include <linux/reboot.h> |
38 | 25 | ||
39 | #include <asm/uaccess.h> | ||
40 | #include <asm/io.h> | ||
41 | #include <asm/pgtable.h> | ||
42 | #include <asm/irq.h> | ||
43 | #include <asm/cache.h> | ||
44 | #include <asm/prom.h> | ||
45 | #include <asm/ptrace.h> | ||
46 | #include <asm/machdep.h> | 26 | #include <asm/machdep.h> |
47 | #include <asm/rtas.h> | 27 | #include <asm/rtas.h> |
48 | #include <asm/udbg.h> | ||
49 | #include <asm/firmware.h> | 28 | #include <asm/firmware.h> |
50 | 29 | ||
51 | #include "pseries.h" | 30 | #include "pseries.h" |
@@ -56,7 +35,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock); | |||
56 | static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; | 35 | static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; |
57 | static DEFINE_PER_CPU(__u64, mce_data_buf); | 36 | static DEFINE_PER_CPU(__u64, mce_data_buf); |
58 | 37 | ||
59 | static int ras_get_sensor_state_token; | ||
60 | static int ras_check_exception_token; | 38 | static int ras_check_exception_token; |
61 | 39 | ||
62 | #define EPOW_SENSOR_TOKEN 9 | 40 | #define EPOW_SENSOR_TOKEN 9 |
@@ -74,7 +52,6 @@ static int __init init_ras_IRQ(void) | |||
74 | { | 52 | { |
75 | struct device_node *np; | 53 | struct device_node *np; |
76 | 54 | ||
77 | ras_get_sensor_state_token = rtas_token("get-sensor-state"); | ||
78 | ras_check_exception_token = rtas_token("check-exception"); | 55 | ras_check_exception_token = rtas_token("check-exception"); |
79 | 56 | ||
80 | /* Internal Errors */ | 57 | /* Internal Errors */ |
@@ -94,26 +71,126 @@ static int __init init_ras_IRQ(void) | |||
94 | 71 | ||
95 | return 0; | 72 | return 0; |
96 | } | 73 | } |
97 | __initcall(init_ras_IRQ); | 74 | subsys_initcall(init_ras_IRQ); |
98 | 75 | ||
99 | /* | 76 | #define EPOW_SHUTDOWN_NORMAL 1 |
100 | * Handle power subsystem events (EPOW). | 77 | #define EPOW_SHUTDOWN_ON_UPS 2 |
101 | * | 78 | #define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS 3 |
102 | * Presently we just log the event has occurred. This should be fixed | 79 | #define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH 4 |
103 | * to examine the type of power failure and take appropriate action where | 80 | |
104 | * the time horizon permits something useful to be done. | 81 | static void handle_system_shutdown(char event_modifier) |
105 | */ | 82 | { |
83 | switch (event_modifier) { | ||
84 | case EPOW_SHUTDOWN_NORMAL: | ||
85 | pr_emerg("Firmware initiated power off"); | ||
86 | orderly_poweroff(1); | ||
87 | break; | ||
88 | |||
89 | case EPOW_SHUTDOWN_ON_UPS: | ||
90 | pr_emerg("Loss of power reported by firmware, system is " | ||
91 | "running on UPS/battery"); | ||
92 | break; | ||
93 | |||
94 | case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS: | ||
95 | pr_emerg("Loss of system critical functions reported by " | ||
96 | "firmware"); | ||
97 | pr_emerg("Check RTAS error log for details"); | ||
98 | orderly_poweroff(1); | ||
99 | break; | ||
100 | |||
101 | case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH: | ||
102 | pr_emerg("Ambient temperature too high reported by firmware"); | ||
103 | pr_emerg("Check RTAS error log for details"); | ||
104 | orderly_poweroff(1); | ||
105 | break; | ||
106 | |||
107 | default: | ||
108 | pr_err("Unknown power/cooling shutdown event (modifier %d)", | ||
109 | event_modifier); | ||
110 | } | ||
111 | } | ||
112 | |||
113 | struct epow_errorlog { | ||
114 | unsigned char sensor_value; | ||
115 | unsigned char event_modifier; | ||
116 | unsigned char extended_modifier; | ||
117 | unsigned char reserved; | ||
118 | unsigned char platform_reason; | ||
119 | }; | ||
120 | |||
121 | #define EPOW_RESET 0 | ||
122 | #define EPOW_WARN_COOLING 1 | ||
123 | #define EPOW_WARN_POWER 2 | ||
124 | #define EPOW_SYSTEM_SHUTDOWN 3 | ||
125 | #define EPOW_SYSTEM_HALT 4 | ||
126 | #define EPOW_MAIN_ENCLOSURE 5 | ||
127 | #define EPOW_POWER_OFF 7 | ||
128 | |||
129 | void rtas_parse_epow_errlog(struct rtas_error_log *log) | ||
130 | { | ||
131 | struct pseries_errorlog *pseries_log; | ||
132 | struct epow_errorlog *epow_log; | ||
133 | char action_code; | ||
134 | char modifier; | ||
135 | |||
136 | pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW); | ||
137 | if (pseries_log == NULL) | ||
138 | return; | ||
139 | |||
140 | epow_log = (struct epow_errorlog *)pseries_log->data; | ||
141 | action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */ | ||
142 | modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */ | ||
143 | |||
144 | switch (action_code) { | ||
145 | case EPOW_RESET: | ||
146 | pr_err("Non critical power or cooling issue cleared"); | ||
147 | break; | ||
148 | |||
149 | case EPOW_WARN_COOLING: | ||
150 | pr_err("Non critical cooling issue reported by firmware"); | ||
151 | pr_err("Check RTAS error log for details"); | ||
152 | break; | ||
153 | |||
154 | case EPOW_WARN_POWER: | ||
155 | pr_err("Non critical power issue reported by firmware"); | ||
156 | pr_err("Check RTAS error log for details"); | ||
157 | break; | ||
158 | |||
159 | case EPOW_SYSTEM_SHUTDOWN: | ||
160 | handle_system_shutdown(epow_log->event_modifier); | ||
161 | break; | ||
162 | |||
163 | case EPOW_SYSTEM_HALT: | ||
164 | pr_emerg("Firmware initiated power off"); | ||
165 | orderly_poweroff(1); | ||
166 | break; | ||
167 | |||
168 | case EPOW_MAIN_ENCLOSURE: | ||
169 | case EPOW_POWER_OFF: | ||
170 | pr_emerg("Critical power/cooling issue reported by firmware"); | ||
171 | pr_emerg("Check RTAS error log for details"); | ||
172 | pr_emerg("Immediate power off"); | ||
173 | emergency_sync(); | ||
174 | kernel_power_off(); | ||
175 | break; | ||
176 | |||
177 | default: | ||
178 | pr_err("Unknown power/cooling event (action code %d)", | ||
179 | action_code); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | /* Handle environmental and power warning (EPOW) interrupts. */ | ||
106 | static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) | 184 | static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) |
107 | { | 185 | { |
108 | int status = 0xdeadbeef; | 186 | int status; |
109 | int state = 0; | 187 | int state; |
110 | int critical; | 188 | int critical; |
111 | 189 | ||
112 | status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, | 190 | status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); |
113 | EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); | ||
114 | 191 | ||
115 | if (state > 3) | 192 | if (state > 3) |
116 | critical = 1; /* Time Critical */ | 193 | critical = 1; /* Time Critical */ |
117 | else | 194 | else |
118 | critical = 0; | 195 | critical = 0; |
119 | 196 | ||
@@ -122,18 +199,14 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) | |||
122 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, | 199 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, |
123 | RTAS_VECTOR_EXTERNAL_INTERRUPT, | 200 | RTAS_VECTOR_EXTERNAL_INTERRUPT, |
124 | virq_to_hw(irq), | 201 | virq_to_hw(irq), |
125 | RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, | 202 | RTAS_EPOW_WARNING, |
126 | critical, __pa(&ras_log_buf), | 203 | critical, __pa(&ras_log_buf), |
127 | rtas_get_error_log_max()); | 204 | rtas_get_error_log_max()); |
128 | 205 | ||
129 | udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n", | ||
130 | *((unsigned long *)&ras_log_buf), status, state); | ||
131 | printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n", | ||
132 | *((unsigned long *)&ras_log_buf), status, state); | ||
133 | |||
134 | /* format and print the extended information */ | ||
135 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); | 206 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); |
136 | 207 | ||
208 | rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf); | ||
209 | |||
137 | spin_unlock(&ras_log_buf_lock); | 210 | spin_unlock(&ras_log_buf_lock); |
138 | return IRQ_HANDLED; | 211 | return IRQ_HANDLED; |
139 | } | 212 | } |
@@ -149,7 +222,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) | |||
149 | static irqreturn_t ras_error_interrupt(int irq, void *dev_id) | 222 | static irqreturn_t ras_error_interrupt(int irq, void *dev_id) |
150 | { | 223 | { |
151 | struct rtas_error_log *rtas_elog; | 224 | struct rtas_error_log *rtas_elog; |
152 | int status = 0xdeadbeef; | 225 | int status; |
153 | int fatal; | 226 | int fatal; |
154 | 227 | ||
155 | spin_lock(&ras_log_buf_lock); | 228 | spin_lock(&ras_log_buf_lock); |
@@ -157,7 +230,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) | |||
157 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, | 230 | status = rtas_call(ras_check_exception_token, 6, 1, NULL, |
158 | RTAS_VECTOR_EXTERNAL_INTERRUPT, | 231 | RTAS_VECTOR_EXTERNAL_INTERRUPT, |
159 | virq_to_hw(irq), | 232 | virq_to_hw(irq), |
160 | RTAS_INTERNAL_ERROR, 1 /*Time Critical */, | 233 | RTAS_INTERNAL_ERROR, 1 /* Time Critical */, |
161 | __pa(&ras_log_buf), | 234 | __pa(&ras_log_buf), |
162 | rtas_get_error_log_max()); | 235 | rtas_get_error_log_max()); |
163 | 236 | ||
@@ -172,24 +245,13 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) | |||
172 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); | 245 | log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); |
173 | 246 | ||
174 | if (fatal) { | 247 | if (fatal) { |
175 | udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", | 248 | pr_emerg("Fatal hardware error reported by firmware"); |
176 | *((unsigned long *)&ras_log_buf), status); | 249 | pr_emerg("Check RTAS error log for details"); |
177 | printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", | 250 | pr_emerg("Immediate power off"); |
178 | *((unsigned long *)&ras_log_buf), status); | 251 | emergency_sync(); |
179 | 252 | kernel_power_off(); | |
180 | #ifndef DEBUG_RTAS_POWER_OFF | ||
181 | /* Don't actually power off when debugging so we can test | ||
182 | * without actually failing while injecting errors. | ||
183 | * Error data will not be logged to syslog. | ||
184 | */ | ||
185 | ppc_md.power_off(); | ||
186 | #endif | ||
187 | } else { | 253 | } else { |
188 | udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", | 254 | pr_err("Recoverable hardware error reported by firmware"); |
189 | *((unsigned long *)&ras_log_buf), status); | ||
190 | printk(KERN_WARNING | ||
191 | "Warning: Recoverable hardware error <0x%lx 0x%x>\n", | ||
192 | *((unsigned long *)&ras_log_buf), status); | ||
193 | } | 255 | } |
194 | 256 | ||
195 | spin_unlock(&ras_log_buf_lock); | 257 | spin_unlock(&ras_log_buf_lock); |