aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
authorAnton Blanchard <anton@samba.org>2012-03-21 11:49:59 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2012-03-27 20:31:59 -0400
commit55fc0c561742c710857dc5a7a591b461a561cf1f (patch)
treea2ef018edbb6055e3d1e086ea64a5bab63cb9796 /arch/powerpc
parent6431f20879e338306b997cd75a36824cf9d6e687 (diff)
powerpc/pseries: Parse and handle EPOW interrupts
We have code to take environmental and power warning (EPOW) interrupts, but it simply prints a terse error message such as "EPOW <0x6240040000000b8 0x0 0x0>", which tells us nothing about what happened. Even worse, if we don't correctly respond to the interrupt we may get terminated by firmware. Add code to printk some useful information when we get EPOW events. We want to make it clear that we have an error, that it was reported by firmware and that the RTAS error log will have more detailed information, e.g.: "Ambient temperature too high reported by firmware. Check RTAS error log for details". Depending on the error encountered, we now issue an immediate or an orderly power down. Move initialization of the EPOW interrupt earlier in boot since we want to respond to them as early as possible. Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/platforms/pseries/ras.c133
1 files changed, 116 insertions, 17 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 086d2ae4e06a..4246d7b9dce9 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -35,6 +35,8 @@
35#include <linux/random.h> 35#include <linux/random.h>
36#include <linux/sysrq.h> 36#include <linux/sysrq.h>
37#include <linux/bitops.h> 37#include <linux/bitops.h>
38#include <linux/fs.h>
39#include <linux/reboot.h>
38 40
39#include <asm/uaccess.h> 41#include <asm/uaccess.h>
40#include <asm/system.h> 42#include <asm/system.h>
@@ -95,26 +97,127 @@ static int __init init_ras_IRQ(void)
95 97
96 return 0; 98 return 0;
97} 99}
98__initcall(init_ras_IRQ); 100subsys_initcall(init_ras_IRQ);
99 101
/* Event modifiers that accompany a system-shutdown EPOW event. */
#define EPOW_SHUTDOWN_NORMAL				1
#define EPOW_SHUTDOWN_ON_UPS				2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4

/*
 * Log the reason for a system-shutdown EPOW event and react to it.
 *
 * @event_modifier: one of the EPOW_SHUTDOWN_* values above.
 *
 * Every recognised modifier except EPOW_SHUTDOWN_ON_UPS requests an orderly
 * power off; running on UPS/battery is only reported. An unrecognised
 * modifier is logged and otherwise ignored.
 *
 * All messages end in a newline so that each one is emitted as a complete
 * log record rather than being left open for a later continuation.
 */
static void handle_system_shutdown(char event_modifier)
{
	switch (event_modifier) {
	case EPOW_SHUTDOWN_NORMAL:
		pr_emerg("Firmware initiated power off\n");
		orderly_poweroff(1);
		break;

	case EPOW_SHUTDOWN_ON_UPS:
		/* Report only: no power off is requested for this modifier. */
		pr_emerg("Loss of power reported by firmware, system is "
			 "running on UPS/battery\n");
		break;

	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
		pr_emerg("Loss of system critical functions reported by "
			 "firmware\n");
		pr_emerg("Check RTAS error log for details\n");
		orderly_poweroff(1);
		break;

	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
		pr_emerg("Ambient temperature too high reported by firmware\n");
		pr_emerg("Check RTAS error log for details\n");
		orderly_poweroff(1);
		break;

	default:
		pr_err("Unknown power/cooling shutdown event (modifier %d)\n",
		       event_modifier);
		break;
	}
}
138
139struct epow_errorlog {
140 unsigned char sensor_value;
141 unsigned char event_modifier;
142 unsigned char extended_modifier;
143 unsigned char reserved;
144 unsigned char platform_reason;
145};
146
147#define EPOW_RESET 0
148#define EPOW_WARN_COOLING 1
149#define EPOW_WARN_POWER 2
150#define EPOW_SYSTEM_SHUTDOWN 3
151#define EPOW_SYSTEM_HALT 4
152#define EPOW_MAIN_ENCLOSURE 5
153#define EPOW_POWER_OFF 7
154
155void rtas_parse_epow_errlog(struct rtas_error_log *log)
156{
157 struct pseries_errorlog *pseries_log;
158 struct epow_errorlog *epow_log;
159 char action_code;
160 char modifier;
161
162 pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
163 if (pseries_log == NULL)
164 return;
165
166 epow_log = (struct epow_errorlog *)pseries_log->data;
167 action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */
168 modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */
169
170 switch (action_code) {
171 case EPOW_RESET:
172 pr_err("Non critical power or cooling issue cleared");
173 break;
174
175 case EPOW_WARN_COOLING:
176 pr_err("Non critical cooling issue reported by firmware");
177 pr_err("Check RTAS error log for details");
178 break;
179
180 case EPOW_WARN_POWER:
181 pr_err("Non critical power issue reported by firmware");
182 pr_err("Check RTAS error log for details");
183 break;
184
185 case EPOW_SYSTEM_SHUTDOWN:
186 handle_system_shutdown(epow_log->event_modifier);
187 break;
188
189 case EPOW_SYSTEM_HALT:
190 pr_emerg("Firmware initiated power off");
191 orderly_poweroff(1);
192 break;
193
194 case EPOW_MAIN_ENCLOSURE:
195 case EPOW_POWER_OFF:
196 pr_emerg("Critical power/cooling issue reported by firmware");
197 pr_emerg("Check RTAS error log for details");
198 pr_emerg("Immediate power off");
199 emergency_sync();
200 kernel_power_off();
201 break;
202
203 default:
204 pr_err("Unknown power/cooling event (action code %d)",
205 action_code);
206 }
207}
208
209/* Handle environmental and power warning (EPOW) interrupts. */
107static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) 210static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
108{ 211{
109 int status = 0xdeadbeef; 212 int status;
110 int state = 0; 213 int state;
111 int critical; 214 int critical;
112 215
113 status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, 216 status = rtas_call(ras_get_sensor_state_token, 2, 2, &state,
114 EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX); 217 EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
115 218
116 if (state > 3) 219 if (state > 3)
117 critical = 1; /* Time Critical */ 220 critical = 1; /* Time Critical */
118 else 221 else
119 critical = 0; 222 critical = 0;
120 223
@@ -127,14 +230,10 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
127 critical, __pa(&ras_log_buf), 230 critical, __pa(&ras_log_buf),
128 rtas_get_error_log_max()); 231 rtas_get_error_log_max());
129 232
130 udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
131 *((unsigned long *)&ras_log_buf), status, state);
132 printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
133 *((unsigned long *)&ras_log_buf), status, state);
134
135 /* format and print the extended information */
136 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 233 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
137 234
235 rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
236
138 spin_unlock(&ras_log_buf_lock); 237 spin_unlock(&ras_log_buf_lock);
139 return IRQ_HANDLED; 238 return IRQ_HANDLED;
140} 239}