aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms/pseries/ras.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries/ras.c')
-rw-r--r-- arch/powerpc/platforms/pseries/ras.c 195
1 files changed, 128 insertions, 67 deletions
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 086d2ae4e06a..c4dfccd3a3d9 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -16,37 +16,15 @@
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */ 17 */
18 18
19/* Change Activity:
20 * 2001/09/21 : engebret : Created with minimal EPOW and HW exception support.
21 * End Change Activity
22 */
23
24#include <linux/errno.h>
25#include <linux/threads.h>
26#include <linux/kernel_stat.h>
27#include <linux/signal.h>
28#include <linux/sched.h> 19#include <linux/sched.h>
29#include <linux/ioport.h>
30#include <linux/interrupt.h> 20#include <linux/interrupt.h>
31#include <linux/timex.h>
32#include <linux/init.h>
33#include <linux/delay.h>
34#include <linux/irq.h> 21#include <linux/irq.h>
35#include <linux/random.h> 22#include <linux/of.h>
36#include <linux/sysrq.h> 23#include <linux/fs.h>
37#include <linux/bitops.h> 24#include <linux/reboot.h>
38 25
39#include <asm/uaccess.h>
40#include <asm/system.h>
41#include <asm/io.h>
42#include <asm/pgtable.h>
43#include <asm/irq.h>
44#include <asm/cache.h>
45#include <asm/prom.h>
46#include <asm/ptrace.h>
47#include <asm/machdep.h> 26#include <asm/machdep.h>
48#include <asm/rtas.h> 27#include <asm/rtas.h>
49#include <asm/udbg.h>
50#include <asm/firmware.h> 28#include <asm/firmware.h>
51 29
52#include "pseries.h" 30#include "pseries.h"
@@ -57,7 +35,6 @@ static DEFINE_SPINLOCK(ras_log_buf_lock);
57static char global_mce_data_buf[RTAS_ERROR_LOG_MAX]; 35static char global_mce_data_buf[RTAS_ERROR_LOG_MAX];
58static DEFINE_PER_CPU(__u64, mce_data_buf); 36static DEFINE_PER_CPU(__u64, mce_data_buf);
59 37
60static int ras_get_sensor_state_token;
61static int ras_check_exception_token; 38static int ras_check_exception_token;
62 39
63#define EPOW_SENSOR_TOKEN 9 40#define EPOW_SENSOR_TOKEN 9
@@ -75,7 +52,6 @@ static int __init init_ras_IRQ(void)
75{ 52{
76 struct device_node *np; 53 struct device_node *np;
77 54
78 ras_get_sensor_state_token = rtas_token("get-sensor-state");
79 ras_check_exception_token = rtas_token("check-exception"); 55 ras_check_exception_token = rtas_token("check-exception");
80 56
81 /* Internal Errors */ 57 /* Internal Errors */
@@ -95,26 +71,126 @@ static int __init init_ras_IRQ(void)
95 71
96 return 0; 72 return 0;
97} 73}
98__initcall(init_ras_IRQ); 74subsys_initcall(init_ras_IRQ);
99 75
/* Event modifier values handled for an EPOW system-shutdown event. */
#define EPOW_SHUTDOWN_NORMAL				1
#define EPOW_SHUTDOWN_ON_UPS				2
#define EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS	3
#define EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH	4

/*
 * React to an EPOW system-shutdown event.
 *
 * @event_modifier: one of the EPOW_SHUTDOWN_* values above.
 *
 * Every recognised modifier except EPOW_SHUTDOWN_ON_UPS logs the reason and
 * calls orderly_poweroff(1).  Running on UPS/battery is only reported; no
 * power off is requested for it here.  Unrecognised modifiers are logged
 * and otherwise ignored.
 *
 * Each message is newline-terminated so that it is emitted as a complete
 * log record of its own.
 */
static void handle_system_shutdown(char event_modifier)
{
	switch (event_modifier) {
	case EPOW_SHUTDOWN_NORMAL:
		pr_emerg("Firmware initiated power off\n");
		orderly_poweroff(1);
		break;

	case EPOW_SHUTDOWN_ON_UPS:
		/* Report only: no shutdown is requested for this modifier. */
		pr_emerg("Loss of power reported by firmware, system is "
			"running on UPS/battery\n");
		break;

	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
		pr_emerg("Loss of system critical functions reported by "
			"firmware\n");
		pr_emerg("Check RTAS error log for details\n");
		orderly_poweroff(1);
		break;

	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
		pr_emerg("Ambient temperature too high reported by firmware\n");
		pr_emerg("Check RTAS error log for details\n");
		orderly_poweroff(1);
		break;

	default:
		pr_err("Unknown power/cooling shutdown event (modifier %d)\n",
			event_modifier);
		break;
	}
}
112
/*
 * Byte layout of the data carried in the EPOW section of a pSeries error
 * log; rtas_parse_epow_errlog() casts pseries_log->data to this type.
 * Only sensor_value and event_modifier are read by the code visible here.
 */
struct epow_errorlog {
	unsigned char sensor_value;	/* low 4 bits: EPOW action code */
	unsigned char event_modifier;	/* low 4 bits: event modifier */
	unsigned char extended_modifier;	/* not read in the visible code */
	unsigned char reserved;
	unsigned char platform_reason;	/* not read in the visible code */
};
120
/*
 * EPOW action codes, compared against the low 4 bits of
 * epow_errorlog.sensor_value in rtas_parse_epow_errlog().
 * NOTE(review): value 6 has no define here - presumably unused/reserved;
 * confirm against the platform specification.
 */
#define EPOW_RESET		0
#define EPOW_WARN_COOLING	1
#define EPOW_WARN_POWER		2
#define EPOW_SYSTEM_SHUTDOWN	3
#define EPOW_SYSTEM_HALT	4
#define EPOW_MAIN_ENCLOSURE	5
#define EPOW_POWER_OFF		7
128
129void rtas_parse_epow_errlog(struct rtas_error_log *log)
130{
131 struct pseries_errorlog *pseries_log;
132 struct epow_errorlog *epow_log;
133 char action_code;
134 char modifier;
135
136 pseries_log = get_pseries_errorlog(log, PSERIES_ELOG_SECT_ID_EPOW);
137 if (pseries_log == NULL)
138 return;
139
140 epow_log = (struct epow_errorlog *)pseries_log->data;
141 action_code = epow_log->sensor_value & 0xF; /* bottom 4 bits */
142 modifier = epow_log->event_modifier & 0xF; /* bottom 4 bits */
143
144 switch (action_code) {
145 case EPOW_RESET:
146 pr_err("Non critical power or cooling issue cleared");
147 break;
148
149 case EPOW_WARN_COOLING:
150 pr_err("Non critical cooling issue reported by firmware");
151 pr_err("Check RTAS error log for details");
152 break;
153
154 case EPOW_WARN_POWER:
155 pr_err("Non critical power issue reported by firmware");
156 pr_err("Check RTAS error log for details");
157 break;
158
159 case EPOW_SYSTEM_SHUTDOWN:
160 handle_system_shutdown(epow_log->event_modifier);
161 break;
162
163 case EPOW_SYSTEM_HALT:
164 pr_emerg("Firmware initiated power off");
165 orderly_poweroff(1);
166 break;
167
168 case EPOW_MAIN_ENCLOSURE:
169 case EPOW_POWER_OFF:
170 pr_emerg("Critical power/cooling issue reported by firmware");
171 pr_emerg("Check RTAS error log for details");
172 pr_emerg("Immediate power off");
173 emergency_sync();
174 kernel_power_off();
175 break;
176
177 default:
178 pr_err("Unknown power/cooling event (action code %d)",
179 action_code);
180 }
181}
182
183/* Handle environmental and power warning (EPOW) interrupts. */
107static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) 184static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
108{ 185{
109 int status = 0xdeadbeef; 186 int status;
110 int state = 0; 187 int state;
111 int critical; 188 int critical;
112 189
113 status = rtas_call(ras_get_sensor_state_token, 2, 2, &state, 190 status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state);
114 EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX);
115 191
116 if (state > 3) 192 if (state > 3)
117 critical = 1; /* Time Critical */ 193 critical = 1; /* Time Critical */
118 else 194 else
119 critical = 0; 195 critical = 0;
120 196
@@ -123,18 +199,14 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
123 status = rtas_call(ras_check_exception_token, 6, 1, NULL, 199 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
124 RTAS_VECTOR_EXTERNAL_INTERRUPT, 200 RTAS_VECTOR_EXTERNAL_INTERRUPT,
125 virq_to_hw(irq), 201 virq_to_hw(irq),
126 RTAS_EPOW_WARNING | RTAS_POWERMGM_EVENTS, 202 RTAS_EPOW_WARNING,
127 critical, __pa(&ras_log_buf), 203 critical, __pa(&ras_log_buf),
128 rtas_get_error_log_max()); 204 rtas_get_error_log_max());
129 205
130 udbg_printf("EPOW <0x%lx 0x%x 0x%x>\n",
131 *((unsigned long *)&ras_log_buf), status, state);
132 printk(KERN_WARNING "EPOW <0x%lx 0x%x 0x%x>\n",
133 *((unsigned long *)&ras_log_buf), status, state);
134
135 /* format and print the extended information */
136 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0); 206 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
137 207
208 rtas_parse_epow_errlog((struct rtas_error_log *)ras_log_buf);
209
138 spin_unlock(&ras_log_buf_lock); 210 spin_unlock(&ras_log_buf_lock);
139 return IRQ_HANDLED; 211 return IRQ_HANDLED;
140} 212}
@@ -150,7 +222,7 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id)
150static irqreturn_t ras_error_interrupt(int irq, void *dev_id) 222static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
151{ 223{
152 struct rtas_error_log *rtas_elog; 224 struct rtas_error_log *rtas_elog;
153 int status = 0xdeadbeef; 225 int status;
154 int fatal; 226 int fatal;
155 227
156 spin_lock(&ras_log_buf_lock); 228 spin_lock(&ras_log_buf_lock);
@@ -158,7 +230,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
158 status = rtas_call(ras_check_exception_token, 6, 1, NULL, 230 status = rtas_call(ras_check_exception_token, 6, 1, NULL,
159 RTAS_VECTOR_EXTERNAL_INTERRUPT, 231 RTAS_VECTOR_EXTERNAL_INTERRUPT,
160 virq_to_hw(irq), 232 virq_to_hw(irq),
161 RTAS_INTERNAL_ERROR, 1 /*Time Critical */, 233 RTAS_INTERNAL_ERROR, 1 /* Time Critical */,
162 __pa(&ras_log_buf), 234 __pa(&ras_log_buf),
163 rtas_get_error_log_max()); 235 rtas_get_error_log_max());
164 236
@@ -173,24 +245,13 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
173 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal); 245 log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
174 246
175 if (fatal) { 247 if (fatal) {
176 udbg_printf("Fatal HW Error <0x%lx 0x%x>\n", 248 pr_emerg("Fatal hardware error reported by firmware");
177 *((unsigned long *)&ras_log_buf), status); 249 pr_emerg("Check RTAS error log for details");
178 printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", 250 pr_emerg("Immediate power off");
179 *((unsigned long *)&ras_log_buf), status); 251 emergency_sync();
180 252 kernel_power_off();
181#ifndef DEBUG_RTAS_POWER_OFF
182 /* Don't actually power off when debugging so we can test
183 * without actually failing while injecting errors.
184 * Error data will not be logged to syslog.
185 */
186 ppc_md.power_off();
187#endif
188 } else { 253 } else {
189 udbg_printf("Recoverable HW Error <0x%lx 0x%x>\n", 254 pr_err("Recoverable hardware error reported by firmware");
190 *((unsigned long *)&ras_log_buf), status);
191 printk(KERN_WARNING
192 "Warning: Recoverable hardware error <0x%lx 0x%x>\n",
193 *((unsigned long *)&ras_log_buf), status);
194 } 255 }
195 256
196 spin_unlock(&ras_log_buf_lock); 257 spin_unlock(&ras_log_buf_lock);