diff options
Diffstat (limited to 'arch/ia64')
-rw-r--r-- | arch/ia64/kernel/mca.c | 216 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 54 | ||||
-rw-r--r-- | arch/ia64/kernel/mca_drv.h | 4 | ||||
-rw-r--r-- | arch/ia64/kernel/salinfo.c | 4 |
4 files changed, 242 insertions, 36 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 2fbe4536fe18..98f3b26d7aff 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -54,6 +54,9 @@ | |||
54 | * | 54 | * |
55 | * 2005-10-07 Keith Owens <kaos@sgi.com> | 55 | * 2005-10-07 Keith Owens <kaos@sgi.com> |
56 | * Add notify_die() hooks. | 56 | * Add notify_die() hooks. |
57 | * | ||
58 | * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | ||
59 | * Add printing support for MCA/INIT. | ||
57 | */ | 60 | */ |
58 | #include <linux/types.h> | 61 | #include <linux/types.h> |
59 | #include <linux/init.h> | 62 | #include <linux/init.h> |
@@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); | |||
136 | 139 | ||
137 | static int mca_init __initdata; | 140 | static int mca_init __initdata; |
138 | 141 | ||
142 | /* | ||
143 | * limited & delayed printing support for MCA/INIT handler | ||
144 | */ | ||
145 | |||
146 | #define mprintk(fmt...) ia64_mca_printk(fmt) | ||
147 | |||
148 | #define MLOGBUF_SIZE (512+256*NR_CPUS) | ||
149 | #define MLOGBUF_MSGMAX 256 | ||
150 | static char mlogbuf[MLOGBUF_SIZE]; | ||
151 | static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ | ||
152 | static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ | ||
153 | static unsigned long mlogbuf_start; | ||
154 | static unsigned long mlogbuf_end; | ||
155 | static unsigned int mlogbuf_finished = 0; | ||
156 | static unsigned long mlogbuf_timestamp = 0; | ||
157 | |||
158 | static int loglevel_save = -1; | ||
159 | #define BREAK_LOGLEVEL(__console_loglevel) \ | ||
160 | oops_in_progress = 1; \ | ||
161 | if (loglevel_save < 0) \ | ||
162 | loglevel_save = __console_loglevel; \ | ||
163 | __console_loglevel = 15; | ||
164 | |||
165 | #define RESTORE_LOGLEVEL(__console_loglevel) \ | ||
166 | if (loglevel_save >= 0) { \ | ||
167 | __console_loglevel = loglevel_save; \ | ||
168 | loglevel_save = -1; \ | ||
169 | } \ | ||
170 | mlogbuf_finished = 0; \ | ||
171 | oops_in_progress = 0; | ||
172 | |||
173 | /* | ||
174 | * Push messages into buffer, print them later if not urgent. | ||
175 | */ | ||
176 | void ia64_mca_printk(const char *fmt, ...) | ||
177 | { | ||
178 | va_list args; | ||
179 | int printed_len; | ||
180 | char temp_buf[MLOGBUF_MSGMAX]; | ||
181 | char *p; | ||
182 | |||
183 | va_start(args, fmt); | ||
184 | printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); | ||
185 | va_end(args); | ||
186 | |||
187 | /* Copy the output into mlogbuf */ | ||
188 | if (oops_in_progress) { | ||
189 | /* mlogbuf was abandoned, use printk directly instead. */ | ||
190 | printk(temp_buf); | ||
191 | } else { | ||
192 | spin_lock(&mlogbuf_wlock); | ||
193 | for (p = temp_buf; *p; p++) { | ||
194 | unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; | ||
195 | if (next != mlogbuf_start) { | ||
196 | mlogbuf[mlogbuf_end] = *p; | ||
197 | mlogbuf_end = next; | ||
198 | } else { | ||
199 | /* buffer full */ | ||
200 | break; | ||
201 | } | ||
202 | } | ||
203 | mlogbuf[mlogbuf_end] = '\0'; | ||
204 | spin_unlock(&mlogbuf_wlock); | ||
205 | } | ||
206 | } | ||
207 | EXPORT_SYMBOL(ia64_mca_printk); | ||
208 | |||
209 | /* | ||
210 | * Print buffered messages. | ||
211 | * NOTE: call this after returning normal context. (ex. from salinfod) | ||
212 | */ | ||
213 | void ia64_mlogbuf_dump(void) | ||
214 | { | ||
215 | char temp_buf[MLOGBUF_MSGMAX]; | ||
216 | char *p; | ||
217 | unsigned long index; | ||
218 | unsigned long flags; | ||
219 | unsigned int printed_len; | ||
220 | |||
221 | /* Get output from mlogbuf */ | ||
222 | while (mlogbuf_start != mlogbuf_end) { | ||
223 | temp_buf[0] = '\0'; | ||
224 | p = temp_buf; | ||
225 | printed_len = 0; | ||
226 | |||
227 | spin_lock_irqsave(&mlogbuf_rlock, flags); | ||
228 | |||
229 | index = mlogbuf_start; | ||
230 | while (index != mlogbuf_end) { | ||
231 | *p = mlogbuf[index]; | ||
232 | index = (index + 1) % MLOGBUF_SIZE; | ||
233 | if (!*p) | ||
234 | break; | ||
235 | p++; | ||
236 | if (++printed_len >= MLOGBUF_MSGMAX - 1) | ||
237 | break; | ||
238 | } | ||
239 | *p = '\0'; | ||
240 | if (temp_buf[0]) | ||
241 | printk(temp_buf); | ||
242 | mlogbuf_start = index; | ||
243 | |||
244 | mlogbuf_timestamp = 0; | ||
245 | spin_unlock_irqrestore(&mlogbuf_rlock, flags); | ||
246 | } | ||
247 | } | ||
248 | EXPORT_SYMBOL(ia64_mlogbuf_dump); | ||
249 | |||
250 | /* | ||
251 | * Call this if system is going to down or if immediate flushing messages to | ||
252 | * console is required. (ex. recovery was failed, crash dump is going to be | ||
253 | * invoked, long-wait rendezvous etc.) | ||
254 | * NOTE: this should be called from monarch. | ||
255 | */ | ||
256 | static void ia64_mlogbuf_finish(int wait) | ||
257 | { | ||
258 | BREAK_LOGLEVEL(console_loglevel); | ||
259 | |||
260 | spin_lock_init(&mlogbuf_rlock); | ||
261 | ia64_mlogbuf_dump(); | ||
262 | printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " | ||
263 | "MCA/INIT might be dodgy or fail.\n"); | ||
264 | |||
265 | if (!wait) | ||
266 | return; | ||
267 | |||
268 | /* wait for console */ | ||
269 | printk("Delaying for 5 seconds...\n"); | ||
270 | udelay(5*1000000); | ||
271 | |||
272 | mlogbuf_finished = 1; | ||
273 | } | ||
274 | EXPORT_SYMBOL(ia64_mlogbuf_finish); | ||
275 | |||
276 | /* | ||
277 | * Print buffered messages from INIT context. | ||
278 | */ | ||
279 | static void ia64_mlogbuf_dump_from_init(void) | ||
280 | { | ||
281 | if (mlogbuf_finished) | ||
282 | return; | ||
283 | |||
284 | if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { | ||
285 | printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " | ||
286 | " and the system seems to be messed up.\n"); | ||
287 | ia64_mlogbuf_finish(0); | ||
288 | return; | ||
289 | } | ||
290 | |||
291 | if (!spin_trylock(&mlogbuf_rlock)) { | ||
292 | printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " | ||
293 | "Generated messages other than stack dump will be " | ||
294 | "buffered to mlogbuf and will be printed later.\n"); | ||
295 | printk(KERN_ERR "INIT: If messages would not printed after " | ||
296 | "this INIT, wait 30sec and assert INIT again.\n"); | ||
297 | if (!mlogbuf_timestamp) | ||
298 | mlogbuf_timestamp = jiffies; | ||
299 | return; | ||
300 | } | ||
301 | spin_unlock(&mlogbuf_rlock); | ||
302 | ia64_mlogbuf_dump(); | ||
303 | } | ||
139 | 304 | ||
140 | static void inline | 305 | static void inline |
141 | ia64_mca_spin(const char *func) | 306 | ia64_mca_spin(const char *func) |
142 | { | 307 | { |
143 | printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | 308 | if (monarch_cpu == smp_processor_id()) |
309 | ia64_mlogbuf_finish(0); | ||
310 | mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | ||
144 | while (1) | 311 | while (1) |
145 | cpu_relax(); | 312 | cpu_relax(); |
146 | } | 313 | } |
@@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type) | |||
988 | } | 1155 | } |
989 | if (!missing) | 1156 | if (!missing) |
990 | goto all_in; | 1157 | goto all_in; |
991 | printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); | 1158 | /* |
1159 | * Maybe slave(s) dead. Print buffered messages immediately. | ||
1160 | */ | ||
1161 | ia64_mlogbuf_finish(0); | ||
1162 | mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); | ||
992 | for_each_online_cpu(c) { | 1163 | for_each_online_cpu(c) { |
993 | if (c == monarch) | 1164 | if (c == monarch) |
994 | continue; | 1165 | continue; |
995 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | 1166 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) |
996 | printk(" %d", c); | 1167 | mprintk(" %d", c); |
997 | } | 1168 | } |
998 | printk("\n"); | 1169 | mprintk("\n"); |
999 | return; | 1170 | return; |
1000 | 1171 | ||
1001 | all_in: | 1172 | all_in: |
1002 | printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); | 1173 | mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); |
1003 | return; | 1174 | return; |
1004 | } | 1175 | } |
1005 | 1176 | ||
@@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1027 | struct ia64_mca_notify_die nd = | 1198 | struct ia64_mca_notify_die nd = |
1028 | { .sos = sos, .monarch_cpu = &monarch_cpu }; | 1199 | { .sos = sos, .monarch_cpu = &monarch_cpu }; |
1029 | 1200 | ||
1030 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 1201 | mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " |
1031 | console_loglevel = 15; /* make sure printks make it to console */ | 1202 | "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); |
1032 | printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", | ||
1033 | sos->proc_state_param, cpu, sos->monarch); | ||
1034 | 1203 | ||
1035 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | 1204 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); |
1036 | monarch_cpu = cpu; | 1205 | monarch_cpu = cpu; |
@@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1066 | rh->severity = sal_log_severity_corrected; | 1235 | rh->severity = sal_log_severity_corrected; |
1067 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | 1236 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); |
1068 | sos->os_status = IA64_MCA_CORRECTED; | 1237 | sos->os_status = IA64_MCA_CORRECTED; |
1238 | } else { | ||
1239 | /* Dump buffered message to console */ | ||
1240 | ia64_mlogbuf_finish(1); | ||
1069 | } | 1241 | } |
1070 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) | 1242 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) |
1071 | == NOTIFY_STOP) | 1243 | == NOTIFY_STOP) |
@@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
1305 | struct task_struct *g, *t; | 1477 | struct task_struct *g, *t; |
1306 | if (val != DIE_INIT_MONARCH_PROCESS) | 1478 | if (val != DIE_INIT_MONARCH_PROCESS) |
1307 | return NOTIFY_DONE; | 1479 | return NOTIFY_DONE; |
1480 | |||
1481 | /* | ||
1482 | * FIXME: mlogbuf will brim over with INIT stack dumps. | ||
1483 | * To enable show_stack from INIT, we use oops_in_progress which should | ||
1484 | * be used in real oops. This would cause something wrong after INIT. | ||
1485 | */ | ||
1486 | BREAK_LOGLEVEL(console_loglevel); | ||
1487 | ia64_mlogbuf_dump_from_init(); | ||
1488 | |||
1308 | printk(KERN_ERR "Processes interrupted by INIT -"); | 1489 | printk(KERN_ERR "Processes interrupted by INIT -"); |
1309 | for_each_online_cpu(c) { | 1490 | for_each_online_cpu(c) { |
1310 | struct ia64_sal_os_state *s; | 1491 | struct ia64_sal_os_state *s; |
@@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
1326 | } while_each_thread (g, t); | 1507 | } while_each_thread (g, t); |
1327 | read_unlock(&tasklist_lock); | 1508 | read_unlock(&tasklist_lock); |
1328 | } | 1509 | } |
1510 | /* FIXME: This will not restore zapped printk locks. */ | ||
1511 | RESTORE_LOGLEVEL(console_loglevel); | ||
1329 | return NOTIFY_DONE; | 1512 | return NOTIFY_DONE; |
1330 | } | 1513 | } |
1331 | 1514 | ||
@@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1357 | struct ia64_mca_notify_die nd = | 1540 | struct ia64_mca_notify_die nd = |
1358 | { .sos = sos, .monarch_cpu = &monarch_cpu }; | 1541 | { .sos = sos, .monarch_cpu = &monarch_cpu }; |
1359 | 1542 | ||
1360 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | ||
1361 | console_loglevel = 15; /* make sure printks make it to console */ | ||
1362 | |||
1363 | (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); | 1543 | (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); |
1364 | 1544 | ||
1365 | printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", | 1545 | mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", |
1366 | sos->proc_state_param, cpu, sos->monarch); | 1546 | sos->proc_state_param, cpu, sos->monarch); |
1367 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); | 1547 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); |
1368 | 1548 | ||
@@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1375 | * fix their proms and get their customers updated. | 1555 | * fix their proms and get their customers updated. |
1376 | */ | 1556 | */ |
1377 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { | 1557 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { |
1378 | printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", | 1558 | mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", |
1379 | __FUNCTION__, cpu); | 1559 | __FUNCTION__, cpu); |
1380 | atomic_dec(&slaves); | 1560 | atomic_dec(&slaves); |
1381 | sos->monarch = 1; | 1561 | sos->monarch = 1; |
@@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1387 | * fix their proms and get their customers updated. | 1567 | * fix their proms and get their customers updated. |
1388 | */ | 1568 | */ |
1389 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { | 1569 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { |
1390 | printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", | 1570 | mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", |
1391 | __FUNCTION__, cpu); | 1571 | __FUNCTION__, cpu); |
1392 | atomic_dec(&monarchs); | 1572 | atomic_dec(&monarchs); |
1393 | sos->monarch = 0; | 1573 | sos->monarch = 0; |
@@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1408 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) | 1588 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) |
1409 | == NOTIFY_STOP) | 1589 | == NOTIFY_STOP) |
1410 | ia64_mca_spin(__FUNCTION__); | 1590 | ia64_mca_spin(__FUNCTION__); |
1411 | printk("Slave on cpu %d returning to normal service.\n", cpu); | 1591 | mprintk("Slave on cpu %d returning to normal service.\n", cpu); |
1412 | set_curr_task(cpu, previous_current); | 1592 | set_curr_task(cpu, previous_current); |
1413 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | 1593 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; |
1414 | atomic_dec(&slaves); | 1594 | atomic_dec(&slaves); |
@@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1426 | * same serial line, the user will need some time to switch out of the BMC before | 1606 | * same serial line, the user will need some time to switch out of the BMC before |
1427 | * the dump begins. | 1607 | * the dump begins. |
1428 | */ | 1608 | */ |
1429 | printk("Delaying for 5 seconds...\n"); | 1609 | mprintk("Delaying for 5 seconds...\n"); |
1430 | udelay(5*1000000); | 1610 | udelay(5*1000000); |
1431 | ia64_wait_for_slaves(cpu, "INIT"); | 1611 | ia64_wait_for_slaves(cpu, "INIT"); |
1432 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through | 1612 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through |
@@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1439 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) | 1619 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) |
1440 | == NOTIFY_STOP) | 1620 | == NOTIFY_STOP) |
1441 | ia64_mca_spin(__FUNCTION__); | 1621 | ia64_mca_spin(__FUNCTION__); |
1442 | printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); | 1622 | mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); |
1443 | atomic_dec(&monarchs); | 1623 | atomic_dec(&monarchs); |
1444 | set_curr_task(cpu, previous_current); | 1624 | set_curr_task(cpu, previous_current); |
1445 | monarch_cpu = -1; | 1625 | monarch_cpu = -1; |
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 8db6e0cedadc..a45009d2bc90 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c | |||
@@ -79,14 +79,30 @@ static int | |||
79 | fatal_mca(const char *fmt, ...) | 79 | fatal_mca(const char *fmt, ...) |
80 | { | 80 | { |
81 | va_list args; | 81 | va_list args; |
82 | char buf[256]; | ||
82 | 83 | ||
83 | va_start(args, fmt); | 84 | va_start(args, fmt); |
84 | vprintk(fmt, args); | 85 | vsnprintf(buf, sizeof(buf), fmt, args); |
85 | va_end(args); | 86 | va_end(args); |
87 | ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); | ||
86 | 88 | ||
87 | return MCA_NOT_RECOVERED; | 89 | return MCA_NOT_RECOVERED; |
88 | } | 90 | } |
89 | 91 | ||
92 | static int | ||
93 | mca_recovered(const char *fmt, ...) | ||
94 | { | ||
95 | va_list args; | ||
96 | char buf[256]; | ||
97 | |||
98 | va_start(args, fmt); | ||
99 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
100 | va_end(args); | ||
101 | ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); | ||
102 | |||
103 | return MCA_RECOVERED; | ||
104 | } | ||
105 | |||
90 | /** | 106 | /** |
91 | * mca_page_isolate - isolate a poisoned page in order not to use it later | 107 | * mca_page_isolate - isolate a poisoned page in order not to use it later |
92 | * @paddr: poisoned memory location | 108 | * @paddr: poisoned memory location |
@@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) | |||
140 | void | 156 | void |
141 | mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) | 157 | mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) |
142 | { | 158 | { |
159 | ia64_mlogbuf_dump(); | ||
143 | printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " | 160 | printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " |
144 | "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", | 161 | "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", |
145 | raw_smp_processor_id(), current->pid, current->uid, | 162 | raw_smp_processor_id(), current->pid, current->uid, |
@@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx, | |||
440 | 457 | ||
441 | /* Is target address valid? */ | 458 | /* Is target address valid? */ |
442 | if (!pbci->tv) | 459 | if (!pbci->tv) |
443 | return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); | 460 | return fatal_mca("target address not valid"); |
444 | 461 | ||
445 | /* | 462 | /* |
446 | * cpu read or memory-mapped io read | 463 | * cpu read or memory-mapped io read |
@@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx, | |||
458 | 475 | ||
459 | /* Is minstate valid? */ | 476 | /* Is minstate valid? */ |
460 | if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) | 477 | if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) |
461 | return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); | 478 | return fatal_mca("minstate not valid"); |
462 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); | 479 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); |
463 | psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); | 480 | psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); |
464 | 481 | ||
@@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx, | |||
492 | psr2->bn = 1; | 509 | psr2->bn = 1; |
493 | psr2->i = 0; | 510 | psr2->i = 0; |
494 | 511 | ||
495 | return MCA_RECOVERED; | 512 | return mca_recovered("user memory corruption. " |
513 | "kill affected process - recovered."); | ||
496 | } | 514 | } |
497 | 515 | ||
498 | } | 516 | } |
499 | 517 | ||
500 | return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," | 518 | return fatal_mca("kernel context not recovered, iip 0x%lx\n", |
501 | " iip 0x%lx\n", pmsa->pmsa_iip); | 519 | pmsa->pmsa_iip); |
502 | } | 520 | } |
503 | 521 | ||
504 | /** | 522 | /** |
@@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
584 | * The machine check is corrected. | 602 | * The machine check is corrected. |
585 | */ | 603 | */ |
586 | if (psp->cm == 1) | 604 | if (psp->cm == 1) |
587 | return MCA_RECOVERED; | 605 | return mca_recovered("machine check is already corrected."); |
588 | 606 | ||
589 | /* | 607 | /* |
590 | * The error was not contained. Software must be reset. | 608 | * The error was not contained. Software must be reset. |
591 | */ | 609 | */ |
592 | if (psp->us || psp->ci == 0) | 610 | if (psp->us || psp->ci == 0) |
593 | return fatal_mca(KERN_ALERT "MCA: error not contained\n"); | 611 | return fatal_mca("error not contained"); |
594 | 612 | ||
595 | /* | 613 | /* |
596 | * The cache check and bus check bits have four possible states | 614 | * The cache check and bus check bits have four possible states |
@@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
601 | * 1 1 Memory error, attempt recovery | 619 | * 1 1 Memory error, attempt recovery |
602 | */ | 620 | */ |
603 | if (psp->bc == 0 || pbci == NULL) | 621 | if (psp->bc == 0 || pbci == NULL) |
604 | return fatal_mca(KERN_ALERT "MCA: No bus check\n"); | 622 | return fatal_mca("No bus check"); |
605 | 623 | ||
606 | /* | 624 | /* |
607 | * Sorry, we cannot handle so many. | 625 | * Sorry, we cannot handle so many. |
608 | */ | 626 | */ |
609 | if (peidx_bus_check_num(peidx) > 1) | 627 | if (peidx_bus_check_num(peidx) > 1) |
610 | return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); | 628 | return fatal_mca("Too many bus checks"); |
611 | /* | 629 | /* |
612 | * Well, here is only one bus error. | 630 | * Well, here is only one bus error. |
613 | */ | 631 | */ |
614 | if (pbci->ib) | 632 | if (pbci->ib) |
615 | return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); | 633 | return fatal_mca("Internal Bus error"); |
616 | if (pbci->cc) | 634 | if (pbci->cc) |
617 | return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); | 635 | return fatal_mca("Cache-cache error"); |
618 | if (pbci->eb && pbci->bsi > 0) | 636 | if (pbci->eb && pbci->bsi > 0) |
619 | return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); | 637 | return fatal_mca("External bus check fatal status"); |
620 | 638 | ||
621 | /* | 639 | /* |
622 | * This is a local MCA and estimated as recoverble external bus error. | 640 | * This is a local MCA and estimated as recoverble external bus error. |
@@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
628 | /* | 646 | /* |
629 | * On account of strange SAL error record, we cannot recover. | 647 | * On account of strange SAL error record, we cannot recover. |
630 | */ | 648 | */ |
631 | return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); | 649 | return fatal_mca("Strange SAL record"); |
632 | } | 650 | } |
633 | 651 | ||
634 | /** | 652 | /** |
@@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) | |||
657 | 675 | ||
658 | /* Now, OS can recover when there is one processor error section */ | 676 | /* Now, OS can recover when there is one processor error section */ |
659 | if (n_proc_err > 1) | 677 | if (n_proc_err > 1) |
660 | return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); | 678 | return fatal_mca("Too Many Errors"); |
661 | else if (n_proc_err == 0) | 679 | else if (n_proc_err == 0) |
662 | /* Weird SAL record ... We need not to recover */ | 680 | /* Weird SAL record ... We can't do anything */ |
663 | return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); | 681 | return fatal_mca("Weird SAL record"); |
664 | 682 | ||
665 | /* Make index of processor error section */ | 683 | /* Make index of processor error section */ |
666 | mca_make_peidx((sal_log_processor_info_t*) | 684 | mca_make_peidx((sal_log_processor_info_t*) |
@@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) | |||
671 | 689 | ||
672 | /* Check whether MCA is global or not */ | 690 | /* Check whether MCA is global or not */ |
673 | if (is_mca_global(&peidx, &pbci, sos)) | 691 | if (is_mca_global(&peidx, &pbci, sos)) |
674 | return fatal_mca(KERN_ALERT "MCA: global MCA\n"); | 692 | return fatal_mca("global MCA"); |
675 | 693 | ||
676 | /* Try to recover a processor error */ | 694 | /* Try to recover a processor error */ |
677 | return recover_from_processor_error(platform_err, &slidx, &peidx, | 695 | return recover_from_processor_error(platform_err, &slidx, &peidx, |
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index 31a2e52bb16f..c85e943ba5fd 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h | |||
@@ -118,3 +118,7 @@ struct mca_table_entry { | |||
118 | 118 | ||
119 | extern const struct mca_table_entry *search_mca_tables (unsigned long addr); | 119 | extern const struct mca_table_entry *search_mca_tables (unsigned long addr); |
120 | extern int mca_recover_range(unsigned long); | 120 | extern int mca_recover_range(unsigned long); |
121 | extern void ia64_mca_printk(const char * fmt, ...) | ||
122 | __attribute__ ((format (printf, 1, 2))); | ||
123 | extern void ia64_mlogbuf_dump(void); | ||
124 | |||
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 9065f0f01ba3..e63b8ca5344a 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c | |||
@@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) | |||
266 | /* Check for outstanding MCA/INIT records every minute (arbitrary) */ | 266 | /* Check for outstanding MCA/INIT records every minute (arbitrary) */ |
267 | #define SALINFO_TIMER_DELAY (60*HZ) | 267 | #define SALINFO_TIMER_DELAY (60*HZ) |
268 | static struct timer_list salinfo_timer; | 268 | static struct timer_list salinfo_timer; |
269 | extern void ia64_mlogbuf_dump(void); | ||
269 | 270 | ||
270 | static void | 271 | static void |
271 | salinfo_timeout_check(struct salinfo_data *data) | 272 | salinfo_timeout_check(struct salinfo_data *data) |
@@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data) | |||
283 | static void | 284 | static void |
284 | salinfo_timeout (unsigned long arg) | 285 | salinfo_timeout (unsigned long arg) |
285 | { | 286 | { |
287 | ia64_mlogbuf_dump(); | ||
286 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); | 288 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); |
287 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); | 289 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); |
288 | salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; | 290 | salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; |
@@ -332,6 +334,8 @@ retry: | |||
332 | if (cpu == -1) | 334 | if (cpu == -1) |
333 | goto retry; | 335 | goto retry; |
334 | 336 | ||
337 | ia64_mlogbuf_dump(); | ||
338 | |||
335 | /* for next read, start checking at next CPU */ | 339 | /* for next read, start checking at next CPU */ |
336 | data->cpu_check = cpu; | 340 | data->cpu_check = cpu; |
337 | if (++data->cpu_check == NR_CPUS) | 341 | if (++data->cpu_check == NR_CPUS) |