diff options
| -rw-r--r-- | arch/ia64/kernel/mca.c | 216 | ||||
| -rw-r--r-- | arch/ia64/kernel/mca_drv.c | 54 | ||||
| -rw-r--r-- | arch/ia64/kernel/mca_drv.h | 4 | ||||
| -rw-r--r-- | arch/ia64/kernel/salinfo.c | 4 |
4 files changed, 242 insertions, 36 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 2fbe4536fe18..98f3b26d7aff 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
| @@ -54,6 +54,9 @@ | |||
| 54 | * | 54 | * |
| 55 | * 2005-10-07 Keith Owens <kaos@sgi.com> | 55 | * 2005-10-07 Keith Owens <kaos@sgi.com> |
| 56 | * Add notify_die() hooks. | 56 | * Add notify_die() hooks. |
| 57 | * | ||
| 58 | * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | ||
| 59 | * Add printing support for MCA/INIT. | ||
| 57 | */ | 60 | */ |
| 58 | #include <linux/types.h> | 61 | #include <linux/types.h> |
| 59 | #include <linux/init.h> | 62 | #include <linux/init.h> |
| @@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); | |||
| 136 | 139 | ||
| 137 | static int mca_init __initdata; | 140 | static int mca_init __initdata; |
| 138 | 141 | ||
| 142 | /* | ||
| 143 | * limited & delayed printing support for MCA/INIT handler | ||
| 144 | */ | ||
| 145 | |||
| 146 | #define mprintk(fmt...) ia64_mca_printk(fmt) | ||
| 147 | |||
| 148 | #define MLOGBUF_SIZE (512+256*NR_CPUS) | ||
| 149 | #define MLOGBUF_MSGMAX 256 | ||
| 150 | static char mlogbuf[MLOGBUF_SIZE]; | ||
| 151 | static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ | ||
| 152 | static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ | ||
| 153 | static unsigned long mlogbuf_start; | ||
| 154 | static unsigned long mlogbuf_end; | ||
| 155 | static unsigned int mlogbuf_finished = 0; | ||
| 156 | static unsigned long mlogbuf_timestamp = 0; | ||
| 157 | |||
| 158 | static int loglevel_save = -1; | ||
| 159 | #define BREAK_LOGLEVEL(__console_loglevel) \ | ||
| 160 | oops_in_progress = 1; \ | ||
| 161 | if (loglevel_save < 0) \ | ||
| 162 | loglevel_save = __console_loglevel; \ | ||
| 163 | __console_loglevel = 15; | ||
| 164 | |||
| 165 | #define RESTORE_LOGLEVEL(__console_loglevel) \ | ||
| 166 | if (loglevel_save >= 0) { \ | ||
| 167 | __console_loglevel = loglevel_save; \ | ||
| 168 | loglevel_save = -1; \ | ||
| 169 | } \ | ||
| 170 | mlogbuf_finished = 0; \ | ||
| 171 | oops_in_progress = 0; | ||
| 172 | |||
| 173 | /* | ||
| 174 | * Push messages into buffer, print them later if not urgent. | ||
| 175 | */ | ||
| 176 | void ia64_mca_printk(const char *fmt, ...) | ||
| 177 | { | ||
| 178 | va_list args; | ||
| 179 | int printed_len; | ||
| 180 | char temp_buf[MLOGBUF_MSGMAX]; | ||
| 181 | char *p; | ||
| 182 | |||
| 183 | va_start(args, fmt); | ||
| 184 | printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); | ||
| 185 | va_end(args); | ||
| 186 | |||
| 187 | /* Copy the output into mlogbuf */ | ||
| 188 | if (oops_in_progress) { | ||
| 189 | /* mlogbuf was abandoned, use printk directly instead. */ | ||
| 190 | printk(temp_buf); | ||
| 191 | } else { | ||
| 192 | spin_lock(&mlogbuf_wlock); | ||
| 193 | for (p = temp_buf; *p; p++) { | ||
| 194 | unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; | ||
| 195 | if (next != mlogbuf_start) { | ||
| 196 | mlogbuf[mlogbuf_end] = *p; | ||
| 197 | mlogbuf_end = next; | ||
| 198 | } else { | ||
| 199 | /* buffer full */ | ||
| 200 | break; | ||
| 201 | } | ||
| 202 | } | ||
| 203 | mlogbuf[mlogbuf_end] = '\0'; | ||
| 204 | spin_unlock(&mlogbuf_wlock); | ||
| 205 | } | ||
| 206 | } | ||
| 207 | EXPORT_SYMBOL(ia64_mca_printk); | ||
| 208 | |||
| 209 | /* | ||
| 210 | * Print buffered messages. | ||
| 211 | * NOTE: call this after returning to normal context (e.g. from salinfod). | ||
| 212 | */ | ||
| 213 | void ia64_mlogbuf_dump(void) | ||
| 214 | { | ||
| 215 | char temp_buf[MLOGBUF_MSGMAX]; | ||
| 216 | char *p; | ||
| 217 | unsigned long index; | ||
| 218 | unsigned long flags; | ||
| 219 | unsigned int printed_len; | ||
| 220 | |||
| 221 | /* Get output from mlogbuf */ | ||
| 222 | while (mlogbuf_start != mlogbuf_end) { | ||
| 223 | temp_buf[0] = '\0'; | ||
| 224 | p = temp_buf; | ||
| 225 | printed_len = 0; | ||
| 226 | |||
| 227 | spin_lock_irqsave(&mlogbuf_rlock, flags); | ||
| 228 | |||
| 229 | index = mlogbuf_start; | ||
| 230 | while (index != mlogbuf_end) { | ||
| 231 | *p = mlogbuf[index]; | ||
| 232 | index = (index + 1) % MLOGBUF_SIZE; | ||
| 233 | if (!*p) | ||
| 234 | break; | ||
| 235 | p++; | ||
| 236 | if (++printed_len >= MLOGBUF_MSGMAX - 1) | ||
| 237 | break; | ||
| 238 | } | ||
| 239 | *p = '\0'; | ||
| 240 | if (temp_buf[0]) | ||
| 241 | printk(temp_buf); | ||
| 242 | mlogbuf_start = index; | ||
| 243 | |||
| 244 | mlogbuf_timestamp = 0; | ||
| 245 | spin_unlock_irqrestore(&mlogbuf_rlock, flags); | ||
| 246 | } | ||
| 247 | } | ||
| 248 | EXPORT_SYMBOL(ia64_mlogbuf_dump); | ||
| 249 | |||
| 250 | /* | ||
| 251 | * Call this if the system is going down or if messages must be flushed to | ||
| 252 | * the console immediately (e.g. recovery failed, a crash dump is about to be | ||
| 253 | * invoked, a long-wait rendezvous, etc.). | ||
| 254 | * NOTE: this should be called from monarch. | ||
| 255 | */ | ||
| 256 | static void ia64_mlogbuf_finish(int wait) | ||
| 257 | { | ||
| 258 | BREAK_LOGLEVEL(console_loglevel); | ||
| 259 | |||
| 260 | spin_lock_init(&mlogbuf_rlock); | ||
| 261 | ia64_mlogbuf_dump(); | ||
| 262 | printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " | ||
| 263 | "MCA/INIT might be dodgy or fail.\n"); | ||
| 264 | |||
| 265 | if (!wait) | ||
| 266 | return; | ||
| 267 | |||
| 268 | /* wait for console */ | ||
| 269 | printk("Delaying for 5 seconds...\n"); | ||
| 270 | udelay(5*1000000); | ||
| 271 | |||
| 272 | mlogbuf_finished = 1; | ||
| 273 | } | ||
| 274 | EXPORT_SYMBOL(ia64_mlogbuf_finish); | ||
| 275 | |||
| 276 | /* | ||
| 277 | * Print buffered messages from INIT context. | ||
| 278 | */ | ||
| 279 | static void ia64_mlogbuf_dump_from_init(void) | ||
| 280 | { | ||
| 281 | if (mlogbuf_finished) | ||
| 282 | return; | ||
| 283 | |||
| 284 | if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { | ||
| 285 | printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " | ||
| 286 | " and the system seems to be messed up.\n"); | ||
| 287 | ia64_mlogbuf_finish(0); | ||
| 288 | return; | ||
| 289 | } | ||
| 290 | |||
| 291 | if (!spin_trylock(&mlogbuf_rlock)) { | ||
| 292 | printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " | ||
| 293 | "Generated messages other than stack dump will be " | ||
| 294 | "buffered to mlogbuf and will be printed later.\n"); | ||
| 295 | printk(KERN_ERR "INIT: If messages would not printed after " | ||
| 296 | "this INIT, wait 30sec and assert INIT again.\n"); | ||
| 297 | if (!mlogbuf_timestamp) | ||
| 298 | mlogbuf_timestamp = jiffies; | ||
| 299 | return; | ||
| 300 | } | ||
| 301 | spin_unlock(&mlogbuf_rlock); | ||
| 302 | ia64_mlogbuf_dump(); | ||
| 303 | } | ||
| 139 | 304 | ||
| 140 | static void inline | 305 | static void inline |
| 141 | ia64_mca_spin(const char *func) | 306 | ia64_mca_spin(const char *func) |
| 142 | { | 307 | { |
| 143 | printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | 308 | if (monarch_cpu == smp_processor_id()) |
| 309 | ia64_mlogbuf_finish(0); | ||
| 310 | mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | ||
| 144 | while (1) | 311 | while (1) |
| 145 | cpu_relax(); | 312 | cpu_relax(); |
| 146 | } | 313 | } |
| @@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type) | |||
| 988 | } | 1155 | } |
| 989 | if (!missing) | 1156 | if (!missing) |
| 990 | goto all_in; | 1157 | goto all_in; |
| 991 | printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); | 1158 | /* |
| 1159 | * Some slave(s) may be dead. Print buffered messages immediately. | ||
| 1160 | */ | ||
| 1161 | ia64_mlogbuf_finish(0); | ||
| 1162 | mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); | ||
| 992 | for_each_online_cpu(c) { | 1163 | for_each_online_cpu(c) { |
| 993 | if (c == monarch) | 1164 | if (c == monarch) |
| 994 | continue; | 1165 | continue; |
| 995 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | 1166 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) |
| 996 | printk(" %d", c); | 1167 | mprintk(" %d", c); |
| 997 | } | 1168 | } |
| 998 | printk("\n"); | 1169 | mprintk("\n"); |
| 999 | return; | 1170 | return; |
| 1000 | 1171 | ||
| 1001 | all_in: | 1172 | all_in: |
| 1002 | printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); | 1173 | mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); |
| 1003 | return; | 1174 | return; |
| 1004 | } | 1175 | } |
| 1005 | 1176 | ||
| @@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1027 | struct ia64_mca_notify_die nd = | 1198 | struct ia64_mca_notify_die nd = |
| 1028 | { .sos = sos, .monarch_cpu = &monarch_cpu }; | 1199 | { .sos = sos, .monarch_cpu = &monarch_cpu }; |
| 1029 | 1200 | ||
| 1030 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 1201 | mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " |
| 1031 | console_loglevel = 15; /* make sure printks make it to console */ | 1202 | "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); |
| 1032 | printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", | ||
| 1033 | sos->proc_state_param, cpu, sos->monarch); | ||
| 1034 | 1203 | ||
| 1035 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | 1204 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); |
| 1036 | monarch_cpu = cpu; | 1205 | monarch_cpu = cpu; |
| @@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1066 | rh->severity = sal_log_severity_corrected; | 1235 | rh->severity = sal_log_severity_corrected; |
| 1067 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | 1236 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); |
| 1068 | sos->os_status = IA64_MCA_CORRECTED; | 1237 | sos->os_status = IA64_MCA_CORRECTED; |
| 1238 | } else { | ||
| 1239 | /* Dump buffered message to console */ | ||
| 1240 | ia64_mlogbuf_finish(1); | ||
| 1069 | } | 1241 | } |
| 1070 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) | 1242 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) |
| 1071 | == NOTIFY_STOP) | 1243 | == NOTIFY_STOP) |
| @@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
| 1305 | struct task_struct *g, *t; | 1477 | struct task_struct *g, *t; |
| 1306 | if (val != DIE_INIT_MONARCH_PROCESS) | 1478 | if (val != DIE_INIT_MONARCH_PROCESS) |
| 1307 | return NOTIFY_DONE; | 1479 | return NOTIFY_DONE; |
| 1480 | |||
| 1481 | /* | ||
| 1482 | * FIXME: mlogbuf would overflow with INIT stack dumps. | ||
| 1483 | * To enable show_stack from INIT, we set oops_in_progress, which should | ||
| 1484 | * only be used for a real oops. This may cause problems after INIT. | ||
| 1485 | */ | ||
| 1486 | BREAK_LOGLEVEL(console_loglevel); | ||
| 1487 | ia64_mlogbuf_dump_from_init(); | ||
| 1488 | |||
| 1308 | printk(KERN_ERR "Processes interrupted by INIT -"); | 1489 | printk(KERN_ERR "Processes interrupted by INIT -"); |
| 1309 | for_each_online_cpu(c) { | 1490 | for_each_online_cpu(c) { |
| 1310 | struct ia64_sal_os_state *s; | 1491 | struct ia64_sal_os_state *s; |
| @@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
| 1326 | } while_each_thread (g, t); | 1507 | } while_each_thread (g, t); |
| 1327 | read_unlock(&tasklist_lock); | 1508 | read_unlock(&tasklist_lock); |
| 1328 | } | 1509 | } |
| 1510 | /* FIXME: This will not restore zapped printk locks. */ | ||
| 1511 | RESTORE_LOGLEVEL(console_loglevel); | ||
| 1329 | return NOTIFY_DONE; | 1512 | return NOTIFY_DONE; |
| 1330 | } | 1513 | } |
| 1331 | 1514 | ||
| @@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1357 | struct ia64_mca_notify_die nd = | 1540 | struct ia64_mca_notify_die nd = |
| 1358 | { .sos = sos, .monarch_cpu = &monarch_cpu }; | 1541 | { .sos = sos, .monarch_cpu = &monarch_cpu }; |
| 1359 | 1542 | ||
| 1360 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | ||
| 1361 | console_loglevel = 15; /* make sure printks make it to console */ | ||
| 1362 | |||
| 1363 | (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); | 1543 | (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); |
| 1364 | 1544 | ||
| 1365 | printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", | 1545 | mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", |
| 1366 | sos->proc_state_param, cpu, sos->monarch); | 1546 | sos->proc_state_param, cpu, sos->monarch); |
| 1367 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); | 1547 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); |
| 1368 | 1548 | ||
| @@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1375 | * fix their proms and get their customers updated. | 1555 | * fix their proms and get their customers updated. |
| 1376 | */ | 1556 | */ |
| 1377 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { | 1557 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { |
| 1378 | printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", | 1558 | mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", |
| 1379 | __FUNCTION__, cpu); | 1559 | __FUNCTION__, cpu); |
| 1380 | atomic_dec(&slaves); | 1560 | atomic_dec(&slaves); |
| 1381 | sos->monarch = 1; | 1561 | sos->monarch = 1; |
| @@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1387 | * fix their proms and get their customers updated. | 1567 | * fix their proms and get their customers updated. |
| 1388 | */ | 1568 | */ |
| 1389 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { | 1569 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { |
| 1390 | printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", | 1570 | mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", |
| 1391 | __FUNCTION__, cpu); | 1571 | __FUNCTION__, cpu); |
| 1392 | atomic_dec(&monarchs); | 1572 | atomic_dec(&monarchs); |
| 1393 | sos->monarch = 0; | 1573 | sos->monarch = 0; |
| @@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1408 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) | 1588 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) |
| 1409 | == NOTIFY_STOP) | 1589 | == NOTIFY_STOP) |
| 1410 | ia64_mca_spin(__FUNCTION__); | 1590 | ia64_mca_spin(__FUNCTION__); |
| 1411 | printk("Slave on cpu %d returning to normal service.\n", cpu); | 1591 | mprintk("Slave on cpu %d returning to normal service.\n", cpu); |
| 1412 | set_curr_task(cpu, previous_current); | 1592 | set_curr_task(cpu, previous_current); |
| 1413 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | 1593 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; |
| 1414 | atomic_dec(&slaves); | 1594 | atomic_dec(&slaves); |
| @@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1426 | * same serial line, the user will need some time to switch out of the BMC before | 1606 | * same serial line, the user will need some time to switch out of the BMC before |
| 1427 | * the dump begins. | 1607 | * the dump begins. |
| 1428 | */ | 1608 | */ |
| 1429 | printk("Delaying for 5 seconds...\n"); | 1609 | mprintk("Delaying for 5 seconds...\n"); |
| 1430 | udelay(5*1000000); | 1610 | udelay(5*1000000); |
| 1431 | ia64_wait_for_slaves(cpu, "INIT"); | 1611 | ia64_wait_for_slaves(cpu, "INIT"); |
| 1432 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through | 1612 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through |
| @@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
| 1439 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) | 1619 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) |
| 1440 | == NOTIFY_STOP) | 1620 | == NOTIFY_STOP) |
| 1441 | ia64_mca_spin(__FUNCTION__); | 1621 | ia64_mca_spin(__FUNCTION__); |
| 1442 | printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); | 1622 | mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); |
| 1443 | atomic_dec(&monarchs); | 1623 | atomic_dec(&monarchs); |
| 1444 | set_curr_task(cpu, previous_current); | 1624 | set_curr_task(cpu, previous_current); |
| 1445 | monarch_cpu = -1; | 1625 | monarch_cpu = -1; |
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 8db6e0cedadc..a45009d2bc90 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c | |||
| @@ -79,14 +79,30 @@ static int | |||
| 79 | fatal_mca(const char *fmt, ...) | 79 | fatal_mca(const char *fmt, ...) |
| 80 | { | 80 | { |
| 81 | va_list args; | 81 | va_list args; |
| 82 | char buf[256]; | ||
| 82 | 83 | ||
| 83 | va_start(args, fmt); | 84 | va_start(args, fmt); |
| 84 | vprintk(fmt, args); | 85 | vsnprintf(buf, sizeof(buf), fmt, args); |
| 85 | va_end(args); | 86 | va_end(args); |
| 87 | ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); | ||
| 86 | 88 | ||
| 87 | return MCA_NOT_RECOVERED; | 89 | return MCA_NOT_RECOVERED; |
| 88 | } | 90 | } |
| 89 | 91 | ||
| 92 | static int | ||
| 93 | mca_recovered(const char *fmt, ...) | ||
| 94 | { | ||
| 95 | va_list args; | ||
| 96 | char buf[256]; | ||
| 97 | |||
| 98 | va_start(args, fmt); | ||
| 99 | vsnprintf(buf, sizeof(buf), fmt, args); | ||
| 100 | va_end(args); | ||
| 101 | ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); | ||
| 102 | |||
| 103 | return MCA_RECOVERED; | ||
| 104 | } | ||
| 105 | |||
| 90 | /** | 106 | /** |
| 91 | * mca_page_isolate - isolate a poisoned page in order not to use it later | 107 | * mca_page_isolate - isolate a poisoned page in order not to use it later |
| 92 | * @paddr: poisoned memory location | 108 | * @paddr: poisoned memory location |
| @@ -140,6 +156,7 @@ mca_page_isolate(unsigned long paddr) | |||
| 140 | void | 156 | void |
| 141 | mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) | 157 | mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) |
| 142 | { | 158 | { |
| 159 | ia64_mlogbuf_dump(); | ||
| 143 | printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " | 160 | printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " |
| 144 | "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", | 161 | "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", |
| 145 | raw_smp_processor_id(), current->pid, current->uid, | 162 | raw_smp_processor_id(), current->pid, current->uid, |
| @@ -440,7 +457,7 @@ recover_from_read_error(slidx_table_t *slidx, | |||
| 440 | 457 | ||
| 441 | /* Is target address valid? */ | 458 | /* Is target address valid? */ |
| 442 | if (!pbci->tv) | 459 | if (!pbci->tv) |
| 443 | return fatal_mca(KERN_ALERT "MCA: target address not valid\n"); | 460 | return fatal_mca("target address not valid"); |
| 444 | 461 | ||
| 445 | /* | 462 | /* |
| 446 | * cpu read or memory-mapped io read | 463 | * cpu read or memory-mapped io read |
| @@ -458,7 +475,7 @@ recover_from_read_error(slidx_table_t *slidx, | |||
| 458 | 475 | ||
| 459 | /* Is minstate valid? */ | 476 | /* Is minstate valid? */ |
| 460 | if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) | 477 | if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) |
| 461 | return fatal_mca(KERN_ALERT "MCA: minstate not valid\n"); | 478 | return fatal_mca("minstate not valid"); |
| 462 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); | 479 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); |
| 463 | psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); | 480 | psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); |
| 464 | 481 | ||
| @@ -492,13 +509,14 @@ recover_from_read_error(slidx_table_t *slidx, | |||
| 492 | psr2->bn = 1; | 509 | psr2->bn = 1; |
| 493 | psr2->i = 0; | 510 | psr2->i = 0; |
| 494 | 511 | ||
| 495 | return MCA_RECOVERED; | 512 | return mca_recovered("user memory corruption. " |
| 513 | "kill affected process - recovered."); | ||
| 496 | } | 514 | } |
| 497 | 515 | ||
| 498 | } | 516 | } |
| 499 | 517 | ||
| 500 | return fatal_mca(KERN_ALERT "MCA: kernel context not recovered," | 518 | return fatal_mca("kernel context not recovered, iip 0x%lx\n", |
| 501 | " iip 0x%lx\n", pmsa->pmsa_iip); | 519 | pmsa->pmsa_iip); |
| 502 | } | 520 | } |
| 503 | 521 | ||
| 504 | /** | 522 | /** |
| @@ -584,13 +602,13 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
| 584 | * The machine check is corrected. | 602 | * The machine check is corrected. |
| 585 | */ | 603 | */ |
| 586 | if (psp->cm == 1) | 604 | if (psp->cm == 1) |
| 587 | return MCA_RECOVERED; | 605 | return mca_recovered("machine check is already corrected."); |
| 588 | 606 | ||
| 589 | /* | 607 | /* |
| 590 | * The error was not contained. Software must be reset. | 608 | * The error was not contained. Software must be reset. |
| 591 | */ | 609 | */ |
| 592 | if (psp->us || psp->ci == 0) | 610 | if (psp->us || psp->ci == 0) |
| 593 | return fatal_mca(KERN_ALERT "MCA: error not contained\n"); | 611 | return fatal_mca("error not contained"); |
| 594 | 612 | ||
| 595 | /* | 613 | /* |
| 596 | * The cache check and bus check bits have four possible states | 614 | * The cache check and bus check bits have four possible states |
| @@ -601,22 +619,22 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
| 601 | * 1 1 Memory error, attempt recovery | 619 | * 1 1 Memory error, attempt recovery |
| 602 | */ | 620 | */ |
| 603 | if (psp->bc == 0 || pbci == NULL) | 621 | if (psp->bc == 0 || pbci == NULL) |
| 604 | return fatal_mca(KERN_ALERT "MCA: No bus check\n"); | 622 | return fatal_mca("No bus check"); |
| 605 | 623 | ||
| 606 | /* | 624 | /* |
| 607 | * Sorry, we cannot handle so many. | 625 | * Sorry, we cannot handle so many. |
| 608 | */ | 626 | */ |
| 609 | if (peidx_bus_check_num(peidx) > 1) | 627 | if (peidx_bus_check_num(peidx) > 1) |
| 610 | return fatal_mca(KERN_ALERT "MCA: Too many bus checks\n"); | 628 | return fatal_mca("Too many bus checks"); |
| 611 | /* | 629 | /* |
| 612 | * Well, here is only one bus error. | 630 | * Well, here is only one bus error. |
| 613 | */ | 631 | */ |
| 614 | if (pbci->ib) | 632 | if (pbci->ib) |
| 615 | return fatal_mca(KERN_ALERT "MCA: Internal Bus error\n"); | 633 | return fatal_mca("Internal Bus error"); |
| 616 | if (pbci->cc) | 634 | if (pbci->cc) |
| 617 | return fatal_mca(KERN_ALERT "MCA: Cache-cache error\n"); | 635 | return fatal_mca("Cache-cache error"); |
| 618 | if (pbci->eb && pbci->bsi > 0) | 636 | if (pbci->eb && pbci->bsi > 0) |
| 619 | return fatal_mca(KERN_ALERT "MCA: External bus check fatal status\n"); | 637 | return fatal_mca("External bus check fatal status"); |
| 620 | 638 | ||
| 621 | /* | 639 | /* |
| 622 | * This is a local MCA and estimated as recoverable external bus error. | 640 | * This is a local MCA and estimated as recoverable external bus error. |
| @@ -628,7 +646,7 @@ recover_from_processor_error(int platform, slidx_table_t *slidx, | |||
| 628 | /* | 646 | /* |
| 629 | * On account of strange SAL error record, we cannot recover. | 647 | * On account of strange SAL error record, we cannot recover. |
| 630 | */ | 648 | */ |
| 631 | return fatal_mca(KERN_ALERT "MCA: Strange SAL record\n"); | 649 | return fatal_mca("Strange SAL record"); |
| 632 | } | 650 | } |
| 633 | 651 | ||
| 634 | /** | 652 | /** |
| @@ -657,10 +675,10 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) | |||
| 657 | 675 | ||
| 658 | /* Now, OS can recover when there is one processor error section */ | 676 | /* Now, OS can recover when there is one processor error section */ |
| 659 | if (n_proc_err > 1) | 677 | if (n_proc_err > 1) |
| 660 | return fatal_mca(KERN_ALERT "MCA: Too Many Errors\n"); | 678 | return fatal_mca("Too Many Errors"); |
| 661 | else if (n_proc_err == 0) | 679 | else if (n_proc_err == 0) |
| 662 | /* Weird SAL record ... We need not to recover */ | 680 | /* Weird SAL record ... We can't do anything */ |
| 663 | return fatal_mca(KERN_ALERT "MCA: Weird SAL record\n"); | 681 | return fatal_mca("Weird SAL record"); |
| 664 | 682 | ||
| 665 | /* Make index of processor error section */ | 683 | /* Make index of processor error section */ |
| 666 | mca_make_peidx((sal_log_processor_info_t*) | 684 | mca_make_peidx((sal_log_processor_info_t*) |
| @@ -671,7 +689,7 @@ mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) | |||
| 671 | 689 | ||
| 672 | /* Check whether MCA is global or not */ | 690 | /* Check whether MCA is global or not */ |
| 673 | if (is_mca_global(&peidx, &pbci, sos)) | 691 | if (is_mca_global(&peidx, &pbci, sos)) |
| 674 | return fatal_mca(KERN_ALERT "MCA: global MCA\n"); | 692 | return fatal_mca("global MCA"); |
| 675 | 693 | ||
| 676 | /* Try to recover a processor error */ | 694 | /* Try to recover a processor error */ |
| 677 | return recover_from_processor_error(platform_err, &slidx, &peidx, | 695 | return recover_from_processor_error(platform_err, &slidx, &peidx, |
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h index 31a2e52bb16f..c85e943ba5fd 100644 --- a/arch/ia64/kernel/mca_drv.h +++ b/arch/ia64/kernel/mca_drv.h | |||
| @@ -118,3 +118,7 @@ struct mca_table_entry { | |||
| 118 | 118 | ||
| 119 | extern const struct mca_table_entry *search_mca_tables (unsigned long addr); | 119 | extern const struct mca_table_entry *search_mca_tables (unsigned long addr); |
| 120 | extern int mca_recover_range(unsigned long); | 120 | extern int mca_recover_range(unsigned long); |
| 121 | extern void ia64_mca_printk(const char * fmt, ...) | ||
| 122 | __attribute__ ((format (printf, 1, 2))); | ||
| 123 | extern void ia64_mlogbuf_dump(void); | ||
| 124 | |||
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 9065f0f01ba3..e63b8ca5344a 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c | |||
| @@ -266,6 +266,7 @@ salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) | |||
| 266 | /* Check for outstanding MCA/INIT records every minute (arbitrary) */ | 266 | /* Check for outstanding MCA/INIT records every minute (arbitrary) */ |
| 267 | #define SALINFO_TIMER_DELAY (60*HZ) | 267 | #define SALINFO_TIMER_DELAY (60*HZ) |
| 268 | static struct timer_list salinfo_timer; | 268 | static struct timer_list salinfo_timer; |
| 269 | extern void ia64_mlogbuf_dump(void); | ||
| 269 | 270 | ||
| 270 | static void | 271 | static void |
| 271 | salinfo_timeout_check(struct salinfo_data *data) | 272 | salinfo_timeout_check(struct salinfo_data *data) |
| @@ -283,6 +284,7 @@ salinfo_timeout_check(struct salinfo_data *data) | |||
| 283 | static void | 284 | static void |
| 284 | salinfo_timeout (unsigned long arg) | 285 | salinfo_timeout (unsigned long arg) |
| 285 | { | 286 | { |
| 287 | ia64_mlogbuf_dump(); | ||
| 286 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); | 288 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); |
| 287 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); | 289 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); |
| 288 | salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; | 290 | salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; |
| @@ -332,6 +334,8 @@ retry: | |||
| 332 | if (cpu == -1) | 334 | if (cpu == -1) |
| 333 | goto retry; | 335 | goto retry; |
| 334 | 336 | ||
| 337 | ia64_mlogbuf_dump(); | ||
| 338 | |||
| 335 | /* for next read, start checking at next CPU */ | 339 | /* for next read, start checking at next CPU */ |
| 336 | data->cpu_check = cpu; | 340 | data->cpu_check = cpu; |
| 337 | if (++data->cpu_check == NR_CPUS) | 341 | if (++data->cpu_check == NR_CPUS) |
