diff options
author | Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | 2006-09-26 17:44:37 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2006-09-26 17:44:37 -0400 |
commit | 43ed3baf623410b3fa6ca14a9d3f6deca3493c56 (patch) | |
tree | b086b18adff2af6b2633e239e9d1b26d764ae333 /arch/ia64/kernel/mca.c | |
parent | 816add4e986499145135c4014a7c8a8857f9f3c3 (diff) |
[IA64] printing support for MCA/INIT
Printing messages to the console from the MCA/INIT handlers is useful,
however setting oops_in_progress = 1 in them is exactly what makes
something in the kernel go wrong. This is especially ugly if the
system goes wrong after returning from a recoverable MCA.
This patch adds an ia64_mca_printk() function that collects
messages into a temporary, not-so-large message buffer while
in the MCA/INIT environment and prints them out later, after
returning to normal context or when the handlers decide to
bring the system down.
This print function is also exported for use by extended
MCA handlers. It would be useful for describing details about
a recovery.
NOTE:
I don't think it is sane to enlarge the temporary message buffer
enough to hold whole stack dumps from INIT, so
buffering is disabled during the stack dump from the INIT monarch
(= default_monarch_init_process). Please fix this in the future.
Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Acked-by: Russ Anderson <rja@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
-rw-r--r-- | arch/ia64/kernel/mca.c | 216 |
1 files changed, 198 insertions, 18 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 2fbe4536fe18..98f3b26d7aff 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -54,6 +54,9 @@ | |||
54 | * | 54 | * |
55 | * 2005-10-07 Keith Owens <kaos@sgi.com> | 55 | * 2005-10-07 Keith Owens <kaos@sgi.com> |
56 | * Add notify_die() hooks. | 56 | * Add notify_die() hooks. |
57 | * | ||
58 | * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> | ||
59 | * Add printing support for MCA/INIT. | ||
57 | */ | 60 | */ |
58 | #include <linux/types.h> | 61 | #include <linux/types.h> |
59 | #include <linux/init.h> | 62 | #include <linux/init.h> |
@@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); | |||
136 | 139 | ||
137 | static int mca_init __initdata; | 140 | static int mca_init __initdata; |
138 | 141 | ||
142 | /* | ||
143 | * limited & delayed printing support for MCA/INIT handler | ||
144 | */ | ||
145 | |||
146 | #define mprintk(fmt...) ia64_mca_printk(fmt) | ||
147 | |||
148 | #define MLOGBUF_SIZE (512+256*NR_CPUS) | ||
149 | #define MLOGBUF_MSGMAX 256 | ||
150 | static char mlogbuf[MLOGBUF_SIZE]; | ||
151 | static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ | ||
152 | static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ | ||
153 | static unsigned long mlogbuf_start; | ||
154 | static unsigned long mlogbuf_end; | ||
155 | static unsigned int mlogbuf_finished = 0; | ||
156 | static unsigned long mlogbuf_timestamp = 0; | ||
157 | |||
/* Loglevel remembered by BREAK_LOGLEVEL(); -1 while nothing is saved. */
static int loglevel_save = -1;

/*
 * Switch to urgent printing: set oops_in_progress and force the given
 * loglevel variable to 15, remembering its previous value the first time.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and stays correct under an unbraced if/else.
 */
#define BREAK_LOGLEVEL(__console_loglevel)			\
	do {							\
		oops_in_progress = 1;				\
		if (loglevel_save < 0)				\
			loglevel_save = __console_loglevel;	\
		__console_loglevel = 15;			\
	} while (0)

/*
 * Undo BREAK_LOGLEVEL(): put back the saved loglevel (if any) and clear
 * both mlogbuf_finished and oops_in_progress.
 */
#define RESTORE_LOGLEVEL(__console_loglevel)			\
	do {							\
		if (loglevel_save >= 0) {			\
			__console_loglevel = loglevel_save;	\
			loglevel_save = -1;			\
		}						\
		mlogbuf_finished = 0;				\
		oops_in_progress = 0;				\
	} while (0)
172 | |||
173 | /* | ||
174 | * Push messages into buffer, print them later if not urgent. | ||
175 | */ | ||
176 | void ia64_mca_printk(const char *fmt, ...) | ||
177 | { | ||
178 | va_list args; | ||
179 | int printed_len; | ||
180 | char temp_buf[MLOGBUF_MSGMAX]; | ||
181 | char *p; | ||
182 | |||
183 | va_start(args, fmt); | ||
184 | printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); | ||
185 | va_end(args); | ||
186 | |||
187 | /* Copy the output into mlogbuf */ | ||
188 | if (oops_in_progress) { | ||
189 | /* mlogbuf was abandoned, use printk directly instead. */ | ||
190 | printk(temp_buf); | ||
191 | } else { | ||
192 | spin_lock(&mlogbuf_wlock); | ||
193 | for (p = temp_buf; *p; p++) { | ||
194 | unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; | ||
195 | if (next != mlogbuf_start) { | ||
196 | mlogbuf[mlogbuf_end] = *p; | ||
197 | mlogbuf_end = next; | ||
198 | } else { | ||
199 | /* buffer full */ | ||
200 | break; | ||
201 | } | ||
202 | } | ||
203 | mlogbuf[mlogbuf_end] = '\0'; | ||
204 | spin_unlock(&mlogbuf_wlock); | ||
205 | } | ||
206 | } | ||
207 | EXPORT_SYMBOL(ia64_mca_printk); | ||
208 | |||
209 | /* | ||
210 | * Print buffered messages. | ||
211 | * NOTE: call this after returning normal context. (ex. from salinfod) | ||
212 | */ | ||
213 | void ia64_mlogbuf_dump(void) | ||
214 | { | ||
215 | char temp_buf[MLOGBUF_MSGMAX]; | ||
216 | char *p; | ||
217 | unsigned long index; | ||
218 | unsigned long flags; | ||
219 | unsigned int printed_len; | ||
220 | |||
221 | /* Get output from mlogbuf */ | ||
222 | while (mlogbuf_start != mlogbuf_end) { | ||
223 | temp_buf[0] = '\0'; | ||
224 | p = temp_buf; | ||
225 | printed_len = 0; | ||
226 | |||
227 | spin_lock_irqsave(&mlogbuf_rlock, flags); | ||
228 | |||
229 | index = mlogbuf_start; | ||
230 | while (index != mlogbuf_end) { | ||
231 | *p = mlogbuf[index]; | ||
232 | index = (index + 1) % MLOGBUF_SIZE; | ||
233 | if (!*p) | ||
234 | break; | ||
235 | p++; | ||
236 | if (++printed_len >= MLOGBUF_MSGMAX - 1) | ||
237 | break; | ||
238 | } | ||
239 | *p = '\0'; | ||
240 | if (temp_buf[0]) | ||
241 | printk(temp_buf); | ||
242 | mlogbuf_start = index; | ||
243 | |||
244 | mlogbuf_timestamp = 0; | ||
245 | spin_unlock_irqrestore(&mlogbuf_rlock, flags); | ||
246 | } | ||
247 | } | ||
248 | EXPORT_SYMBOL(ia64_mlogbuf_dump); | ||
249 | |||
250 | /* | ||
251 | * Call this if system is going to down or if immediate flushing messages to | ||
252 | * console is required. (ex. recovery was failed, crash dump is going to be | ||
253 | * invoked, long-wait rendezvous etc.) | ||
254 | * NOTE: this should be called from monarch. | ||
255 | */ | ||
256 | static void ia64_mlogbuf_finish(int wait) | ||
257 | { | ||
258 | BREAK_LOGLEVEL(console_loglevel); | ||
259 | |||
260 | spin_lock_init(&mlogbuf_rlock); | ||
261 | ia64_mlogbuf_dump(); | ||
262 | printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " | ||
263 | "MCA/INIT might be dodgy or fail.\n"); | ||
264 | |||
265 | if (!wait) | ||
266 | return; | ||
267 | |||
268 | /* wait for console */ | ||
269 | printk("Delaying for 5 seconds...\n"); | ||
270 | udelay(5*1000000); | ||
271 | |||
272 | mlogbuf_finished = 1; | ||
273 | } | ||
274 | EXPORT_SYMBOL(ia64_mlogbuf_finish); | ||
275 | |||
276 | /* | ||
277 | * Print buffered messages from INIT context. | ||
278 | */ | ||
279 | static void ia64_mlogbuf_dump_from_init(void) | ||
280 | { | ||
281 | if (mlogbuf_finished) | ||
282 | return; | ||
283 | |||
284 | if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) { | ||
285 | printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " | ||
286 | " and the system seems to be messed up.\n"); | ||
287 | ia64_mlogbuf_finish(0); | ||
288 | return; | ||
289 | } | ||
290 | |||
291 | if (!spin_trylock(&mlogbuf_rlock)) { | ||
292 | printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " | ||
293 | "Generated messages other than stack dump will be " | ||
294 | "buffered to mlogbuf and will be printed later.\n"); | ||
295 | printk(KERN_ERR "INIT: If messages would not printed after " | ||
296 | "this INIT, wait 30sec and assert INIT again.\n"); | ||
297 | if (!mlogbuf_timestamp) | ||
298 | mlogbuf_timestamp = jiffies; | ||
299 | return; | ||
300 | } | ||
301 | spin_unlock(&mlogbuf_rlock); | ||
302 | ia64_mlogbuf_dump(); | ||
303 | } | ||
139 | 304 | ||
140 | static void inline | 305 | static void inline |
141 | ia64_mca_spin(const char *func) | 306 | ia64_mca_spin(const char *func) |
142 | { | 307 | { |
143 | printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | 308 | if (monarch_cpu == smp_processor_id()) |
309 | ia64_mlogbuf_finish(0); | ||
310 | mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | ||
144 | while (1) | 311 | while (1) |
145 | cpu_relax(); | 312 | cpu_relax(); |
146 | } | 313 | } |
@@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type) | |||
988 | } | 1155 | } |
989 | if (!missing) | 1156 | if (!missing) |
990 | goto all_in; | 1157 | goto all_in; |
991 | printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); | 1158 | /* |
1159 | * Maybe slave(s) dead. Print buffered messages immediately. | ||
1160 | */ | ||
1161 | ia64_mlogbuf_finish(0); | ||
1162 | mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); | ||
992 | for_each_online_cpu(c) { | 1163 | for_each_online_cpu(c) { |
993 | if (c == monarch) | 1164 | if (c == monarch) |
994 | continue; | 1165 | continue; |
995 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) | 1166 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) |
996 | printk(" %d", c); | 1167 | mprintk(" %d", c); |
997 | } | 1168 | } |
998 | printk("\n"); | 1169 | mprintk("\n"); |
999 | return; | 1170 | return; |
1000 | 1171 | ||
1001 | all_in: | 1172 | all_in: |
1002 | printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); | 1173 | mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); |
1003 | return; | 1174 | return; |
1004 | } | 1175 | } |
1005 | 1176 | ||
@@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1027 | struct ia64_mca_notify_die nd = | 1198 | struct ia64_mca_notify_die nd = |
1028 | { .sos = sos, .monarch_cpu = &monarch_cpu }; | 1199 | { .sos = sos, .monarch_cpu = &monarch_cpu }; |
1029 | 1200 | ||
1030 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 1201 | mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " |
1031 | console_loglevel = 15; /* make sure printks make it to console */ | 1202 | "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); |
1032 | printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n", | ||
1033 | sos->proc_state_param, cpu, sos->monarch); | ||
1034 | 1203 | ||
1035 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | 1204 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); |
1036 | monarch_cpu = cpu; | 1205 | monarch_cpu = cpu; |
@@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1066 | rh->severity = sal_log_severity_corrected; | 1235 | rh->severity = sal_log_severity_corrected; |
1067 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | 1236 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); |
1068 | sos->os_status = IA64_MCA_CORRECTED; | 1237 | sos->os_status = IA64_MCA_CORRECTED; |
1238 | } else { | ||
1239 | /* Dump buffered message to console */ | ||
1240 | ia64_mlogbuf_finish(1); | ||
1069 | } | 1241 | } |
1070 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) | 1242 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) |
1071 | == NOTIFY_STOP) | 1243 | == NOTIFY_STOP) |
@@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
1305 | struct task_struct *g, *t; | 1477 | struct task_struct *g, *t; |
1306 | if (val != DIE_INIT_MONARCH_PROCESS) | 1478 | if (val != DIE_INIT_MONARCH_PROCESS) |
1307 | return NOTIFY_DONE; | 1479 | return NOTIFY_DONE; |
1480 | |||
1481 | /* | ||
1482 | * FIXME: mlogbuf will brim over with INIT stack dumps. | ||
1483 | * To enable show_stack from INIT, we use oops_in_progress which should | ||
1484 | * be used in real oops. This would cause something wrong after INIT. | ||
1485 | */ | ||
1486 | BREAK_LOGLEVEL(console_loglevel); | ||
1487 | ia64_mlogbuf_dump_from_init(); | ||
1488 | |||
1308 | printk(KERN_ERR "Processes interrupted by INIT -"); | 1489 | printk(KERN_ERR "Processes interrupted by INIT -"); |
1309 | for_each_online_cpu(c) { | 1490 | for_each_online_cpu(c) { |
1310 | struct ia64_sal_os_state *s; | 1491 | struct ia64_sal_os_state *s; |
@@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
1326 | } while_each_thread (g, t); | 1507 | } while_each_thread (g, t); |
1327 | read_unlock(&tasklist_lock); | 1508 | read_unlock(&tasklist_lock); |
1328 | } | 1509 | } |
1510 | /* FIXME: This will not restore zapped printk locks. */ | ||
1511 | RESTORE_LOGLEVEL(console_loglevel); | ||
1329 | return NOTIFY_DONE; | 1512 | return NOTIFY_DONE; |
1330 | } | 1513 | } |
1331 | 1514 | ||
@@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1357 | struct ia64_mca_notify_die nd = | 1540 | struct ia64_mca_notify_die nd = |
1358 | { .sos = sos, .monarch_cpu = &monarch_cpu }; | 1541 | { .sos = sos, .monarch_cpu = &monarch_cpu }; |
1359 | 1542 | ||
1360 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | ||
1361 | console_loglevel = 15; /* make sure printks make it to console */ | ||
1362 | |||
1363 | (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); | 1543 | (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); |
1364 | 1544 | ||
1365 | printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", | 1545 | mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", |
1366 | sos->proc_state_param, cpu, sos->monarch); | 1546 | sos->proc_state_param, cpu, sos->monarch); |
1367 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); | 1547 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); |
1368 | 1548 | ||
@@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1375 | * fix their proms and get their customers updated. | 1555 | * fix their proms and get their customers updated. |
1376 | */ | 1556 | */ |
1377 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { | 1557 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { |
1378 | printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", | 1558 | mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", |
1379 | __FUNCTION__, cpu); | 1559 | __FUNCTION__, cpu); |
1380 | atomic_dec(&slaves); | 1560 | atomic_dec(&slaves); |
1381 | sos->monarch = 1; | 1561 | sos->monarch = 1; |
@@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1387 | * fix their proms and get their customers updated. | 1567 | * fix their proms and get their customers updated. |
1388 | */ | 1568 | */ |
1389 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { | 1569 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { |
1390 | printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", | 1570 | mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", |
1391 | __FUNCTION__, cpu); | 1571 | __FUNCTION__, cpu); |
1392 | atomic_dec(&monarchs); | 1572 | atomic_dec(&monarchs); |
1393 | sos->monarch = 0; | 1573 | sos->monarch = 0; |
@@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1408 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) | 1588 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) |
1409 | == NOTIFY_STOP) | 1589 | == NOTIFY_STOP) |
1410 | ia64_mca_spin(__FUNCTION__); | 1590 | ia64_mca_spin(__FUNCTION__); |
1411 | printk("Slave on cpu %d returning to normal service.\n", cpu); | 1591 | mprintk("Slave on cpu %d returning to normal service.\n", cpu); |
1412 | set_curr_task(cpu, previous_current); | 1592 | set_curr_task(cpu, previous_current); |
1413 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | 1593 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; |
1414 | atomic_dec(&slaves); | 1594 | atomic_dec(&slaves); |
@@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1426 | * same serial line, the user will need some time to switch out of the BMC before | 1606 | * same serial line, the user will need some time to switch out of the BMC before |
1427 | * the dump begins. | 1607 | * the dump begins. |
1428 | */ | 1608 | */ |
1429 | printk("Delaying for 5 seconds...\n"); | 1609 | mprintk("Delaying for 5 seconds...\n"); |
1430 | udelay(5*1000000); | 1610 | udelay(5*1000000); |
1431 | ia64_wait_for_slaves(cpu, "INIT"); | 1611 | ia64_wait_for_slaves(cpu, "INIT"); |
1432 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through | 1612 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through |
@@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1439 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) | 1619 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) |
1440 | == NOTIFY_STOP) | 1620 | == NOTIFY_STOP) |
1441 | ia64_mca_spin(__FUNCTION__); | 1621 | ia64_mca_spin(__FUNCTION__); |
1442 | printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); | 1622 | mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); |
1443 | atomic_dec(&monarchs); | 1623 | atomic_dec(&monarchs); |
1444 | set_curr_task(cpu, previous_current); | 1624 | set_curr_task(cpu, previous_current); |
1445 | monarch_cpu = -1; | 1625 | monarch_cpu = -1; |