aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/mca.c
diff options
context:
space:
mode:
authorHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>2006-09-26 17:44:37 -0400
committerTony Luck <tony.luck@intel.com>2006-09-26 17:44:37 -0400
commit43ed3baf623410b3fa6ca14a9d3f6deca3493c56 (patch)
treeb086b18adff2af6b2633e239e9d1b26d764ae333 /arch/ia64/kernel/mca.c
parent816add4e986499145135c4014a7c8a8857f9f3c3 (diff)
[IA64] printing support for MCA/INIT
Printing message to console from MCA/INIT handler is useful, however doing oops_in_progress = 1 in them exactly makes something in kernel wrong. Especially it sounds ugly if system goes wrong after returning from recoverable MCA. This patch adds ia64_mca_printk() function that collects messages into temporary-not-so-large message buffer during in MCA/INIT environment and print them out later, after returning to normal context or when handlers determine to down the system. Also this print function is exported for use in extensional MCA handler. It would be useful to describe detail about recovery. NOTE: I don't think it is sane thing if temporary message buffer is enlarged enough to hold whole stack dumps from INIT, so buffering is disabled during stack dump from INIT-monarch (= default_monarch_init_process). please fix it in future. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Acked-by: Russ Anderson <rja@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
-rw-r--r--arch/ia64/kernel/mca.c216
1 files changed, 198 insertions, 18 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 2fbe4536fe18..98f3b26d7aff 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -54,6 +54,9 @@
54 * 54 *
55 * 2005-10-07 Keith Owens <kaos@sgi.com> 55 * 2005-10-07 Keith Owens <kaos@sgi.com>
56 * Add notify_die() hooks. 56 * Add notify_die() hooks.
57 *
58 * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
59 * Add printing support for MCA/INIT.
57 */ 60 */
58#include <linux/types.h> 61#include <linux/types.h>
59#include <linux/init.h> 62#include <linux/init.h>
@@ -136,11 +139,175 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
136 139
137static int mca_init __initdata; 140static int mca_init __initdata;
138 141
142/*
143 * limited & delayed printing support for MCA/INIT handler
144 */
145
146#define mprintk(fmt...) ia64_mca_printk(fmt)
147
148#define MLOGBUF_SIZE (512+256*NR_CPUS)
149#define MLOGBUF_MSGMAX 256
150static char mlogbuf[MLOGBUF_SIZE];
151static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */
152static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */
153static unsigned long mlogbuf_start;
154static unsigned long mlogbuf_end;
155static unsigned int mlogbuf_finished = 0;
156static unsigned long mlogbuf_timestamp = 0;
157
158static int loglevel_save = -1;
159#define BREAK_LOGLEVEL(__console_loglevel) \
160 oops_in_progress = 1; \
161 if (loglevel_save < 0) \
162 loglevel_save = __console_loglevel; \
163 __console_loglevel = 15;
164
165#define RESTORE_LOGLEVEL(__console_loglevel) \
166 if (loglevel_save >= 0) { \
167 __console_loglevel = loglevel_save; \
168 loglevel_save = -1; \
169 } \
170 mlogbuf_finished = 0; \
171 oops_in_progress = 0;
172
173/*
174 * Push messages into buffer, print them later if not urgent.
175 */
176void ia64_mca_printk(const char *fmt, ...)
177{
178 va_list args;
179 int printed_len;
180 char temp_buf[MLOGBUF_MSGMAX];
181 char *p;
182
183 va_start(args, fmt);
184 printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);
185 va_end(args);
186
187 /* Copy the output into mlogbuf */
188 if (oops_in_progress) {
189 /* mlogbuf was abandoned, use printk directly instead. */
190 printk(temp_buf);
191 } else {
192 spin_lock(&mlogbuf_wlock);
193 for (p = temp_buf; *p; p++) {
194 unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
195 if (next != mlogbuf_start) {
196 mlogbuf[mlogbuf_end] = *p;
197 mlogbuf_end = next;
198 } else {
199 /* buffer full */
200 break;
201 }
202 }
203 mlogbuf[mlogbuf_end] = '\0';
204 spin_unlock(&mlogbuf_wlock);
205 }
206}
207EXPORT_SYMBOL(ia64_mca_printk);
208
209/*
210 * Print buffered messages.
211 * NOTE: call this after returning normal context. (ex. from salinfod)
212 */
213void ia64_mlogbuf_dump(void)
214{
215 char temp_buf[MLOGBUF_MSGMAX];
216 char *p;
217 unsigned long index;
218 unsigned long flags;
219 unsigned int printed_len;
220
221 /* Get output from mlogbuf */
222 while (mlogbuf_start != mlogbuf_end) {
223 temp_buf[0] = '\0';
224 p = temp_buf;
225 printed_len = 0;
226
227 spin_lock_irqsave(&mlogbuf_rlock, flags);
228
229 index = mlogbuf_start;
230 while (index != mlogbuf_end) {
231 *p = mlogbuf[index];
232 index = (index + 1) % MLOGBUF_SIZE;
233 if (!*p)
234 break;
235 p++;
236 if (++printed_len >= MLOGBUF_MSGMAX - 1)
237 break;
238 }
239 *p = '\0';
240 if (temp_buf[0])
241 printk(temp_buf);
242 mlogbuf_start = index;
243
244 mlogbuf_timestamp = 0;
245 spin_unlock_irqrestore(&mlogbuf_rlock, flags);
246 }
247}
248EXPORT_SYMBOL(ia64_mlogbuf_dump);
249
250/*
251 * Call this if system is going to down or if immediate flushing messages to
252 * console is required. (ex. recovery was failed, crash dump is going to be
253 * invoked, long-wait rendezvous etc.)
254 * NOTE: this should be called from monarch.
255 */
256static void ia64_mlogbuf_finish(int wait)
257{
258 BREAK_LOGLEVEL(console_loglevel);
259
260 spin_lock_init(&mlogbuf_rlock);
261 ia64_mlogbuf_dump();
262 printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
263 "MCA/INIT might be dodgy or fail.\n");
264
265 if (!wait)
266 return;
267
268 /* wait for console */
269 printk("Delaying for 5 seconds...\n");
270 udelay(5*1000000);
271
272 mlogbuf_finished = 1;
273}
274EXPORT_SYMBOL(ia64_mlogbuf_finish);
275
276/*
277 * Print buffered messages from INIT context.
278 */
279static void ia64_mlogbuf_dump_from_init(void)
280{
281 if (mlogbuf_finished)
282 return;
283
284 if (mlogbuf_timestamp && (mlogbuf_timestamp + 30*HZ > jiffies)) {
285 printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
286 " and the system seems to be messed up.\n");
287 ia64_mlogbuf_finish(0);
288 return;
289 }
290
291 if (!spin_trylock(&mlogbuf_rlock)) {
292 printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
293 "Generated messages other than stack dump will be "
294 "buffered to mlogbuf and will be printed later.\n");
295 printk(KERN_ERR "INIT: If messages would not printed after "
296 "this INIT, wait 30sec and assert INIT again.\n");
297 if (!mlogbuf_timestamp)
298 mlogbuf_timestamp = jiffies;
299 return;
300 }
301 spin_unlock(&mlogbuf_rlock);
302 ia64_mlogbuf_dump();
303}
139 304
140static void inline 305static void inline
141ia64_mca_spin(const char *func) 306ia64_mca_spin(const char *func)
142{ 307{
143 printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); 308 if (monarch_cpu == smp_processor_id())
309 ia64_mlogbuf_finish(0);
310 mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
144 while (1) 311 while (1)
145 cpu_relax(); 312 cpu_relax();
146} 313}
@@ -988,18 +1155,22 @@ ia64_wait_for_slaves(int monarch, const char *type)
988 } 1155 }
989 if (!missing) 1156 if (!missing)
990 goto all_in; 1157 goto all_in;
991 printk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); 1158 /*
1159 * Maybe slave(s) dead. Print buffered messages immediately.
1160 */
1161 ia64_mlogbuf_finish(0);
1162 mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type);
992 for_each_online_cpu(c) { 1163 for_each_online_cpu(c) {
993 if (c == monarch) 1164 if (c == monarch)
994 continue; 1165 continue;
995 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) 1166 if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
996 printk(" %d", c); 1167 mprintk(" %d", c);
997 } 1168 }
998 printk("\n"); 1169 mprintk("\n");
999 return; 1170 return;
1000 1171
1001all_in: 1172all_in:
1002 printk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); 1173 mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type);
1003 return; 1174 return;
1004} 1175}
1005 1176
@@ -1027,10 +1198,8 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1027 struct ia64_mca_notify_die nd = 1198 struct ia64_mca_notify_die nd =
1028 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1199 { .sos = sos, .monarch_cpu = &monarch_cpu };
1029 1200
1030 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ 1201 mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
1031 console_loglevel = 15; /* make sure printks make it to console */ 1202 "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
1032 printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
1033 sos->proc_state_param, cpu, sos->monarch);
1034 1203
1035 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); 1204 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
1036 monarch_cpu = cpu; 1205 monarch_cpu = cpu;
@@ -1066,6 +1235,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1066 rh->severity = sal_log_severity_corrected; 1235 rh->severity = sal_log_severity_corrected;
1067 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); 1236 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
1068 sos->os_status = IA64_MCA_CORRECTED; 1237 sos->os_status = IA64_MCA_CORRECTED;
1238 } else {
1239 /* Dump buffered message to console */
1240 ia64_mlogbuf_finish(1);
1069 } 1241 }
1070 if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover) 1242 if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, (long)&nd, 0, recover)
1071 == NOTIFY_STOP) 1243 == NOTIFY_STOP)
@@ -1305,6 +1477,15 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
1305 struct task_struct *g, *t; 1477 struct task_struct *g, *t;
1306 if (val != DIE_INIT_MONARCH_PROCESS) 1478 if (val != DIE_INIT_MONARCH_PROCESS)
1307 return NOTIFY_DONE; 1479 return NOTIFY_DONE;
1480
1481 /*
1482 * FIXME: mlogbuf will brim over with INIT stack dumps.
1483 * To enable show_stack from INIT, we use oops_in_progress which should
1484 * be used in real oops. This would cause something wrong after INIT.
1485 */
1486 BREAK_LOGLEVEL(console_loglevel);
1487 ia64_mlogbuf_dump_from_init();
1488
1308 printk(KERN_ERR "Processes interrupted by INIT -"); 1489 printk(KERN_ERR "Processes interrupted by INIT -");
1309 for_each_online_cpu(c) { 1490 for_each_online_cpu(c) {
1310 struct ia64_sal_os_state *s; 1491 struct ia64_sal_os_state *s;
@@ -1326,6 +1507,8 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
1326 } while_each_thread (g, t); 1507 } while_each_thread (g, t);
1327 read_unlock(&tasklist_lock); 1508 read_unlock(&tasklist_lock);
1328 } 1509 }
1510 /* FIXME: This will not restore zapped printk locks. */
1511 RESTORE_LOGLEVEL(console_loglevel);
1329 return NOTIFY_DONE; 1512 return NOTIFY_DONE;
1330} 1513}
1331 1514
@@ -1357,12 +1540,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1357 struct ia64_mca_notify_die nd = 1540 struct ia64_mca_notify_die nd =
1358 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1541 { .sos = sos, .monarch_cpu = &monarch_cpu };
1359 1542
1360 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
1361 console_loglevel = 15; /* make sure printks make it to console */
1362
1363 (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0); 1543 (void) notify_die(DIE_INIT_ENTER, "INIT", regs, (long)&nd, 0, 0);
1364 1544
1365 printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", 1545 mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
1366 sos->proc_state_param, cpu, sos->monarch); 1546 sos->proc_state_param, cpu, sos->monarch);
1367 salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); 1547 salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
1368 1548
@@ -1375,7 +1555,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1375 * fix their proms and get their customers updated. 1555 * fix their proms and get their customers updated.
1376 */ 1556 */
1377 if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { 1557 if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
1378 printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", 1558 mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
1379 __FUNCTION__, cpu); 1559 __FUNCTION__, cpu);
1380 atomic_dec(&slaves); 1560 atomic_dec(&slaves);
1381 sos->monarch = 1; 1561 sos->monarch = 1;
@@ -1387,7 +1567,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1387 * fix their proms and get their customers updated. 1567 * fix their proms and get their customers updated.
1388 */ 1568 */
1389 if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { 1569 if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
1390 printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", 1570 mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
1391 __FUNCTION__, cpu); 1571 __FUNCTION__, cpu);
1392 atomic_dec(&monarchs); 1572 atomic_dec(&monarchs);
1393 sos->monarch = 0; 1573 sos->monarch = 0;
@@ -1408,7 +1588,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1408 if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0) 1588 if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, (long)&nd, 0, 0)
1409 == NOTIFY_STOP) 1589 == NOTIFY_STOP)
1410 ia64_mca_spin(__FUNCTION__); 1590 ia64_mca_spin(__FUNCTION__);
1411 printk("Slave on cpu %d returning to normal service.\n", cpu); 1591 mprintk("Slave on cpu %d returning to normal service.\n", cpu);
1412 set_curr_task(cpu, previous_current); 1592 set_curr_task(cpu, previous_current);
1413 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1593 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1414 atomic_dec(&slaves); 1594 atomic_dec(&slaves);
@@ -1426,7 +1606,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1426 * same serial line, the user will need some time to switch out of the BMC before 1606 * same serial line, the user will need some time to switch out of the BMC before
1427 * the dump begins. 1607 * the dump begins.
1428 */ 1608 */
1429 printk("Delaying for 5 seconds...\n"); 1609 mprintk("Delaying for 5 seconds...\n");
1430 udelay(5*1000000); 1610 udelay(5*1000000);
1431 ia64_wait_for_slaves(cpu, "INIT"); 1611 ia64_wait_for_slaves(cpu, "INIT");
1432 /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through 1612 /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
@@ -1439,7 +1619,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1439 if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0) 1619 if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, (long)&nd, 0, 0)
1440 == NOTIFY_STOP) 1620 == NOTIFY_STOP)
1441 ia64_mca_spin(__FUNCTION__); 1621 ia64_mca_spin(__FUNCTION__);
1442 printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); 1622 mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
1443 atomic_dec(&monarchs); 1623 atomic_dec(&monarchs);
1444 set_curr_task(cpu, previous_current); 1624 set_curr_task(cpu, previous_current);
1445 monarch_cpu = -1; 1625 monarch_cpu = -1;