aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/mca.c
diff options
context:
space:
mode:
authorKeith Owens <kaos@sgi.com>2005-11-07 14:27:13 -0500
committerTony Luck <tony.luck@intel.com>2005-11-07 14:27:13 -0500
commit9138d581b0ef855c0314c41c14852a7231b9941c (patch)
treef8aed7413d873877bc8373a37055308d388faa06 /arch/ia64/kernel/mca.c
parent5b2f7ffcb734d3046144dfbd5ac6d76254a9e522 (diff)
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple notification points for these events because they can take many seconds to run, which has nasty effects on the behaviour of the rest of the system. DIE_SS replaced by a generic DIE_FAULT which checks the vector number, to allow interception of faults other than SS. DIE_MACHINE_{HALT,RESTART} added to allow last-minute close down processing, especially when the halt/restart routines are called from error handlers. DIE_OOPS added. The check for kprobe's break numbers has been moved from traps.c to kprobes.c, allowing DIE_BREAK to be used for any additional break numbers, i.e. it is no longer kprobes-specific. Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE. Both of these disable the system for long periods, which impacts watchdogs and heartbeat systems in general. More patches to come that use these events to reset watchdogs and heartbeats. unregister_die_notifier() added and both routines exported. Requested by Dean Nelson. Lock removed from {un,}register_die_notifier. notifier_chain_register() already takes a lock. Also the generic notifier chain locking is being reworked to distinguish between callbacks that can block and those that cannot; the lock in {un,}register_die_notifier would interfere with that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2 Leading white space removed from arch/ia64/kernel/kprobes.c. Typo in mca.c in original version of this patch found & fixed by Dean Nelson. Signed-off-by: Keith Owens <kaos@sgi.com> Acked-by: Dean Nelson <dcn@sgi.com> Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
-rw-r--r--arch/ia64/kernel/mca.c120
1 files changed, 95 insertions, 25 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 52c47da17246..355af15287c7 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -51,6 +51,9 @@
51 * 51 *
52 * 2005-08-12 Keith Owens <kaos@sgi.com> 52 * 2005-08-12 Keith Owens <kaos@sgi.com>
53 * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. 53 * Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
54 *
55 * 2005-10-07 Keith Owens <kaos@sgi.com>
56 * Add notify_die() hooks.
54 */ 57 */
55#include <linux/config.h> 58#include <linux/config.h>
56#include <linux/types.h> 59#include <linux/types.h>
@@ -58,7 +61,6 @@
58#include <linux/sched.h> 61#include <linux/sched.h>
59#include <linux/interrupt.h> 62#include <linux/interrupt.h>
60#include <linux/irq.h> 63#include <linux/irq.h>
61#include <linux/kallsyms.h>
62#include <linux/smp_lock.h> 64#include <linux/smp_lock.h>
63#include <linux/bootmem.h> 65#include <linux/bootmem.h>
64#include <linux/acpi.h> 66#include <linux/acpi.h>
@@ -69,6 +71,7 @@
69#include <linux/workqueue.h> 71#include <linux/workqueue.h>
70 72
71#include <asm/delay.h> 73#include <asm/delay.h>
74#include <asm/kdebug.h>
72#include <asm/machvec.h> 75#include <asm/machvec.h>
73#include <asm/meminit.h> 76#include <asm/meminit.h>
74#include <asm/page.h> 77#include <asm/page.h>
@@ -132,6 +135,14 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
132 135
133static int mca_init; 136static int mca_init;
134 137
138
139static void inline
140ia64_mca_spin(const char *func)
141{
142 printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func);
143 while (1)
144 cpu_relax();
145}
135/* 146/*
136 * IA64_MCA log support 147 * IA64_MCA log support
137 */ 148 */
@@ -526,13 +537,16 @@ ia64_mca_wakeup_all(void)
526 * Outputs : None 537 * Outputs : None
527 */ 538 */
528static irqreturn_t 539static irqreturn_t
529ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) 540ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs)
530{ 541{
531 unsigned long flags; 542 unsigned long flags;
532 int cpu = smp_processor_id(); 543 int cpu = smp_processor_id();
533 544
534 /* Mask all interrupts */ 545 /* Mask all interrupts */
535 local_irq_save(flags); 546 local_irq_save(flags);
547 if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, 0, 0, 0)
548 == NOTIFY_STOP)
549 ia64_mca_spin(__FUNCTION__);
536 550
537 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; 551 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
538 /* Register with the SAL monarch that the slave has 552 /* Register with the SAL monarch that the slave has
@@ -540,10 +554,18 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
540 */ 554 */
541 ia64_sal_mc_rendez(); 555 ia64_sal_mc_rendez();
542 556
557 if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, 0, 0, 0)
558 == NOTIFY_STOP)
559 ia64_mca_spin(__FUNCTION__);
560
543 /* Wait for the monarch cpu to exit. */ 561 /* Wait for the monarch cpu to exit. */
544 while (monarch_cpu != -1) 562 while (monarch_cpu != -1)
545 cpu_relax(); /* spin until monarch leaves */ 563 cpu_relax(); /* spin until monarch leaves */
546 564
565 if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, 0, 0, 0)
566 == NOTIFY_STOP)
567 ia64_mca_spin(__FUNCTION__);
568
547 /* Enable all interrupts */ 569 /* Enable all interrupts */
548 local_irq_restore(flags); 570 local_irq_restore(flags);
549 return IRQ_HANDLED; 571 return IRQ_HANDLED;
@@ -933,6 +955,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
933 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ 955 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
934 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); 956 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
935 monarch_cpu = cpu; 957 monarch_cpu = cpu;
958 if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
959 == NOTIFY_STOP)
960 ia64_mca_spin(__FUNCTION__);
936 ia64_wait_for_slaves(cpu); 961 ia64_wait_for_slaves(cpu);
937 962
938 /* Wakeup all the processors which are spinning in the rendezvous loop. 963 /* Wakeup all the processors which are spinning in the rendezvous loop.
@@ -942,6 +967,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
942 * spinning in SAL does not work. 967 * spinning in SAL does not work.
943 */ 968 */
944 ia64_mca_wakeup_all(); 969 ia64_mca_wakeup_all();
970 if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, 0, 0, 0)
971 == NOTIFY_STOP)
972 ia64_mca_spin(__FUNCTION__);
945 973
946 /* Get the MCA error record and log it */ 974 /* Get the MCA error record and log it */
947 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); 975 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -960,6 +988,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
960 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); 988 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
961 sos->os_status = IA64_MCA_CORRECTED; 989 sos->os_status = IA64_MCA_CORRECTED;
962 } 990 }
991 if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, 0, 0, recover)
992 == NOTIFY_STOP)
993 ia64_mca_spin(__FUNCTION__);
963 994
964 set_curr_task(cpu, previous_current); 995 set_curr_task(cpu, previous_current);
965 monarch_cpu = -1; 996 monarch_cpu = -1;
@@ -1188,6 +1219,37 @@ ia64_mca_cpe_poll (unsigned long dummy)
1188 1219
1189#endif /* CONFIG_ACPI */ 1220#endif /* CONFIG_ACPI */
1190 1221
1222static int
1223default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data)
1224{
1225 int c;
1226 struct task_struct *g, *t;
1227 if (val != DIE_INIT_MONARCH_PROCESS)
1228 return NOTIFY_DONE;
1229 printk(KERN_ERR "Processes interrupted by INIT -");
1230 for_each_online_cpu(c) {
1231 struct ia64_sal_os_state *s;
1232 t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
1233 s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
1234 g = s->prev_task;
1235 if (g) {
1236 if (g->pid)
1237 printk(" %d", g->pid);
1238 else
1239 printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
1240 }
1241 }
1242 printk("\n\n");
1243 if (read_trylock(&tasklist_lock)) {
1244 do_each_thread (g, t) {
1245 printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
1246 show_stack(t, NULL);
1247 } while_each_thread (g, t);
1248 read_unlock(&tasklist_lock);
1249 }
1250 return NOTIFY_DONE;
1251}
1252
1191/* 1253/*
1192 * C portion of the OS INIT handler 1254 * C portion of the OS INIT handler
1193 * 1255 *
@@ -1212,8 +1274,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1212 static atomic_t slaves; 1274 static atomic_t slaves;
1213 static atomic_t monarchs; 1275 static atomic_t monarchs;
1214 task_t *previous_current; 1276 task_t *previous_current;
1215 int cpu = smp_processor_id(), c; 1277 int cpu = smp_processor_id();
1216 struct task_struct *g, *t;
1217 1278
1218 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ 1279 oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
1219 console_loglevel = 15; /* make sure printks make it to console */ 1280 console_loglevel = 15; /* make sure printks make it to console */
@@ -1253,8 +1314,17 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1253 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; 1314 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
1254 while (monarch_cpu == -1) 1315 while (monarch_cpu == -1)
1255 cpu_relax(); /* spin until monarch enters */ 1316 cpu_relax(); /* spin until monarch enters */
1317 if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, 0, 0, 0)
1318 == NOTIFY_STOP)
1319 ia64_mca_spin(__FUNCTION__);
1320 if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, 0, 0, 0)
1321 == NOTIFY_STOP)
1322 ia64_mca_spin(__FUNCTION__);
1256 while (monarch_cpu != -1) 1323 while (monarch_cpu != -1)
1257 cpu_relax(); /* spin until monarch leaves */ 1324 cpu_relax(); /* spin until monarch leaves */
1325 if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, 0, 0, 0)
1326 == NOTIFY_STOP)
1327 ia64_mca_spin(__FUNCTION__);
1258 printk("Slave on cpu %d returning to normal service.\n", cpu); 1328 printk("Slave on cpu %d returning to normal service.\n", cpu);
1259 set_curr_task(cpu, previous_current); 1329 set_curr_task(cpu, previous_current);
1260 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; 1330 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
@@ -1263,6 +1333,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1263 } 1333 }
1264 1334
1265 monarch_cpu = cpu; 1335 monarch_cpu = cpu;
1336 if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, 0, 0, 0)
1337 == NOTIFY_STOP)
1338 ia64_mca_spin(__FUNCTION__);
1266 1339
1267 /* 1340 /*
1268 * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be 1341 * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be
@@ -1273,27 +1346,16 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
1273 printk("Delaying for 5 seconds...\n"); 1346 printk("Delaying for 5 seconds...\n");
1274 udelay(5*1000000); 1347 udelay(5*1000000);
1275 ia64_wait_for_slaves(cpu); 1348 ia64_wait_for_slaves(cpu);
1276 printk(KERN_ERR "Processes interrupted by INIT -"); 1349 /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through
1277 for_each_online_cpu(c) { 1350 * to default_monarch_init_process() above and just print all the
1278 struct ia64_sal_os_state *s; 1351 * tasks.
1279 t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); 1352 */
1280 s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); 1353 if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, 0, 0, 0)
1281 g = s->prev_task; 1354 == NOTIFY_STOP)
1282 if (g) { 1355 ia64_mca_spin(__FUNCTION__);
1283 if (g->pid) 1356 if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, 0, 0, 0)
1284 printk(" %d", g->pid); 1357 == NOTIFY_STOP)
1285 else 1358 ia64_mca_spin(__FUNCTION__);
1286 printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
1287 }
1288 }
1289 printk("\n\n");
1290 if (read_trylock(&tasklist_lock)) {
1291 do_each_thread (g, t) {
1292 printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
1293 show_stack(t, NULL);
1294 } while_each_thread (g, t);
1295 read_unlock(&tasklist_lock);
1296 }
1297 printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); 1359 printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);
1298 atomic_dec(&monarchs); 1360 atomic_dec(&monarchs);
1299 set_curr_task(cpu, previous_current); 1361 set_curr_task(cpu, previous_current);
@@ -1462,6 +1524,10 @@ ia64_mca_init(void)
1462 s64 rc; 1524 s64 rc;
1463 struct ia64_sal_retval isrv; 1525 struct ia64_sal_retval isrv;
1464 u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ 1526 u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
1527 static struct notifier_block default_init_monarch_nb = {
1528 .notifier_call = default_monarch_init_process,
1529 .priority = 0/* we need to notified last */
1530 };
1465 1531
1466 IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); 1532 IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__);
1467 1533
@@ -1555,6 +1621,10 @@ ia64_mca_init(void)
1555 "(status %ld)\n", rc); 1621 "(status %ld)\n", rc);
1556 return; 1622 return;
1557 } 1623 }
1624 if (register_die_notifier(&default_init_monarch_nb)) {
1625 printk(KERN_ERR "Failed to register default monarch INIT process\n");
1626 return;
1627 }
1558 1628
1559 IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); 1629 IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__);
1560 1630