diff options
author | Keith Owens <kaos@sgi.com> | 2005-11-07 14:27:13 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-11-07 14:27:13 -0500 |
commit | 9138d581b0ef855c0314c41c14852a7231b9941c (patch) | |
tree | f8aed7413d873877bc8373a37055308d388faa06 /arch/ia64/kernel/mca.c | |
parent | 5b2f7ffcb734d3046144dfbd5ac6d76254a9e522 (diff) |
[IA64] Extend notify_die() hooks for IA64
notify_die() added for MCA_{MONARCH,SLAVE,RENDEZVOUS}_{ENTER,PROCESS,LEAVE} and
INIT_{MONARCH,SLAVE}_{ENTER,PROCESS,LEAVE}. We need multiple
notification points for these events because they can take many seconds
to run, which has nasty effects on the behaviour of the rest of the
system.
DIE_SS replaced by a generic DIE_FAULT which checks the vector number,
to allow interception of faults other than SS.
DIE_MACHINE_{HALT,RESTART} added to allow last minute close down
processing, especially when the halt/restart routines are called from
error handlers.
DIE_OOPS added.
The check for kprobe's break numbers has been moved from traps.c to
kprobes.c, allowing DIE_BREAK to be used for any additional break
numbers, i.e. it is no longer kprobes specific.
Hooks for kernel debuggers and kernel dumpers added, ENTER and LEAVE.
Both of these disable the system for long periods, which impacts
watchdogs and heartbeat systems in general. More patches will follow that
use these events to reset watchdogs and heartbeats.
unregister_die_notifier() added and both routines exported. Requested
by Dean Nelson.
Lock removed from {un,}register_die_notifier. notifier_chain_register()
already takes a lock. Also, the generic notifier chain locking is being
reworked to distinguish between callbacks that can block and those that
cannot; the lock in {un,}register_die_notifier would interfere with
that change. http://marc.theaimsgroup.com/?l=linux-kernel&m=113018709002036&w=2
Leading white space removed from arch/ia64/kernel/kprobes.c.
Typo in mca.c in original version of this patch found & fixed by Dean
Nelson.
Signed-off-by: Keith Owens <kaos@sgi.com>
Acked-by: Dean Nelson <dcn@sgi.com>
Acked-by: Anil Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
-rw-r--r-- | arch/ia64/kernel/mca.c | 120 |
1 files changed, 95 insertions, 25 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 52c47da17246..355af15287c7 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -51,6 +51,9 @@ | |||
51 | * | 51 | * |
52 | * 2005-08-12 Keith Owens <kaos@sgi.com> | 52 | * 2005-08-12 Keith Owens <kaos@sgi.com> |
53 | * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. | 53 | * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. |
54 | * | ||
55 | * 2005-10-07 Keith Owens <kaos@sgi.com> | ||
56 | * Add notify_die() hooks. | ||
54 | */ | 57 | */ |
55 | #include <linux/config.h> | 58 | #include <linux/config.h> |
56 | #include <linux/types.h> | 59 | #include <linux/types.h> |
@@ -58,7 +61,6 @@ | |||
58 | #include <linux/sched.h> | 61 | #include <linux/sched.h> |
59 | #include <linux/interrupt.h> | 62 | #include <linux/interrupt.h> |
60 | #include <linux/irq.h> | 63 | #include <linux/irq.h> |
61 | #include <linux/kallsyms.h> | ||
62 | #include <linux/smp_lock.h> | 64 | #include <linux/smp_lock.h> |
63 | #include <linux/bootmem.h> | 65 | #include <linux/bootmem.h> |
64 | #include <linux/acpi.h> | 66 | #include <linux/acpi.h> |
@@ -69,6 +71,7 @@ | |||
69 | #include <linux/workqueue.h> | 71 | #include <linux/workqueue.h> |
70 | 72 | ||
71 | #include <asm/delay.h> | 73 | #include <asm/delay.h> |
74 | #include <asm/kdebug.h> | ||
72 | #include <asm/machvec.h> | 75 | #include <asm/machvec.h> |
73 | #include <asm/meminit.h> | 76 | #include <asm/meminit.h> |
74 | #include <asm/page.h> | 77 | #include <asm/page.h> |
@@ -132,6 +135,14 @@ extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); | |||
132 | 135 | ||
133 | static int mca_init; | 136 | static int mca_init; |
134 | 137 | ||
138 | |||
139 | static void inline | ||
140 | ia64_mca_spin(const char *func) | ||
141 | { | ||
142 | printk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); | ||
143 | while (1) | ||
144 | cpu_relax(); | ||
145 | } | ||
135 | /* | 146 | /* |
136 | * IA64_MCA log support | 147 | * IA64_MCA log support |
137 | */ | 148 | */ |
@@ -526,13 +537,16 @@ ia64_mca_wakeup_all(void) | |||
526 | * Outputs : None | 537 | * Outputs : None |
527 | */ | 538 | */ |
528 | static irqreturn_t | 539 | static irqreturn_t |
529 | ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) | 540 | ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *regs) |
530 | { | 541 | { |
531 | unsigned long flags; | 542 | unsigned long flags; |
532 | int cpu = smp_processor_id(); | 543 | int cpu = smp_processor_id(); |
533 | 544 | ||
534 | /* Mask all interrupts */ | 545 | /* Mask all interrupts */ |
535 | local_irq_save(flags); | 546 | local_irq_save(flags); |
547 | if (notify_die(DIE_MCA_RENDZVOUS_ENTER, "MCA", regs, 0, 0, 0) | ||
548 | == NOTIFY_STOP) | ||
549 | ia64_mca_spin(__FUNCTION__); | ||
536 | 550 | ||
537 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; | 551 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; |
538 | /* Register with the SAL monarch that the slave has | 552 | /* Register with the SAL monarch that the slave has |
@@ -540,10 +554,18 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) | |||
540 | */ | 554 | */ |
541 | ia64_sal_mc_rendez(); | 555 | ia64_sal_mc_rendez(); |
542 | 556 | ||
557 | if (notify_die(DIE_MCA_RENDZVOUS_PROCESS, "MCA", regs, 0, 0, 0) | ||
558 | == NOTIFY_STOP) | ||
559 | ia64_mca_spin(__FUNCTION__); | ||
560 | |||
543 | /* Wait for the monarch cpu to exit. */ | 561 | /* Wait for the monarch cpu to exit. */ |
544 | while (monarch_cpu != -1) | 562 | while (monarch_cpu != -1) |
545 | cpu_relax(); /* spin until monarch leaves */ | 563 | cpu_relax(); /* spin until monarch leaves */ |
546 | 564 | ||
565 | if (notify_die(DIE_MCA_RENDZVOUS_LEAVE, "MCA", regs, 0, 0, 0) | ||
566 | == NOTIFY_STOP) | ||
567 | ia64_mca_spin(__FUNCTION__); | ||
568 | |||
547 | /* Enable all interrupts */ | 569 | /* Enable all interrupts */ |
548 | local_irq_restore(flags); | 570 | local_irq_restore(flags); |
549 | return IRQ_HANDLED; | 571 | return IRQ_HANDLED; |
@@ -933,6 +955,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
933 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 955 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ |
934 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | 956 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); |
935 | monarch_cpu = cpu; | 957 | monarch_cpu = cpu; |
958 | if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0) | ||
959 | == NOTIFY_STOP) | ||
960 | ia64_mca_spin(__FUNCTION__); | ||
936 | ia64_wait_for_slaves(cpu); | 961 | ia64_wait_for_slaves(cpu); |
937 | 962 | ||
938 | /* Wakeup all the processors which are spinning in the rendezvous loop. | 963 | /* Wakeup all the processors which are spinning in the rendezvous loop. |
@@ -942,6 +967,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
942 | * spinning in SAL does not work. | 967 | * spinning in SAL does not work. |
943 | */ | 968 | */ |
944 | ia64_mca_wakeup_all(); | 969 | ia64_mca_wakeup_all(); |
970 | if (notify_die(DIE_MCA_MONARCH_PROCESS, "MCA", regs, 0, 0, 0) | ||
971 | == NOTIFY_STOP) | ||
972 | ia64_mca_spin(__FUNCTION__); | ||
945 | 973 | ||
946 | /* Get the MCA error record and log it */ | 974 | /* Get the MCA error record and log it */ |
947 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); | 975 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); |
@@ -960,6 +988,9 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
960 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | 988 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); |
961 | sos->os_status = IA64_MCA_CORRECTED; | 989 | sos->os_status = IA64_MCA_CORRECTED; |
962 | } | 990 | } |
991 | if (notify_die(DIE_MCA_MONARCH_LEAVE, "MCA", regs, 0, 0, recover) | ||
992 | == NOTIFY_STOP) | ||
993 | ia64_mca_spin(__FUNCTION__); | ||
963 | 994 | ||
964 | set_curr_task(cpu, previous_current); | 995 | set_curr_task(cpu, previous_current); |
965 | monarch_cpu = -1; | 996 | monarch_cpu = -1; |
@@ -1188,6 +1219,37 @@ ia64_mca_cpe_poll (unsigned long dummy) | |||
1188 | 1219 | ||
1189 | #endif /* CONFIG_ACPI */ | 1220 | #endif /* CONFIG_ACPI */ |
1190 | 1221 | ||
1222 | static int | ||
1223 | default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) | ||
1224 | { | ||
1225 | int c; | ||
1226 | struct task_struct *g, *t; | ||
1227 | if (val != DIE_INIT_MONARCH_PROCESS) | ||
1228 | return NOTIFY_DONE; | ||
1229 | printk(KERN_ERR "Processes interrupted by INIT -"); | ||
1230 | for_each_online_cpu(c) { | ||
1231 | struct ia64_sal_os_state *s; | ||
1232 | t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); | ||
1233 | s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); | ||
1234 | g = s->prev_task; | ||
1235 | if (g) { | ||
1236 | if (g->pid) | ||
1237 | printk(" %d", g->pid); | ||
1238 | else | ||
1239 | printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); | ||
1240 | } | ||
1241 | } | ||
1242 | printk("\n\n"); | ||
1243 | if (read_trylock(&tasklist_lock)) { | ||
1244 | do_each_thread (g, t) { | ||
1245 | printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); | ||
1246 | show_stack(t, NULL); | ||
1247 | } while_each_thread (g, t); | ||
1248 | read_unlock(&tasklist_lock); | ||
1249 | } | ||
1250 | return NOTIFY_DONE; | ||
1251 | } | ||
1252 | |||
1191 | /* | 1253 | /* |
1192 | * C portion of the OS INIT handler | 1254 | * C portion of the OS INIT handler |
1193 | * | 1255 | * |
@@ -1212,8 +1274,7 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1212 | static atomic_t slaves; | 1274 | static atomic_t slaves; |
1213 | static atomic_t monarchs; | 1275 | static atomic_t monarchs; |
1214 | task_t *previous_current; | 1276 | task_t *previous_current; |
1215 | int cpu = smp_processor_id(), c; | 1277 | int cpu = smp_processor_id(); |
1216 | struct task_struct *g, *t; | ||
1217 | 1278 | ||
1218 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 1279 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ |
1219 | console_loglevel = 15; /* make sure printks make it to console */ | 1280 | console_loglevel = 15; /* make sure printks make it to console */ |
@@ -1253,8 +1314,17 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1253 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; | 1314 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; |
1254 | while (monarch_cpu == -1) | 1315 | while (monarch_cpu == -1) |
1255 | cpu_relax(); /* spin until monarch enters */ | 1316 | cpu_relax(); /* spin until monarch enters */ |
1317 | if (notify_die(DIE_INIT_SLAVE_ENTER, "INIT", regs, 0, 0, 0) | ||
1318 | == NOTIFY_STOP) | ||
1319 | ia64_mca_spin(__FUNCTION__); | ||
1320 | if (notify_die(DIE_INIT_SLAVE_PROCESS, "INIT", regs, 0, 0, 0) | ||
1321 | == NOTIFY_STOP) | ||
1322 | ia64_mca_spin(__FUNCTION__); | ||
1256 | while (monarch_cpu != -1) | 1323 | while (monarch_cpu != -1) |
1257 | cpu_relax(); /* spin until monarch leaves */ | 1324 | cpu_relax(); /* spin until monarch leaves */ |
1325 | if (notify_die(DIE_INIT_SLAVE_LEAVE, "INIT", regs, 0, 0, 0) | ||
1326 | == NOTIFY_STOP) | ||
1327 | ia64_mca_spin(__FUNCTION__); | ||
1258 | printk("Slave on cpu %d returning to normal service.\n", cpu); | 1328 | printk("Slave on cpu %d returning to normal service.\n", cpu); |
1259 | set_curr_task(cpu, previous_current); | 1329 | set_curr_task(cpu, previous_current); |
1260 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | 1330 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; |
@@ -1263,6 +1333,9 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1263 | } | 1333 | } |
1264 | 1334 | ||
1265 | monarch_cpu = cpu; | 1335 | monarch_cpu = cpu; |
1336 | if (notify_die(DIE_INIT_MONARCH_ENTER, "INIT", regs, 0, 0, 0) | ||
1337 | == NOTIFY_STOP) | ||
1338 | ia64_mca_spin(__FUNCTION__); | ||
1266 | 1339 | ||
1267 | /* | 1340 | /* |
1268 | * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be | 1341 | * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be |
@@ -1273,27 +1346,16 @@ ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | |||
1273 | printk("Delaying for 5 seconds...\n"); | 1346 | printk("Delaying for 5 seconds...\n"); |
1274 | udelay(5*1000000); | 1347 | udelay(5*1000000); |
1275 | ia64_wait_for_slaves(cpu); | 1348 | ia64_wait_for_slaves(cpu); |
1276 | printk(KERN_ERR "Processes interrupted by INIT -"); | 1349 | /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through |
1277 | for_each_online_cpu(c) { | 1350 | * to default_monarch_init_process() above and just print all the |
1278 | struct ia64_sal_os_state *s; | 1351 | * tasks. |
1279 | t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); | 1352 | */ |
1280 | s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); | 1353 | if (notify_die(DIE_INIT_MONARCH_PROCESS, "INIT", regs, 0, 0, 0) |
1281 | g = s->prev_task; | 1354 | == NOTIFY_STOP) |
1282 | if (g) { | 1355 | ia64_mca_spin(__FUNCTION__); |
1283 | if (g->pid) | 1356 | if (notify_die(DIE_INIT_MONARCH_LEAVE, "INIT", regs, 0, 0, 0) |
1284 | printk(" %d", g->pid); | 1357 | == NOTIFY_STOP) |
1285 | else | 1358 | ia64_mca_spin(__FUNCTION__); |
1286 | printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); | ||
1287 | } | ||
1288 | } | ||
1289 | printk("\n\n"); | ||
1290 | if (read_trylock(&tasklist_lock)) { | ||
1291 | do_each_thread (g, t) { | ||
1292 | printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); | ||
1293 | show_stack(t, NULL); | ||
1294 | } while_each_thread (g, t); | ||
1295 | read_unlock(&tasklist_lock); | ||
1296 | } | ||
1297 | printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); | 1359 | printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); |
1298 | atomic_dec(&monarchs); | 1360 | atomic_dec(&monarchs); |
1299 | set_curr_task(cpu, previous_current); | 1361 | set_curr_task(cpu, previous_current); |
@@ -1462,6 +1524,10 @@ ia64_mca_init(void) | |||
1462 | s64 rc; | 1524 | s64 rc; |
1463 | struct ia64_sal_retval isrv; | 1525 | struct ia64_sal_retval isrv; |
1464 | u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ | 1526 | u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ |
1527 | static struct notifier_block default_init_monarch_nb = { | ||
1528 | .notifier_call = default_monarch_init_process, | ||
1529 | .priority = 0/* we need to be notified last */ | ||
1530 | }; | ||
1465 | 1531 | ||
1466 | IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); | 1532 | IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); |
1467 | 1533 | ||
@@ -1555,6 +1621,10 @@ ia64_mca_init(void) | |||
1555 | "(status %ld)\n", rc); | 1621 | "(status %ld)\n", rc); |
1556 | return; | 1622 | return; |
1557 | } | 1623 | } |
1624 | if (register_die_notifier(&default_init_monarch_nb)) { | ||
1625 | printk(KERN_ERR "Failed to register default monarch INIT process\n"); | ||
1626 | return; | ||
1627 | } | ||
1558 | 1628 | ||
1559 | IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); | 1629 | IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); |
1560 | 1630 | ||