author		Keith Owens <kaos@sgi.com>		2005-09-11 03:22:53 -0400
committer	Tony Luck <tony.luck@intel.com>		2005-09-11 17:08:41 -0400
commit		7f613c7d2203ae137d98fc1c38abc30fd7048637 (patch)
tree		d8155a5cca33e4fe178625396886fcbb81f39e7a /arch/ia64/kernel/mca.c
parent		289d773ee89ea80dcc364ef97d1be7ad1817387e (diff)
[PATCH] MCA/INIT: use per cpu stacks
This is the bulk of the change. Use per cpu MCA/INIT stacks. Change the SAL
to OS state (sos) to be per process. Do all the assembler work on the
MCA/INIT stacks, leaving the original stack alone. Pass per cpu state
data to the C handlers for MCA and INIT, which also means changing the
mca_drv interfaces slightly. Do lots of verification on whether the
original stack is usable before converting it to a sleeping process.
Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
 -rw-r--r--  arch/ia64/kernel/mca.c | 821
 1 file changed, 505 insertions(+), 316 deletions(-)
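The allocation trick in the patched ia64_mca_cpu_init() below (over-allocate by one stack, then round the base up) guarantees that each cpu's MCA/INIT stack pair starts on a KERNEL_STACK_SIZE boundary. Here is a minimal user-space sketch of that arithmetic, assuming a two-stack struct ia64_mca_cpu as implied by the offsetof() calls in the diff (the real layout lives in the ia64 headers, which are not part of this file):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>

#define KERNEL_STACK_SIZE	(64 * 1024)	/* illustrative value only */
#define NR_CPUS			4

/* Assumed shape, implied by the offsetof() calls in the diff: one MCA and
 * one INIT stack per cpu, each KERNEL_STACK_SIZE bytes. */
struct ia64_mca_cpu {
	uint64_t mca_stack[KERNEL_STACK_SIZE / 8];
	uint64_t init_stack[KERNEL_STACK_SIZE / 8];
};

int main(void)
{
	/* Over-allocate by one stack so the base can be rounded up, exactly
	 * as the patched ia64_mca_cpu_init() does with alloc_bootmem(). */
	char *raw = malloc(sizeof(struct ia64_mca_cpu) * NR_CPUS
			   + KERNEL_STACK_SIZE);
	char *mca_data;
	int cpu;

	if (!raw)
		return 1;
	mca_data = (char *)(((uintptr_t)raw + KERNEL_STACK_SIZE - 1)
			    & ~((uintptr_t)KERNEL_STACK_SIZE - 1));
	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		/* The kernel formats a pseudo task at each offset; this
		 * sketch only shows where the two stacks land. */
		printf("cpu %d: MCA stack %p, INIT stack %p\n", cpu,
		       (void *)(mca_data + offsetof(struct ia64_mca_cpu, mca_stack)),
		       (void *)(mca_data + offsetof(struct ia64_mca_cpu, init_stack)));
		mca_data += sizeof(struct ia64_mca_cpu);
	}
	free(raw);
	return 0;
}

Because sizeof(struct ia64_mca_cpu) is itself a multiple of KERNEL_STACK_SIZE, aligning the first element keeps every later element aligned as well.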
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 8d484204a3ff..6dc726ad7137 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -48,6 +48,9 @@
  * Delete dead variables and functions.
  * Reorder to remove the need for forward declarations and to consolidate
  * related code.
+ *
+ * 2005-08-12 Keith Owens <kaos@sgi.com>
+ *	Convert MCA/INIT handlers to use per event stacks and SAL/OS state.
  */
 #include <linux/config.h>
 #include <linux/types.h>
@@ -77,6 +80,8 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 
+#include "entry.h"
+
 #if defined(IA64_MCA_DEBUG_INFO)
 # define IA64_MCA_DEBUG(fmt...) printk(fmt)
 #else
@@ -84,9 +89,7 @@
 #endif
 
 /* Used by mca_asm.S */
-ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
-ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
-u64 ia64_mca_serialize;
+u32 ia64_mca_serialize;
 DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
 DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
 DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
@@ -95,8 +98,10 @@ DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
 unsigned long __per_cpu_mca[NR_CPUS];
 
 /* In mca_asm.S */
-extern void ia64_monarch_init_handler (void);
-extern void ia64_slave_init_handler (void);
+extern void ia64_os_init_dispatch_monarch (void);
+extern void ia64_os_init_dispatch_slave (void);
+
+static int monarch_cpu = -1;
 
 static ia64_mc_info_t ia64_mc_info;
 
@@ -234,7 +239,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
  * This function retrieves a specified error record type from SAL
  * and wakes up any processes waiting for error records.
  *
- * Inputs  : sal_info_type   (Type of error record MCA/CMC/CPE/INIT)
+ * Inputs  : sal_info_type   (Type of error record MCA/CMC/CPE)
+ *           FIXME: remove MCA and irq_safe.
  */
 static void
 ia64_mca_log_sal_error_record(int sal_info_type)
@@ -242,7 +248,7 @@ ia64_mca_log_sal_error_record(int sal_info_type)
	u8 *buffer;
	sal_log_record_header_t *rh;
	u64 size;
-	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT;
+	int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA;
 #ifdef IA64_MCA_DEBUG_INFO
	static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
 #endif
@@ -330,182 +336,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
 
 #endif /* CONFIG_ACPI */
 
-static void
-show_min_state (pal_min_state_area_t *minstate)
-{
-	u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri;
-	u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri;
-
-	printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits);
-	printk("pr\t\t%016lx\n", minstate->pmsa_pr);
-	printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0);
-	printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc);
-	printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip);
-	printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr);
-	printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs);
-	printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip);
-	printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr);
-	printk("xfs\t\t%016lx\n", minstate->pmsa_xfs);
-	printk("b1\t\t%016lx ", minstate->pmsa_br1);
-	print_symbol("%s\n", minstate->pmsa_br1);
-
-	printk("\nstatic registers r0-r15:\n");
-	printk(" r0- 3 %016lx %016lx %016lx %016lx\n",
-	       0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]);
-	printk(" r4- 7 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[3], minstate->pmsa_gr[4],
-	       minstate->pmsa_gr[5], minstate->pmsa_gr[6]);
-	printk(" r8-11 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[7], minstate->pmsa_gr[8],
-	       minstate->pmsa_gr[9], minstate->pmsa_gr[10]);
-	printk("r12-15 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_gr[11], minstate->pmsa_gr[12],
-	       minstate->pmsa_gr[13], minstate->pmsa_gr[14]);
-
-	printk("\nbank 0:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1],
-	       minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5],
-	       minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9],
-	       minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13],
-	       minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]);
-
-	printk("\nbank 1:\n");
-	printk("r16-19 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1],
-	       minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]);
-	printk("r20-23 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5],
-	       minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]);
-	printk("r24-27 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9],
-	       minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]);
-	printk("r28-31 %016lx %016lx %016lx %016lx\n",
-	       minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13],
-	       minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]);
-}
-
-static void
-fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw)
-{
-	u64 *dst_banked, *src_banked, bit, shift, nat_bits;
-	int i;
-
-	/*
-	 * First, update the pt-regs and switch-stack structures with the contents stored
-	 * in the min-state area:
-	 */
-	if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) {
-		pt->cr_ipsr = ms->pmsa_xpsr;
-		pt->cr_iip = ms->pmsa_xip;
-		pt->cr_ifs = ms->pmsa_xfs;
-	} else {
-		pt->cr_ipsr = ms->pmsa_ipsr;
-		pt->cr_iip = ms->pmsa_iip;
-		pt->cr_ifs = ms->pmsa_ifs;
-	}
-	pt->ar_rsc = ms->pmsa_rsc;
-	pt->pr = ms->pmsa_pr;
-	pt->r1 = ms->pmsa_gr[0];
-	pt->r2 = ms->pmsa_gr[1];
-	pt->r3 = ms->pmsa_gr[2];
-	sw->r4 = ms->pmsa_gr[3];
-	sw->r5 = ms->pmsa_gr[4];
-	sw->r6 = ms->pmsa_gr[5];
-	sw->r7 = ms->pmsa_gr[6];
-	pt->r8 = ms->pmsa_gr[7];
-	pt->r9 = ms->pmsa_gr[8];
-	pt->r10 = ms->pmsa_gr[9];
-	pt->r11 = ms->pmsa_gr[10];
-	pt->r12 = ms->pmsa_gr[11];
-	pt->r13 = ms->pmsa_gr[12];
-	pt->r14 = ms->pmsa_gr[13];
-	pt->r15 = ms->pmsa_gr[14];
-	dst_banked = &pt->r16;	/* r16-r31 are contiguous in struct pt_regs */
-	src_banked = ms->pmsa_bank1_gr;
-	for (i = 0; i < 16; ++i)
-		dst_banked[i] = src_banked[i];
-	pt->b0 = ms->pmsa_br0;
-	sw->b1 = ms->pmsa_br1;
-
-	/* construct the NaT bits for the pt-regs structure: */
-#	define PUT_NAT_BIT(dst, addr)					\
-	do {								\
-		bit = nat_bits & 1; nat_bits >>= 1;			\
-		shift = ((unsigned long) addr >> 3) & 0x3f;		\
-		dst = ((dst) & ~(1UL << shift)) | (bit << shift);	\
-	} while (0)
-
-	/* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */
-	shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f;
-	nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift));
-
-	PUT_NAT_BIT(sw->caller_unat, &pt->r1);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r2);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r3);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r4);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r5);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r6);
-	PUT_NAT_BIT(sw->ar_unat, &sw->r7);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r8);	PUT_NAT_BIT(sw->caller_unat, &pt->r9);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r10);	PUT_NAT_BIT(sw->caller_unat, &pt->r11);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r12);	PUT_NAT_BIT(sw->caller_unat, &pt->r13);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r14);	PUT_NAT_BIT(sw->caller_unat, &pt->r15);
-	nat_bits >>= 16;	/* skip over bank0 NaT bits */
-	PUT_NAT_BIT(sw->caller_unat, &pt->r16);	PUT_NAT_BIT(sw->caller_unat, &pt->r17);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r18);	PUT_NAT_BIT(sw->caller_unat, &pt->r19);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r20);	PUT_NAT_BIT(sw->caller_unat, &pt->r21);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r22);	PUT_NAT_BIT(sw->caller_unat, &pt->r23);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r24);	PUT_NAT_BIT(sw->caller_unat, &pt->r25);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r26);	PUT_NAT_BIT(sw->caller_unat, &pt->r27);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r28);	PUT_NAT_BIT(sw->caller_unat, &pt->r29);
-	PUT_NAT_BIT(sw->caller_unat, &pt->r30);	PUT_NAT_BIT(sw->caller_unat, &pt->r31);
-}
-
-static void
-init_handler_platform (pal_min_state_area_t *ms,
-		       struct pt_regs *pt, struct switch_stack *sw)
-{
-	struct unw_frame_info info;
-
-	/* if a kernel debugger is available call it here else just dump the registers */
-
-	/*
-	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
-	 * generated via the BMC's command-line interface, but since the console is on the
-	 * same serial line, the user will need some time to switch out of the BMC before
-	 * the dump begins.
-	 */
-	printk("Delaying for 5 seconds...\n");
-	udelay(5*1000000);
-	show_min_state(ms);
-
-	printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm);
-	fetch_min_state(ms, pt, sw);
-	unw_init_from_interruption(&info, current, pt, sw);
-	ia64_do_show_stack(&info, NULL);
-
-	if (read_trylock(&tasklist_lock)) {
-		struct task_struct *g, *t;
-		do_each_thread (g, t) {
-			if (t == current)
-				continue;
-
-			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
-			show_stack(t, NULL);
-		} while_each_thread (g, t);
-	}
-
-	printk("\nINIT dump complete.  Please reboot now.\n");
-	while (1);	/* hang city if no debugger */
-}
-
 #ifdef CONFIG_ACPI
 /*
  * ia64_mca_register_cpev
@@ -648,42 +478,6 @@ ia64_mca_cmc_vector_enable_keventd(void *unused)
 }
 
 /*
- * ia64_mca_wakeup_ipi_wait
- *
- *	Wait for the inter-cpu interrupt to be sent by the
- *	monarch processor once it is done with handling the
- *	MCA.
- *
- * Inputs  : None
- * Outputs : None
- */
-static void
-ia64_mca_wakeup_ipi_wait(void)
-{
-	int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6);
-	int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f);
-	u64 irr = 0;
-
-	do {
-		switch(irr_num) {
-		case 0:
-			irr = ia64_getreg(_IA64_REG_CR_IRR0);
-			break;
-		case 1:
-			irr = ia64_getreg(_IA64_REG_CR_IRR1);
-			break;
-		case 2:
-			irr = ia64_getreg(_IA64_REG_CR_IRR2);
-			break;
-		case 3:
-			irr = ia64_getreg(_IA64_REG_CR_IRR3);
-			break;
-		}
-		cpu_relax();
-	} while (!(irr & (1UL << irr_bit))) ;
-}
-
-/*
  * ia64_mca_wakeup
  *
  *	Send an inter-cpu interrupt to wake-up a particular cpu
@@ -748,11 +542,9 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
	 */
	ia64_sal_mc_rendez();
 
-	/* Wait for the wakeup IPI from the monarch
-	 * This waiting is done by polling on the wakeup-interrupt
-	 * vector bit in the processor's IRRs
-	 */
-	ia64_mca_wakeup_ipi_wait();
+	/* Wait for the monarch cpu to exit. */
+	while (monarch_cpu != -1)
+		cpu_relax();	/* spin until monarch leaves */
 
	/* Enable all interrupts */
	local_irq_restore(flags);
@@ -780,53 +572,13 @@ ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
	return IRQ_HANDLED;
 }
 
-/*
- * ia64_return_to_sal_check
- *
- *	This is function called before going back from the OS_MCA handler
- *	to the OS_MCA dispatch code which finally takes the control back
- *	to the SAL.
- *	The main purpose of this routine is to setup the OS_MCA to SAL
- *	return state which can be used by the OS_MCA dispatch code
- *	just before going back to SAL.
- *
- * Inputs  : None
- * Outputs : None
- */
-
-static void
-ia64_return_to_sal_check(int recover)
-{
-
-	/* Copy over some relevant stuff from the sal_to_os_mca_handoff
-	 * so that it can be used at the time of os_mca_to_sal_handoff
-	 */
-	ia64_os_to_sal_handoff_state.imots_sal_gp =
-		ia64_sal_to_os_handoff_state.imsto_sal_gp;
-
-	ia64_os_to_sal_handoff_state.imots_sal_check_ra =
-		ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
-
-	if (recover)
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
-	else
-		ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
-
-	/* Default = tell SAL to return to same context */
-	ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
-
-	ia64_os_to_sal_handoff_state.imots_new_min_state =
-		(u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
-
-}
-
 /* Function pointer for extra MCA recovery */
 int (*ia64_mca_ucmc_extension)
-	(void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
+	(void*,struct ia64_sal_os_state*)
	= NULL;
 
 int
-ia64_reg_MCA_extension(void *fn)
+ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *))
 {
	if (ia64_mca_ucmc_extension)
		return 1;
@@ -845,8 +597,321 @@ ia64_unreg_MCA_extension(void)
 EXPORT_SYMBOL(ia64_reg_MCA_extension);
 EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 
+
+static inline void
+copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
+{
+	u64 fslot, tslot, nat;
+	*tr = *fr;
+	fslot = ((unsigned long)fr >> 3) & 63;
+	tslot = ((unsigned long)tr >> 3) & 63;
+	*tnat &= ~(1UL << tslot);
+	nat = (fnat >> fslot) & 1;
+	*tnat |= (nat << tslot);
+}
+
+/* On entry to this routine, we are running on the per cpu stack, see
+ * mca_asm.h.  The original stack has not been touched by this event.  Some of
+ * the original stack's registers will be in the RBS on this stack.  This stack
+ * also contains a partial pt_regs and switch_stack, the rest of the data is in
+ * PAL minstate.
+ *
+ * The first thing to do is modify the original stack to look like a blocked
+ * task so we can run backtrace on the original task.  Also mark the per cpu
+ * stack as current to ensure that we use the correct task state, it also means
+ * that we can do backtrace on the MCA/INIT handler code itself.
+ */
+
+static task_t *
+ia64_mca_modify_original_stack(struct pt_regs *regs,
+		const struct switch_stack *sw,
+		struct ia64_sal_os_state *sos,
+		const char *type)
+{
+	char *p, comm[sizeof(current->comm)];
+	ia64_va va;
+	extern char ia64_leave_kernel[];	/* Need asm address, not function descriptor */
+	const pal_min_state_area_t *ms = sos->pal_min_state;
+	task_t *previous_current;
+	struct pt_regs *old_regs;
+	struct switch_stack *old_sw;
+	unsigned size = sizeof(struct pt_regs) +
+		sizeof(struct switch_stack) + 16;
+	u64 *old_bspstore, *old_bsp;
+	u64 *new_bspstore, *new_bsp;
+	u64 old_unat, old_rnat, new_rnat, nat;
+	u64 slots, loadrs = regs->loadrs;
+	u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1];
+	u64 ar_bspstore = regs->ar_bspstore;
+	u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16);
+	const u64 *bank;
+	const char *msg;
+	int cpu = smp_processor_id();
+
+	previous_current = curr_task(cpu);
+	set_curr_task(cpu, current);
+	if ((p = strchr(current->comm, ' ')))
+		*p = '\0';
+
+	/* Best effort attempt to cope with MCA/INIT delivered while in
+	 * physical mode.
+	 */
+	regs->cr_ipsr = ms->pmsa_ipsr;
+	if (ia64_psr(regs)->dt == 0) {
+		va.l = r12;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r12 = va.l;
+		}
+		va.l = r13;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			r13 = va.l;
+		}
+	}
+	if (ia64_psr(regs)->rt == 0) {
+		va.l = ar_bspstore;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bspstore = va.l;
+		}
+		va.l = ar_bsp;
+		if (va.f.reg == 0) {
+			va.f.reg = 7;
+			ar_bsp = va.l;
+		}
+	}
+
+	/* mca_asm.S ia64_old_stack() cannot assume that the dirty registers
+	 * have been copied to the old stack, the old stack may fail the
+	 * validation tests below.  So ia64_old_stack() must restore the dirty
+	 * registers from the new stack.  The old and new bspstore probably
+	 * have different alignments, so loadrs calculated on the old bsp
+	 * cannot be used to restore from the new bsp.  Calculate a suitable
+	 * loadrs for the new stack and save it in the new pt_regs, where
+	 * ia64_old_stack() can get it.
+	 */
+	old_bspstore = (u64 *)ar_bspstore;
+	old_bsp = (u64 *)ar_bsp;
+	slots = ia64_rse_num_regs(old_bspstore, old_bsp);
+	new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET);
+	new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
+	regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;
+
+	/* Verify the previous stack state before we change it */
+	if (user_mode(regs)) {
+		msg = "occurred in user space";
+		goto no_mod;
+	}
+	if (r13 != sos->prev_IA64_KR_CURRENT) {
+		msg = "inconsistent previous current and r13";
+		goto no_mod;
+	}
+	if ((r12 - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent r12 and r13";
+		goto no_mod;
+	}
+	if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bspstore and r13";
+		goto no_mod;
+	}
+	va.p = old_bspstore;
+	if (va.f.reg < 5) {
+		msg = "old_bspstore is in the wrong region";
+		goto no_mod;
+	}
+	if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) {
+		msg = "inconsistent ar.bsp and r13";
+		goto no_mod;
+	}
+	size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
+	if (ar_bspstore + size > r12) {
+		msg = "no room for blocked state";
+		goto no_mod;
+	}
+
+	/* Change the comm field on the MCA/INT task to include the pid that
+	 * was interrupted, it makes for easier debugging.  If that pid was 0
+	 * (swapper or nested MCA/INIT) then use the start of the previous comm
+	 * field suffixed with its cpu.
+	 */
+	if (previous_current->pid)
+		snprintf(comm, sizeof(comm), "%s %d",
+			current->comm, previous_current->pid);
+	else {
+		int l;
+		if ((p = strchr(previous_current->comm, ' ')))
+			l = p - previous_current->comm;
+		else
+			l = strlen(previous_current->comm);
+		snprintf(comm, sizeof(comm), "%s %*s %d",
+			current->comm, l, previous_current->comm,
+			previous_current->thread_info->cpu);
+	}
+	memcpy(current->comm, comm, sizeof(current->comm));
+
+	/* Make the original task look blocked.  First stack a struct pt_regs,
+	 * describing the state at the time of interrupt.  mca_asm.S built a
+	 * partial pt_regs, copy it and fill in the blanks using minstate.
+	 */
+	p = (char *)r12 - sizeof(*regs);
+	old_regs = (struct pt_regs *)p;
+	memcpy(old_regs, regs, sizeof(*regs));
+	/* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
+	 * pmsa_{xip,xpsr,xfs}
+	 */
+	if (ia64_psr(regs)->ic) {
+		old_regs->cr_iip = ms->pmsa_iip;
+		old_regs->cr_ipsr = ms->pmsa_ipsr;
+		old_regs->cr_ifs = ms->pmsa_ifs;
+	} else {
+		old_regs->cr_iip = ms->pmsa_xip;
+		old_regs->cr_ipsr = ms->pmsa_xpsr;
+		old_regs->cr_ifs = ms->pmsa_xfs;
+	}
+	old_regs->pr = ms->pmsa_pr;
+	old_regs->b0 = ms->pmsa_br0;
+	old_regs->loadrs = loadrs;
+	old_regs->ar_rsc = ms->pmsa_rsc;
+	old_unat = old_regs->ar_unat;
+	copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat);
+	copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat);
+	copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat);
+	copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat);
+	copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat);
+	copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat);
+	copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat);
+	copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat);
+	copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat);
+	copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat);
+	copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat);
+	if (ia64_psr(old_regs)->bn)
+		bank = ms->pmsa_bank1_gr;
+	else
+		bank = ms->pmsa_bank0_gr;
+	copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat);
+	copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat);
+	copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat);
+	copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat);
+	copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat);
+	copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat);
+	copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat);
+	copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat);
+	copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat);
+	copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat);
+	copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat);
+	copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat);
+	copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat);
+	copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat);
+	copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat);
+	copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat);
+
+	/* Next stack a struct switch_stack.  mca_asm.S built a partial
+	 * switch_stack, copy it and fill in the blanks using pt_regs and
+	 * minstate.
+	 *
+	 * In the synthesized switch_stack, b0 points to ia64_leave_kernel,
+	 * ar.pfs is set to 0.
+	 *
+	 * unwind.c::unw_unwind() does special processing for interrupt frames.
+	 * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate
+	 * is clear then unw_unwind() does _not_ adjust bsp over pt_regs.  Not
+	 * that this is documented, of course.  Set PRED_NON_SYSCALL in the
+	 * switch_stack on the original stack so it will unwind correctly when
+	 * unwind.c reads pt_regs.
+	 *
+	 * thread.ksp is updated to point to the synthesized switch_stack.
+	 */
+	p -= sizeof(struct switch_stack);
+	old_sw = (struct switch_stack *)p;
+	memcpy(old_sw, sw, sizeof(*sw));
+	old_sw->caller_unat = old_unat;
+	old_sw->ar_fpsr = old_regs->ar_fpsr;
+	copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat);
+	copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat);
+	copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat);
+	copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat);
+	old_sw->b0 = (u64)ia64_leave_kernel;
+	old_sw->b1 = ms->pmsa_br1;
+	old_sw->ar_pfs = 0;
+	old_sw->ar_unat = old_unat;
+	old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL);
+	previous_current->thread.ksp = (u64)p - 16;
+
+	/* Finally copy the original stack's registers back to its RBS.
+	 * Registers from ar.bspstore through ar.bsp at the time of the event
+	 * are in the current RBS, copy them back to the original stack.  The
+	 * copy must be done register by register because the original bspstore
+	 * and the current one have different alignments, so the saved RNAT
+	 * data occurs at different places.
+	 *
+	 * mca_asm does cover, so the old_bsp already includes all registers at
+	 * the time of MCA/INIT.  It also does flushrs, so all registers before
+	 * this function have been written to backing store on the MCA/INIT
+	 * stack.
+	 */
+	new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore));
+	old_rnat = regs->ar_rnat;
+	while (slots--) {
+		if (ia64_rse_is_rnat_slot(new_bspstore)) {
+			new_rnat = ia64_get_rnat(new_bspstore++);
+		}
+		if (ia64_rse_is_rnat_slot(old_bspstore)) {
+			*old_bspstore++ = old_rnat;
+			old_rnat = 0;
+		}
+		nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
+		old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
+		old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
+		*old_bspstore++ = *new_bspstore++;
+	}
+	old_sw->ar_bspstore = (unsigned long)old_bspstore;
+	old_sw->ar_rnat = old_rnat;
+
+	sos->prev_task = previous_current;
+	return previous_current;
+
+no_mod:
+	printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n",
+			smp_processor_id(), type, msg);
+	return previous_current;
+}
+
+/* The monarch/slave interaction is based on monarch_cpu and requires that all
+ * slaves have entered rendezvous before the monarch leaves.  If any cpu has
+ * not entered rendezvous yet then wait a bit.  The assumption is that any
+ * slave that has not rendezvoused after a reasonable time is never going to do
+ * so.  In this context, slave includes cpus that respond to the MCA rendezvous
+ * interrupt, as well as cpus that receive the INIT slave event.
+ */
+
+static void
+ia64_wait_for_slaves(int monarch)
+{
+	int c, wait = 0;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(1000);		/* short wait first */
+			wait = 1;
+			break;
+		}
+	}
+	if (!wait)
+		return;
+	for_each_online_cpu(c) {
+		if (c == monarch)
+			continue;
+		if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
+			udelay(5*1000000);	/* wait 5 seconds for slaves (arbitrary) */
+			break;
+		}
+	}
+}
+
 /*
- * ia64_mca_ucmc_handler
+ * ia64_mca_handler
  *
 *	This is uncorrectable machine check handler called from OS_MCA
 *	dispatch code which is in turn called from SAL_CHECK().
@@ -857,16 +922,28 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension);
 *	further MCA logging is enabled by clearing logs.
 *	Monarch also has the duty of sending wakeup-IPIs to pull the
 *	slave processors out of rendezvous spinloop.
- *
- * Inputs  : None
- * Outputs : None
  */
 void
-ia64_mca_ucmc_handler(void)
+ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
+		 struct ia64_sal_os_state *sos)
 {
	pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
-		&ia64_sal_to_os_handoff_state.proc_state_param;
-	int recover;
+		&sos->proc_state_param;
+	int recover, cpu = smp_processor_id();
+	task_t *previous_current;
+
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
+	monarch_cpu = cpu;
+	ia64_wait_for_slaves(cpu);
+
+	/* Wakeup all the processors which are spinning in the rendezvous loop.
+	 * They will leave SAL, then spin in the OS with interrupts disabled
+	 * until this monarch cpu leaves the MCA handler.  That gets control
+	 * back to the OS so we can backtrace the other cpus, backtrace when
+	 * spinning in SAL does not work.
+	 */
+	ia64_mca_wakeup_all();
 
	/* Get the MCA error record and log it */
	ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
@@ -874,25 +951,20 @@ ia64_mca_ucmc_handler(void)
	/* TLB error is only exist in this SAL error record */
	recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
		/* other error recovery */
		|| (ia64_mca_ucmc_extension
		&& ia64_mca_ucmc_extension(
			IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
-			&ia64_sal_to_os_handoff_state,
-			&ia64_os_to_sal_handoff_state));
+			sos));
 
	if (recover) {
		sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
		rh->severity = sal_log_severity_corrected;
		ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
+		sos->os_status = IA64_MCA_CORRECTED;
	}
-	/*
-	 * Wakeup all the processors which are spinning in the rendezvous
-	 * loop.
-	 */
-	ia64_mca_wakeup_all();
 
-	/* Return to SAL */
-	ia64_return_to_sal_check(recover);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
 }
 
 static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
@@ -1116,34 +1188,114 @@ ia64_mca_cpe_poll (unsigned long dummy)
 /*
  * C portion of the OS INIT handler
  *
- * Called from ia64_monarch_init_handler
- *
- * Inputs: pointer to pt_regs where processor info was saved.
+ * Called from ia64_os_init_dispatch
  *
- * Returns:
- *   0 if SAL must warm boot the System
- *   1 if SAL must return to interrupted context using PAL_MC_RESUME
+ * Inputs: pointer to pt_regs where processor info was saved.  SAL/OS state for
+ * this event.  This code is used for both monarch and slave INIT events, see
+ * sos->monarch.
  *
+ * All INIT events switch to the INIT stack and change the previous process to
+ * blocked status.  If one of the INIT events is the monarch then we are
+ * probably processing the nmi button/command.  Use the monarch cpu to dump all
+ * the processes.  The slave INIT events all spin until the monarch cpu
+ * returns.  We can also get INIT slave events for MCA, in which case the MCA
+ * process is the monarch.
  */
+
 void
-ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw)
+ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw,
+		  struct ia64_sal_os_state *sos)
 {
-	pal_min_state_area_t *ms;
+	static atomic_t slaves;
+	static atomic_t monarchs;
+	task_t *previous_current;
+	int cpu = smp_processor_id(), c;
+	struct task_struct *g, *t;
 
-	oops_in_progress = 1;	/* avoid deadlock in printk, but it makes recovery dodgy */
+	oops_in_progress = 1;	/* FIXME: make printk NMI/MCA/INIT safe */
	console_loglevel = 15;	/* make sure printks make it to console */
 
-	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n",
-		ia64_sal_to_os_handoff_state.proc_state_param);
+	printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n",
+		sos->proc_state_param, cpu, sos->monarch);
+	salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0);
 
-	/*
-	 * Address of minstate area provided by PAL is physical,
-	 * uncacheable (bit 63 set). Convert to Linux virtual
-	 * address in region 6.
+	previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");
+	sos->os_status = IA64_INIT_RESUME;
+
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * slaves.  The last slave that enters is promoted to be a monarch.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
	 */
-	ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61));
+	if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) {
+		printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n",
+		       __FUNCTION__, cpu);
+		atomic_dec(&slaves);
+		sos->monarch = 1;
+	}
 
-	init_handler_platform(ms, pt, sw);	/* call platform specific routines */
+	/* FIXME: Workaround for broken proms that drive all INIT events as
+	 * monarchs.  Second and subsequent monarchs are demoted to slaves.
+	 * Remove this code in September 2006, that gives platforms a year to
+	 * fix their proms and get their customers updated.
+	 */
+	if (sos->monarch && atomic_add_return(1, &monarchs) > 1) {
+		printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n",
+		       __FUNCTION__, cpu);
+		atomic_dec(&monarchs);
+		sos->monarch = 0;
+	}
+
+	if (!sos->monarch) {
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT;
+		while (monarch_cpu == -1)
+			cpu_relax();	/* spin until monarch enters */
+		while (monarch_cpu != -1)
+			cpu_relax();	/* spin until monarch leaves */
+		printk("Slave on cpu %d returning to normal service.\n", cpu);
+		set_curr_task(cpu, previous_current);
+		ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
+		atomic_dec(&slaves);
+		return;
+	}
+
+	monarch_cpu = cpu;
+
+	/*
+	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000, INIT can be
+	 * generated via the BMC's command-line interface, but since the console is on the
+	 * same serial line, the user will need some time to switch out of the BMC before
+	 * the dump begins.
+	 */
+	printk("Delaying for 5 seconds...\n");
+	udelay(5*1000000);
+	ia64_wait_for_slaves(cpu);
+	printk(KERN_ERR "Processes interrupted by INIT -");
+	for_each_online_cpu(c) {
+		struct ia64_sal_os_state *s;
+		t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET);
+		s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET);
+		g = s->prev_task;
+		if (g) {
+			if (g->pid)
+				printk(" %d", g->pid);
+			else
+				printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);
+		}
+	}
+	printk("\n\n");
+	if (read_trylock(&tasklist_lock)) {
+		do_each_thread (g, t) {
+			printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
+			show_stack(t, NULL);
+		} while_each_thread (g, t);
+		read_unlock(&tasklist_lock);
+	}
+	printk("\nINIT dump complete.  Monarch on cpu %d returning to normal service.\n", cpu);
+	atomic_dec(&monarchs);
+	set_curr_task(cpu, previous_current);
+	monarch_cpu = -1;
+	return;
 }
 
 static int __init
@@ -1193,6 +1345,34 @@ static struct irqaction mca_cpep_irqaction = {
 };
 #endif /* CONFIG_ACPI */
 
+/* Minimal format of the MCA/INIT stacks.  The pseudo processes that run on
+ * these stacks can never sleep, they cannot return from the kernel to user
+ * space, they do not appear in a normal ps listing.  So there is no need to
+ * format most of the fields.
+ */
+
+static void
+format_mca_init_stack(void *mca_data, unsigned long offset,
+		const char *type, int cpu)
+{
+	struct task_struct *p = (struct task_struct *)((char *)mca_data + offset);
+	struct thread_info *ti;
+	memset(p, 0, KERNEL_STACK_SIZE);
+	ti = (struct thread_info *)((char *)p + IA64_TASK_SIZE);
+	ti->flags = _TIF_MCA_INIT;
+	ti->preempt_count = 1;
+	ti->task = p;
+	ti->cpu = cpu;
+	p->thread_info = ti;
+	p->state = TASK_UNINTERRUPTIBLE;
+	__set_bit(cpu, &p->cpus_allowed);
+	INIT_LIST_HEAD(&p->tasks);
+	p->parent = p->real_parent = p->group_leader = p;
+	INIT_LIST_HEAD(&p->children);
+	INIT_LIST_HEAD(&p->sibling);
+	strncpy(p->comm, type, sizeof(p->comm)-1);
+}
+
 /* Do per-CPU MCA-related initialization. */
 
 void __devinit
@@ -1205,19 +1385,28 @@ ia64_mca_cpu_init(void *cpu_data)
		int cpu;
 
		mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
-					 * NR_CPUS);
+					 * NR_CPUS + KERNEL_STACK_SIZE);
+		mca_data = (void *)(((unsigned long)mca_data +
+					KERNEL_STACK_SIZE - 1) &
+					(-KERNEL_STACK_SIZE));
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, mca_stack),
+					"MCA", cpu);
+			format_mca_init_stack(mca_data,
+					offsetof(struct ia64_mca_cpu, init_stack),
+					"INIT", cpu);
			__per_cpu_mca[cpu] = __pa(mca_data);
			mca_data += sizeof(struct ia64_mca_cpu);
		}
	}
 
	/*
	 * The MCA info structure was allocated earlier and its
	 * physical address saved in __per_cpu_mca[cpu].  Copy that
	 * address * to ia64_mca_data so we can access it as a per-CPU
	 * variable.
	 */
	__get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
 
	/*
@@ -1227,11 +1416,11 @@ ia64_mca_cpu_init(void *cpu_data)
	__get_cpu_var(ia64_mca_per_cpu_pte) =
		pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
 
	/*
	 * Also, stash away a copy of the PAL address and the PTE
	 * needed to map it.
	 */
	pal_vaddr = efi_get_pal_addr();
	if (!pal_vaddr)
		return;
	__get_cpu_var(ia64_mca_pal_base) =
@@ -1263,8 +1452,8 @@
 void __init
 ia64_mca_init(void)
 {
-	ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler;
-	ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler;
+	ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch;
+	ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave;
	ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
	int i;
	s64 rc;
@@ -1342,9 +1531,9 @@ ia64_mca_init(void)
	 * XXX - disable SAL checksum by setting size to 0, should be
	 * size of the actual init handler in mca_asm.S.
	 */
-	ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp);
+	ia64_mc_info.imi_monarch_init_handler = ia64_tpa(init_hldlr_ptr_monarch->fp);
	ia64_mc_info.imi_monarch_init_handler_size = 0;
-	ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp);
+	ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp);
	ia64_mc_info.imi_slave_init_handler_size = 0;
 
	IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
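A note on the ia64_fptr_t casts in ia64_mca_init() above: on ia64, a C function symbol refers to a function descriptor (entry address plus gp), not to the code itself, so the handler's physical entry point is obtained as ia64_tpa(ptr->fp). Below is a stand-alone sketch of that indirection; the fp field name comes from the diff, while the gp member and the exact two-word layout are assumptions for illustration:

#include <stdio.h>
#include <stdint.h>

/* Assumed shape of ia64_fptr_t for this sketch: the diff only shows the
 * fp member; gp is the usual second word of an ia64 function descriptor. */
typedef struct {
	uint64_t fp;	/* entry point: the address SAL needs (after ia64_tpa) */
	uint64_t gp;	/* global pointer for the target's data segment */
} ia64_fptr_t;

static void dispatch_stub(void)
{
	/* stand-in for an assembler entry point such as ia64_os_mca_dispatch */
}

int main(void)
{
	/* On real ia64 the toolchain emits the descriptor, and a cast like
	 * (ia64_fptr_t *)ia64_os_mca_dispatch points at it; here one is
	 * built by hand so the example runs anywhere. */
	ia64_fptr_t desc = { (uint64_t)(uintptr_t)dispatch_stub, 0 };
	ia64_fptr_t *mca_hldlr_ptr = &desc;

	/* The kernel would register ia64_tpa(mca_hldlr_ptr->fp) with SAL. */
	printf("entry point from descriptor: %#llx\n",
	       (unsigned long long)mca_hldlr_ptr->fp);
	return 0;
}

This indirection is also why ia64_mca_modify_original_stack() declares extern char ia64_leave_kernel[] — as its comment says, it needs the raw asm address, not a function descriptor.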