| field | value | date |
|---|---|---|
| author | Keith Owens <kaos@sgi.com> | 2005-09-11 03:22:53 -0400 |
| committer | Tony Luck <tony.luck@intel.com> | 2005-09-11 17:08:41 -0400 |
| commit | 7f613c7d2203ae137d98fc1c38abc30fd7048637 (patch) | |
| tree | d8155a5cca33e4fe178625396886fcbb81f39e7a /arch/ia64/kernel/mca.c | |
| parent | 289d773ee89ea80dcc364ef97d1be7ad1817387e (diff) | |
[PATCH] MCA/INIT: use per cpu stacks
The bulk of the change.  Use per cpu MCA/INIT stacks.  Change the SAL
to OS state (sos) to be per process.  Do all the assembler work on the
MCA/INIT stacks, leaving the original stack alone.  Pass per cpu state
data to the C handlers for MCA and INIT, which also means changing the
mca_drv interfaces slightly.  Extensive verification is done on whether
the original stack is usable before converting it to a sleeping process.
Signed-off-by: Keith Owens <kaos@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
| -rw-r--r-- | arch/ia64/kernel/mca.c | 821 | 
1 files changed, 505 insertions, 316 deletions
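
For orientation before reading the diff: the new stacks live in a per cpu `struct ia64_mca_cpu` area. The sketch below is inferred from the `offsetof()` calls and `KERNEL_STACK_SIZE` arithmetic later in this file; the authoritative definition is in include/asm-ia64/mca.h and may carry extra fields.

```c
/* Assumed shape of the per cpu MCA/INIT area; see include/asm-ia64/mca.h
 * for the real definition.  Each stack region doubles as a pseudo task:
 * format_mca_init_stack() below plants a task_struct and thread_info at
 * its base.
 */
struct ia64_mca_cpu {
	u64 mca_stack[KERNEL_STACK_SIZE/8];	/* MCA events run here */
	u64 init_stack[KERNEL_STACK_SIZE/8];	/* INIT events run here */
} __attribute__ ((aligned(16)));
```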
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 8d484204a3ff..6dc726ad7137 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
| @@ -48,6 +48,9 @@ | |||
| 48 | * Delete dead variables and functions. | 48 | * Delete dead variables and functions. | 
| 49 | * Reorder to remove the need for forward declarations and to consolidate | 49 | * Reorder to remove the need for forward declarations and to consolidate | 
| 50 | * related code. | 50 | * related code. | 
| 51 | * | ||
| 52 | * 2005-08-12 Keith Owens <kaos@sgi.com> | ||
| 53 | * Convert MCA/INIT handlers to use per event stacks and SAL/OS state. | ||
| 51 | */ | 54 | */ | 
| 52 | #include <linux/config.h> | 55 | #include <linux/config.h> | 
| 53 | #include <linux/types.h> | 56 | #include <linux/types.h> | 
| @@ -77,6 +80,8 @@ | |||
| 77 | #include <asm/irq.h> | 80 | #include <asm/irq.h> | 
| 78 | #include <asm/hw_irq.h> | 81 | #include <asm/hw_irq.h> | 
| 79 | 82 | ||
| 83 | #include "entry.h" | ||
| 84 | |||
| 80 | #if defined(IA64_MCA_DEBUG_INFO) | 85 | #if defined(IA64_MCA_DEBUG_INFO) | 
| 81 | # define IA64_MCA_DEBUG(fmt...) printk(fmt) | 86 | # define IA64_MCA_DEBUG(fmt...) printk(fmt) | 
| 82 | #else | 87 | #else | 
| @@ -84,9 +89,7 @@ | |||
| 84 | #endif | 89 | #endif | 
| 85 | 90 | ||
| 86 | /* Used by mca_asm.S */ | 91 | /* Used by mca_asm.S */ | 
| 87 | ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; | 92 | u32 ia64_mca_serialize; | 
| 88 | ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; | ||
| 89 | u64 ia64_mca_serialize; | ||
| 90 | DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ | 93 | DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ | 
| 91 | DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ | 94 | DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ | 
| 92 | DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ | 95 | DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ | 
| @@ -95,8 +98,10 @@ DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ | |||
| 95 | unsigned long __per_cpu_mca[NR_CPUS]; | 98 | unsigned long __per_cpu_mca[NR_CPUS]; | 
| 96 | 99 | ||
| 97 | /* In mca_asm.S */ | 100 | /* In mca_asm.S */ | 
| 98 | extern void ia64_monarch_init_handler (void); | 101 | extern void ia64_os_init_dispatch_monarch (void); | 
| 99 | extern void ia64_slave_init_handler (void); | 102 | extern void ia64_os_init_dispatch_slave (void); | 
| 103 | |||
| 104 | static int monarch_cpu = -1; | ||
| 100 | 105 | ||
| 101 | static ia64_mc_info_t ia64_mc_info; | 106 | static ia64_mc_info_t ia64_mc_info; | 
| 102 | 107 | ||
| @@ -234,7 +239,8 @@ ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) | |||
| 234 | * This function retrieves a specified error record type from SAL | 239 | * This function retrieves a specified error record type from SAL | 
| 235 | * and wakes up any processes waiting for error records. | 240 | * and wakes up any processes waiting for error records. | 
| 236 | * | 241 | * | 
| 237 | * Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT) | 242 | * Inputs : sal_info_type (Type of error record MCA/CMC/CPE) | 
| 243 | * FIXME: remove MCA and irq_safe. | ||
| 238 | */ | 244 | */ | 
| 239 | static void | 245 | static void | 
| 240 | ia64_mca_log_sal_error_record(int sal_info_type) | 246 | ia64_mca_log_sal_error_record(int sal_info_type) | 
| @@ -242,7 +248,7 @@ ia64_mca_log_sal_error_record(int sal_info_type) | |||
| 242 | u8 *buffer; | 248 | u8 *buffer; | 
| 243 | sal_log_record_header_t *rh; | 249 | sal_log_record_header_t *rh; | 
| 244 | u64 size; | 250 | u64 size; | 
| 245 | int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT; | 251 | int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA; | 
| 246 | #ifdef IA64_MCA_DEBUG_INFO | 252 | #ifdef IA64_MCA_DEBUG_INFO | 
| 247 | static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; | 253 | static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; | 
| 248 | #endif | 254 | #endif | 
| @@ -330,182 +336,6 @@ ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) | |||
| 330 | 336 | ||
| 331 | #endif /* CONFIG_ACPI */ | 337 | #endif /* CONFIG_ACPI */ | 
| 332 | 338 | ||
| 333 | static void | ||
| 334 | show_min_state (pal_min_state_area_t *minstate) | ||
| 335 | { | ||
| 336 | u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri; | ||
| 337 | u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri; | ||
| 338 | |||
| 339 | printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits); | ||
| 340 | printk("pr\t\t%016lx\n", minstate->pmsa_pr); | ||
| 341 | printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0); | ||
| 342 | printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc); | ||
| 343 | printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip); | ||
| 344 | printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr); | ||
| 345 | printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs); | ||
| 346 | printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip); | ||
| 347 | printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr); | ||
| 348 | printk("xfs\t\t%016lx\n", minstate->pmsa_xfs); | ||
| 349 | printk("b1\t\t%016lx ", minstate->pmsa_br1); | ||
| 350 | print_symbol("%s\n", minstate->pmsa_br1); | ||
| 351 | |||
| 352 | printk("\nstatic registers r0-r15:\n"); | ||
| 353 | printk(" r0- 3 %016lx %016lx %016lx %016lx\n", | ||
| 354 | 0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]); | ||
| 355 | printk(" r4- 7 %016lx %016lx %016lx %016lx\n", | ||
| 356 | minstate->pmsa_gr[3], minstate->pmsa_gr[4], | ||
| 357 | minstate->pmsa_gr[5], minstate->pmsa_gr[6]); | ||
| 358 | printk(" r8-11 %016lx %016lx %016lx %016lx\n", | ||
| 359 | minstate->pmsa_gr[7], minstate->pmsa_gr[8], | ||
| 360 | minstate->pmsa_gr[9], minstate->pmsa_gr[10]); | ||
| 361 | printk("r12-15 %016lx %016lx %016lx %016lx\n", | ||
| 362 | minstate->pmsa_gr[11], minstate->pmsa_gr[12], | ||
| 363 | minstate->pmsa_gr[13], minstate->pmsa_gr[14]); | ||
| 364 | |||
| 365 | printk("\nbank 0:\n"); | ||
| 366 | printk("r16-19 %016lx %016lx %016lx %016lx\n", | ||
| 367 | minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1], | ||
| 368 | minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]); | ||
| 369 | printk("r20-23 %016lx %016lx %016lx %016lx\n", | ||
| 370 | minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5], | ||
| 371 | minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]); | ||
| 372 | printk("r24-27 %016lx %016lx %016lx %016lx\n", | ||
| 373 | minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9], | ||
| 374 | minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]); | ||
| 375 | printk("r28-31 %016lx %016lx %016lx %016lx\n", | ||
| 376 | minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13], | ||
| 377 | minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]); | ||
| 378 | |||
| 379 | printk("\nbank 1:\n"); | ||
| 380 | printk("r16-19 %016lx %016lx %016lx %016lx\n", | ||
| 381 | minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1], | ||
| 382 | minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]); | ||
| 383 | printk("r20-23 %016lx %016lx %016lx %016lx\n", | ||
| 384 | minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5], | ||
| 385 | minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]); | ||
| 386 | printk("r24-27 %016lx %016lx %016lx %016lx\n", | ||
| 387 | minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9], | ||
| 388 | minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]); | ||
| 389 | printk("r28-31 %016lx %016lx %016lx %016lx\n", | ||
| 390 | minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13], | ||
| 391 | minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]); | ||
| 392 | } | ||
| 393 | |||
| 394 | static void | ||
| 395 | fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw) | ||
| 396 | { | ||
| 397 | u64 *dst_banked, *src_banked, bit, shift, nat_bits; | ||
| 398 | int i; | ||
| 399 | |||
| 400 | /* | ||
| 401 | * First, update the pt-regs and switch-stack structures with the contents stored | ||
| 402 | * in the min-state area: | ||
| 403 | */ | ||
| 404 | if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) { | ||
| 405 | pt->cr_ipsr = ms->pmsa_xpsr; | ||
| 406 | pt->cr_iip = ms->pmsa_xip; | ||
| 407 | pt->cr_ifs = ms->pmsa_xfs; | ||
| 408 | } else { | ||
| 409 | pt->cr_ipsr = ms->pmsa_ipsr; | ||
| 410 | pt->cr_iip = ms->pmsa_iip; | ||
| 411 | pt->cr_ifs = ms->pmsa_ifs; | ||
| 412 | } | ||
| 413 | pt->ar_rsc = ms->pmsa_rsc; | ||
| 414 | pt->pr = ms->pmsa_pr; | ||
| 415 | pt->r1 = ms->pmsa_gr[0]; | ||
| 416 | pt->r2 = ms->pmsa_gr[1]; | ||
| 417 | pt->r3 = ms->pmsa_gr[2]; | ||
| 418 | sw->r4 = ms->pmsa_gr[3]; | ||
| 419 | sw->r5 = ms->pmsa_gr[4]; | ||
| 420 | sw->r6 = ms->pmsa_gr[5]; | ||
| 421 | sw->r7 = ms->pmsa_gr[6]; | ||
| 422 | pt->r8 = ms->pmsa_gr[7]; | ||
| 423 | pt->r9 = ms->pmsa_gr[8]; | ||
| 424 | pt->r10 = ms->pmsa_gr[9]; | ||
| 425 | pt->r11 = ms->pmsa_gr[10]; | ||
| 426 | pt->r12 = ms->pmsa_gr[11]; | ||
| 427 | pt->r13 = ms->pmsa_gr[12]; | ||
| 428 | pt->r14 = ms->pmsa_gr[13]; | ||
| 429 | pt->r15 = ms->pmsa_gr[14]; | ||
| 430 | dst_banked = &pt->r16; /* r16-r31 are contiguous in struct pt_regs */ | ||
| 431 | src_banked = ms->pmsa_bank1_gr; | ||
| 432 | for (i = 0; i < 16; ++i) | ||
| 433 | dst_banked[i] = src_banked[i]; | ||
| 434 | pt->b0 = ms->pmsa_br0; | ||
| 435 | sw->b1 = ms->pmsa_br1; | ||
| 436 | |||
| 437 | /* construct the NaT bits for the pt-regs structure: */ | ||
| 438 | # define PUT_NAT_BIT(dst, addr) \ | ||
| 439 | do { \ | ||
| 440 | bit = nat_bits & 1; nat_bits >>= 1; \ | ||
| 441 | shift = ((unsigned long) addr >> 3) & 0x3f; \ | ||
| 442 | dst = ((dst) & ~(1UL << shift)) | (bit << shift); \ | ||
| 443 | } while (0) | ||
| 444 | |||
| 445 | /* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */ | ||
| 446 | shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f; | ||
| 447 | nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift)); | ||
| 448 | |||
| 449 | PUT_NAT_BIT(sw->caller_unat, &pt->r1); | ||
| 450 | PUT_NAT_BIT(sw->caller_unat, &pt->r2); | ||
| 451 | PUT_NAT_BIT(sw->caller_unat, &pt->r3); | ||
| 452 | PUT_NAT_BIT(sw->ar_unat, &sw->r4); | ||
| 453 | PUT_NAT_BIT(sw->ar_unat, &sw->r5); | ||
| 454 | PUT_NAT_BIT(sw->ar_unat, &sw->r6); | ||
| 455 | PUT_NAT_BIT(sw->ar_unat, &sw->r7); | ||
| 456 | PUT_NAT_BIT(sw->caller_unat, &pt->r8); PUT_NAT_BIT(sw->caller_unat, &pt->r9); | ||
| 457 | PUT_NAT_BIT(sw->caller_unat, &pt->r10); PUT_NAT_BIT(sw->caller_unat, &pt->r11); | ||
| 458 | PUT_NAT_BIT(sw->caller_unat, &pt->r12); PUT_NAT_BIT(sw->caller_unat, &pt->r13); | ||
| 459 | PUT_NAT_BIT(sw->caller_unat, &pt->r14); PUT_NAT_BIT(sw->caller_unat, &pt->r15); | ||
| 460 | nat_bits >>= 16; /* skip over bank0 NaT bits */ | ||
| 461 | PUT_NAT_BIT(sw->caller_unat, &pt->r16); PUT_NAT_BIT(sw->caller_unat, &pt->r17); | ||
| 462 | PUT_NAT_BIT(sw->caller_unat, &pt->r18); PUT_NAT_BIT(sw->caller_unat, &pt->r19); | ||
| 463 | PUT_NAT_BIT(sw->caller_unat, &pt->r20); PUT_NAT_BIT(sw->caller_unat, &pt->r21); | ||
| 464 | PUT_NAT_BIT(sw->caller_unat, &pt->r22); PUT_NAT_BIT(sw->caller_unat, &pt->r23); | ||
| 465 | PUT_NAT_BIT(sw->caller_unat, &pt->r24); PUT_NAT_BIT(sw->caller_unat, &pt->r25); | ||
| 466 | PUT_NAT_BIT(sw->caller_unat, &pt->r26); PUT_NAT_BIT(sw->caller_unat, &pt->r27); | ||
| 467 | PUT_NAT_BIT(sw->caller_unat, &pt->r28); PUT_NAT_BIT(sw->caller_unat, &pt->r29); | ||
| 468 | PUT_NAT_BIT(sw->caller_unat, &pt->r30); PUT_NAT_BIT(sw->caller_unat, &pt->r31); | ||
| 469 | } | ||
| 470 | |||
| 471 | static void | ||
| 472 | init_handler_platform (pal_min_state_area_t *ms, | ||
| 473 | struct pt_regs *pt, struct switch_stack *sw) | ||
| 474 | { | ||
| 475 | struct unw_frame_info info; | ||
| 476 | |||
| 477 | /* if a kernel debugger is available call it here else just dump the registers */ | ||
| 478 | |||
| 479 | /* | ||
| 480 | * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000), INIT can be | ||
| 481 | * generated via the BMC's command-line interface, but since the console is on the | ||
| 482 | * same serial line, the user will need some time to switch out of the BMC before | ||
| 483 | * the dump begins. | ||
| 484 | */ | ||
| 485 | printk("Delaying for 5 seconds...\n"); | ||
| 486 | udelay(5*1000000); | ||
| 487 | show_min_state(ms); | ||
| 488 | |||
| 489 | printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm); | ||
| 490 | fetch_min_state(ms, pt, sw); | ||
| 491 | unw_init_from_interruption(&info, current, pt, sw); | ||
| 492 | ia64_do_show_stack(&info, NULL); | ||
| 493 | |||
| 494 | if (read_trylock(&tasklist_lock)) { | ||
| 495 | struct task_struct *g, *t; | ||
| 496 | do_each_thread (g, t) { | ||
| 497 | if (t == current) | ||
| 498 | continue; | ||
| 499 | |||
| 500 | printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); | ||
| 501 | show_stack(t, NULL); | ||
| 502 | } while_each_thread (g, t); | ||
| 503 | } | ||
| 504 | |||
| 505 | printk("\nINIT dump complete. Please reboot now.\n"); | ||
| 506 | while (1); /* hang city if no debugger */ | ||
| 507 | } | ||
| 508 | |||
| 509 | #ifdef CONFIG_ACPI | 339 | #ifdef CONFIG_ACPI | 
| 510 | /* | 340 | /* | 
| 511 | * ia64_mca_register_cpev | 341 | * ia64_mca_register_cpev | 
| @@ -648,42 +478,6 @@ ia64_mca_cmc_vector_enable_keventd(void *unused) | |||
| 648 | } | 478 | } | 
| 649 | 479 | ||
| 650 | /* | 480 | /* | 
| 651 | * ia64_mca_wakeup_ipi_wait | ||
| 652 | * | ||
| 653 | * Wait for the inter-cpu interrupt to be sent by the | ||
| 654 | * monarch processor once it is done with handling the | ||
| 655 | * MCA. | ||
| 656 | * | ||
| 657 | * Inputs : None | ||
| 658 | * Outputs : None | ||
| 659 | */ | ||
| 660 | static void | ||
| 661 | ia64_mca_wakeup_ipi_wait(void) | ||
| 662 | { | ||
| 663 | int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6); | ||
| 664 | int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f); | ||
| 665 | u64 irr = 0; | ||
| 666 | |||
| 667 | do { | ||
| 668 | switch(irr_num) { | ||
| 669 | case 0: | ||
| 670 | irr = ia64_getreg(_IA64_REG_CR_IRR0); | ||
| 671 | break; | ||
| 672 | case 1: | ||
| 673 | irr = ia64_getreg(_IA64_REG_CR_IRR1); | ||
| 674 | break; | ||
| 675 | case 2: | ||
| 676 | irr = ia64_getreg(_IA64_REG_CR_IRR2); | ||
| 677 | break; | ||
| 678 | case 3: | ||
| 679 | irr = ia64_getreg(_IA64_REG_CR_IRR3); | ||
| 680 | break; | ||
| 681 | } | ||
| 682 | cpu_relax(); | ||
| 683 | } while (!(irr & (1UL << irr_bit))) ; | ||
| 684 | } | ||
| 685 | |||
| 686 | /* | ||
| 687 | * ia64_mca_wakeup | 481 | * ia64_mca_wakeup | 
| 688 | * | 482 | * | 
| 689 | * Send an inter-cpu interrupt to wake-up a particular cpu | 483 | * Send an inter-cpu interrupt to wake-up a particular cpu | 
| @@ -748,11 +542,9 @@ ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) | |||
| 748 | */ | 542 | */ | 
| 749 | ia64_sal_mc_rendez(); | 543 | ia64_sal_mc_rendez(); | 
| 750 | 544 | ||
| 751 | /* Wait for the wakeup IPI from the monarch | 545 | /* Wait for the monarch cpu to exit. */ | 
| 752 | * This waiting is done by polling on the wakeup-interrupt | 546 | while (monarch_cpu != -1) | 
| 753 | * vector bit in the processor's IRRs | 547 | cpu_relax(); /* spin until monarch leaves */ | 
| 754 | */ | ||
| 755 | ia64_mca_wakeup_ipi_wait(); | ||
| 756 | 548 | ||
| 757 | /* Enable all interrupts */ | 549 | /* Enable all interrupts */ | 
| 758 | local_irq_restore(flags); | 550 | local_irq_restore(flags); | 
| @@ -780,53 +572,13 @@ ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) | |||
| 780 | return IRQ_HANDLED; | 572 | return IRQ_HANDLED; | 
| 781 | } | 573 | } | 
| 782 | 574 | ||
| 783 | /* | ||
| 784 | * ia64_return_to_sal_check | ||
| 785 | * | ||
| 786 | * This is function called before going back from the OS_MCA handler | ||
| 787 | * to the OS_MCA dispatch code which finally takes the control back | ||
| 788 | * to the SAL. | ||
| 789 | * The main purpose of this routine is to setup the OS_MCA to SAL | ||
| 790 | * return state which can be used by the OS_MCA dispatch code | ||
| 791 | * just before going back to SAL. | ||
| 792 | * | ||
| 793 | * Inputs : None | ||
| 794 | * Outputs : None | ||
| 795 | */ | ||
| 796 | |||
| 797 | static void | ||
| 798 | ia64_return_to_sal_check(int recover) | ||
| 799 | { | ||
| 800 | |||
| 801 | /* Copy over some relevant stuff from the sal_to_os_mca_handoff | ||
| 802 | * so that it can be used at the time of os_mca_to_sal_handoff | ||
| 803 | */ | ||
| 804 | ia64_os_to_sal_handoff_state.imots_sal_gp = | ||
| 805 | ia64_sal_to_os_handoff_state.imsto_sal_gp; | ||
| 806 | |||
| 807 | ia64_os_to_sal_handoff_state.imots_sal_check_ra = | ||
| 808 | ia64_sal_to_os_handoff_state.imsto_sal_check_ra; | ||
| 809 | |||
| 810 | if (recover) | ||
| 811 | ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED; | ||
| 812 | else | ||
| 813 | ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT; | ||
| 814 | |||
| 815 | /* Default = tell SAL to return to same context */ | ||
| 816 | ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT; | ||
| 817 | |||
| 818 | ia64_os_to_sal_handoff_state.imots_new_min_state = | ||
| 819 | (u64 *)ia64_sal_to_os_handoff_state.pal_min_state; | ||
| 820 | |||
| 821 | } | ||
| 822 | |||
| 823 | /* Function pointer for extra MCA recovery */ | 575 | /* Function pointer for extra MCA recovery */ | 
| 824 | int (*ia64_mca_ucmc_extension) | 576 | int (*ia64_mca_ucmc_extension) | 
| 825 | (void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*) | 577 | (void*,struct ia64_sal_os_state*) | 
| 826 | = NULL; | 578 | = NULL; | 
| 827 | 579 | ||
| 828 | int | 580 | int | 
| 829 | ia64_reg_MCA_extension(void *fn) | 581 | ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)) | 
| 830 | { | 582 | { | 
| 831 | if (ia64_mca_ucmc_extension) | 583 | if (ia64_mca_ucmc_extension) | 
| 832 | return 1; | 584 | return 1; | 
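
The hunk above narrows the MCA recovery extension interface: extensions now receive a single `struct ia64_sal_os_state` pointer instead of the two global handoff structures, and `ia64_reg_MCA_extension()` is now type-checked rather than taking a bare `void *`. A hypothetical extension would register along these lines (the name and body of `my_mca_recovery` are invented for illustration; only the call shape comes from the patch):

```c
#include <linux/init.h>
#include <linux/errno.h>

/* Prototype as introduced by this patch (normally picked up from the
 * mca header).
 */
extern int ia64_reg_MCA_extension(int (*fn)(void *,
					    struct ia64_sal_os_state *));

/* Hypothetical recovery extension; the logic is a placeholder. */
static int
my_mca_recovery(void *record, struct ia64_sal_os_state *sos)
{
	/* inspect the SAL record and sos; return nonzero if corrected */
	return 0;
}

static int __init
my_recovery_init(void)
{
	/* ia64_reg_MCA_extension() returns 1 if an extension is already
	 * registered, 0 on success.
	 */
	return ia64_reg_MCA_extension(my_mca_recovery) ? -EBUSY : 0;
}
```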
| @@ -845,8 +597,321 @@ ia64_unreg_MCA_extension(void) | |||
| 845 | EXPORT_SYMBOL(ia64_reg_MCA_extension); | 597 | EXPORT_SYMBOL(ia64_reg_MCA_extension); | 
| 846 | EXPORT_SYMBOL(ia64_unreg_MCA_extension); | 598 | EXPORT_SYMBOL(ia64_unreg_MCA_extension); | 
| 847 | 599 | ||
| 600 | |||
| 601 | static inline void | ||
| 602 | copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat) | ||
| 603 | { | ||
| 604 | u64 fslot, tslot, nat; | ||
| 605 | *tr = *fr; | ||
| 606 | fslot = ((unsigned long)fr >> 3) & 63; | ||
| 607 | tslot = ((unsigned long)tr >> 3) & 63; | ||
| 608 | *tnat &= ~(1UL << tslot); | ||
| 609 | nat = (fnat >> fslot) & 1; | ||
| 610 | *tnat |= (nat << tslot); | ||
| 611 | } | ||
| 612 | |||
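
`copy_reg()` moves one register value plus its NaT bit between save areas. The slot arithmetic follows the ia64 unat convention: the NaT bit for a value spilled at address `A` sits at bit `(A >> 3) & 63` of the matching unat collection, so the bit position must be recomputed whenever the source and destination areas are aligned differently. A standalone, user-space illustration of that calculation (not kernel code):

```c
#include <stdio.h>
#include <stdint.h>

/* NaT bit position for a register spilled at `addr`: the doubleword
 * index within the current 64-slot unat window.
 */
static unsigned slot_of(const void *addr)
{
	return ((uintptr_t)addr >> 3) & 63;
}

int main(void)
{
	uint64_t from[2], to[4];
	/* the same logical register lands in different slots of each area */
	printf("fslot=%u tslot=%u\n", slot_of(&from[1]), slot_of(&to[3]));
	return 0;
}
```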
| 613 | /* On entry to this routine, we are running on the per cpu stack, see | ||
| 614 | * mca_asm.h. The original stack has not been touched by this event. Some of | ||
| 615 | * the original stack's registers will be in the RBS on this stack. This stack | ||
| 616 | * also contains a partial pt_regs and switch_stack, the rest of the data is in | ||
| 617 | * PAL minstate. | ||
| 618 | * | ||
| 619 | * The first thing to do is modify the original stack to look like a blocked | ||
| 620 | * task so we can run backtrace on the original task. Also mark the per cpu | ||
| 621 | * stack as current to ensure that we use the correct task state, it also means | ||
| 622 | * that we can do backtrace on the MCA/INIT handler code itself. | ||
| 623 | */ | ||
| 624 | |||
| 625 | static task_t * | ||
| 626 | ia64_mca_modify_original_stack(struct pt_regs *regs, | ||
| 627 | const struct switch_stack *sw, | ||
| 628 | struct ia64_sal_os_state *sos, | ||
| 629 | const char *type) | ||
| 630 | { | ||
| 631 | char *p, comm[sizeof(current->comm)]; | ||
| 632 | ia64_va va; | ||
| 633 | extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ | ||
| 634 | const pal_min_state_area_t *ms = sos->pal_min_state; | ||
| 635 | task_t *previous_current; | ||
| 636 | struct pt_regs *old_regs; | ||
| 637 | struct switch_stack *old_sw; | ||
| 638 | unsigned size = sizeof(struct pt_regs) + | ||
| 639 | sizeof(struct switch_stack) + 16; | ||
| 640 | u64 *old_bspstore, *old_bsp; | ||
| 641 | u64 *new_bspstore, *new_bsp; | ||
| 642 | u64 old_unat, old_rnat, new_rnat, nat; | ||
| 643 | u64 slots, loadrs = regs->loadrs; | ||
| 644 | u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1]; | ||
| 645 | u64 ar_bspstore = regs->ar_bspstore; | ||
| 646 | u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16); | ||
| 647 | const u64 *bank; | ||
| 648 | const char *msg; | ||
| 649 | int cpu = smp_processor_id(); | ||
| 650 | |||
| 651 | previous_current = curr_task(cpu); | ||
| 652 | set_curr_task(cpu, current); | ||
| 653 | if ((p = strchr(current->comm, ' '))) | ||
| 654 | *p = '\0'; | ||
| 655 | |||
| 656 | /* Best effort attempt to cope with MCA/INIT delivered while in | ||
| 657 | * physical mode. | ||
| 658 | */ | ||
| 659 | regs->cr_ipsr = ms->pmsa_ipsr; | ||
| 660 | if (ia64_psr(regs)->dt == 0) { | ||
| 661 | va.l = r12; | ||
| 662 | if (va.f.reg == 0) { | ||
| 663 | va.f.reg = 7; | ||
| 664 | r12 = va.l; | ||
| 665 | } | ||
| 666 | va.l = r13; | ||
| 667 | if (va.f.reg == 0) { | ||
| 668 | va.f.reg = 7; | ||
| 669 | r13 = va.l; | ||
| 670 | } | ||
| 671 | } | ||
| 672 | if (ia64_psr(regs)->rt == 0) { | ||
| 673 | va.l = ar_bspstore; | ||
| 674 | if (va.f.reg == 0) { | ||
| 675 | va.f.reg = 7; | ||
| 676 | ar_bspstore = va.l; | ||
| 677 | } | ||
| 678 | va.l = ar_bsp; | ||
| 679 | if (va.f.reg == 0) { | ||
| 680 | va.f.reg = 7; | ||
| 681 | ar_bsp = va.l; | ||
| 682 | } | ||
| 683 | } | ||
| 684 | |||
| 685 | /* mca_asm.S ia64_old_stack() cannot assume that the dirty registers | ||
| 686 | * have been copied to the old stack, the old stack may fail the | ||
| 687 | * validation tests below. So ia64_old_stack() must restore the dirty | ||
| 688 | * registers from the new stack. The old and new bspstore probably | ||
| 689 | * have different alignments, so loadrs calculated on the old bsp | ||
| 690 | * cannot be used to restore from the new bsp. Calculate a suitable | ||
| 691 | * loadrs for the new stack and save it in the new pt_regs, where | ||
| 692 | * ia64_old_stack() can get it. | ||
| 693 | */ | ||
| 694 | old_bspstore = (u64 *)ar_bspstore; | ||
| 695 | old_bsp = (u64 *)ar_bsp; | ||
| 696 | slots = ia64_rse_num_regs(old_bspstore, old_bsp); | ||
| 697 | new_bspstore = (u64 *)((u64)current + IA64_RBS_OFFSET); | ||
| 698 | new_bsp = ia64_rse_skip_regs(new_bspstore, slots); | ||
| 699 | regs->loadrs = (new_bsp - new_bspstore) * 8 << 16; | ||
| 700 | |||
| 701 | /* Verify the previous stack state before we change it */ | ||
| 702 | if (user_mode(regs)) { | ||
| 703 | msg = "occurred in user space"; | ||
| 704 | goto no_mod; | ||
| 705 | } | ||
| 706 | if (r13 != sos->prev_IA64_KR_CURRENT) { | ||
| 707 | msg = "inconsistent previous current and r13"; | ||
| 708 | goto no_mod; | ||
| 709 | } | ||
| 710 | if ((r12 - r13) >= KERNEL_STACK_SIZE) { | ||
| 711 | msg = "inconsistent r12 and r13"; | ||
| 712 | goto no_mod; | ||
| 713 | } | ||
| 714 | if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { | ||
| 715 | msg = "inconsistent ar.bspstore and r13"; | ||
| 716 | goto no_mod; | ||
| 717 | } | ||
| 718 | va.p = old_bspstore; | ||
| 719 | if (va.f.reg < 5) { | ||
| 720 | msg = "old_bspstore is in the wrong region"; | ||
| 721 | goto no_mod; | ||
| 722 | } | ||
| 723 | if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { | ||
| 724 | msg = "inconsistent ar.bsp and r13"; | ||
| 725 | goto no_mod; | ||
| 726 | } | ||
| 727 | size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; | ||
| 728 | if (ar_bspstore + size > r12) { | ||
| 729 | msg = "no room for blocked state"; | ||
| 730 | goto no_mod; | ||
| 731 | } | ||
| 732 | |||
| 733 | /* Change the comm field on the MCA/INIT task to include the pid that | ||
| 734 | * was interrupted; it makes for easier debugging. If that pid was 0 | ||
| 735 | * (swapper or nested MCA/INIT) then use the start of the previous comm | ||
| 736 | * field suffixed with its cpu. | ||
| 737 | */ | ||
| 738 | if (previous_current->pid) | ||
| 739 | snprintf(comm, sizeof(comm), "%s %d", | ||
| 740 | current->comm, previous_current->pid); | ||
| 741 | else { | ||
| 742 | int l; | ||
| 743 | if ((p = strchr(previous_current->comm, ' '))) | ||
| 744 | l = p - previous_current->comm; | ||
| 745 | else | ||
| 746 | l = strlen(previous_current->comm); | ||
| 747 | snprintf(comm, sizeof(comm), "%s %*s %d", | ||
| 748 | current->comm, l, previous_current->comm, | ||
| 749 | previous_current->thread_info->cpu); | ||
| 750 | } | ||
| 751 | memcpy(current->comm, comm, sizeof(current->comm)); | ||
| 752 | |||
| 753 | /* Make the original task look blocked. First stack a struct pt_regs, | ||
| 754 | * describing the state at the time of interrupt. mca_asm.S built a | ||
| 755 | * partial pt_regs, copy it and fill in the blanks using minstate. | ||
| 756 | */ | ||
| 757 | p = (char *)r12 - sizeof(*regs); | ||
| 758 | old_regs = (struct pt_regs *)p; | ||
| 759 | memcpy(old_regs, regs, sizeof(*regs)); | ||
| 760 | /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use | ||
| 761 | * pmsa_{xip,xpsr,xfs} | ||
| 762 | */ | ||
| 763 | if (ia64_psr(regs)->ic) { | ||
| 764 | old_regs->cr_iip = ms->pmsa_iip; | ||
| 765 | old_regs->cr_ipsr = ms->pmsa_ipsr; | ||
| 766 | old_regs->cr_ifs = ms->pmsa_ifs; | ||
| 767 | } else { | ||
| 768 | old_regs->cr_iip = ms->pmsa_xip; | ||
| 769 | old_regs->cr_ipsr = ms->pmsa_xpsr; | ||
| 770 | old_regs->cr_ifs = ms->pmsa_xfs; | ||
| 771 | } | ||
| 772 | old_regs->pr = ms->pmsa_pr; | ||
| 773 | old_regs->b0 = ms->pmsa_br0; | ||
| 774 | old_regs->loadrs = loadrs; | ||
| 775 | old_regs->ar_rsc = ms->pmsa_rsc; | ||
| 776 | old_unat = old_regs->ar_unat; | ||
| 777 | copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, &old_regs->r1, &old_unat); | ||
| 778 | copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, &old_regs->r2, &old_unat); | ||
| 779 | copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, &old_regs->r3, &old_unat); | ||
| 780 | copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, &old_regs->r8, &old_unat); | ||
| 781 | copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, &old_regs->r9, &old_unat); | ||
| 782 | copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, &old_regs->r10, &old_unat); | ||
| 783 | copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, &old_regs->r11, &old_unat); | ||
| 784 | copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, &old_regs->r12, &old_unat); | ||
| 785 | copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, &old_regs->r13, &old_unat); | ||
| 786 | copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, &old_regs->r14, &old_unat); | ||
| 787 | copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, &old_regs->r15, &old_unat); | ||
| 788 | if (ia64_psr(old_regs)->bn) | ||
| 789 | bank = ms->pmsa_bank1_gr; | ||
| 790 | else | ||
| 791 | bank = ms->pmsa_bank0_gr; | ||
| 792 | copy_reg(&bank[16-16], ms->pmsa_nat_bits, &old_regs->r16, &old_unat); | ||
| 793 | copy_reg(&bank[17-16], ms->pmsa_nat_bits, &old_regs->r17, &old_unat); | ||
| 794 | copy_reg(&bank[18-16], ms->pmsa_nat_bits, &old_regs->r18, &old_unat); | ||
| 795 | copy_reg(&bank[19-16], ms->pmsa_nat_bits, &old_regs->r19, &old_unat); | ||
| 796 | copy_reg(&bank[20-16], ms->pmsa_nat_bits, &old_regs->r20, &old_unat); | ||
| 797 | copy_reg(&bank[21-16], ms->pmsa_nat_bits, &old_regs->r21, &old_unat); | ||
| 798 | copy_reg(&bank[22-16], ms->pmsa_nat_bits, &old_regs->r22, &old_unat); | ||
| 799 | copy_reg(&bank[23-16], ms->pmsa_nat_bits, &old_regs->r23, &old_unat); | ||
| 800 | copy_reg(&bank[24-16], ms->pmsa_nat_bits, &old_regs->r24, &old_unat); | ||
| 801 | copy_reg(&bank[25-16], ms->pmsa_nat_bits, &old_regs->r25, &old_unat); | ||
| 802 | copy_reg(&bank[26-16], ms->pmsa_nat_bits, &old_regs->r26, &old_unat); | ||
| 803 | copy_reg(&bank[27-16], ms->pmsa_nat_bits, &old_regs->r27, &old_unat); | ||
| 804 | copy_reg(&bank[28-16], ms->pmsa_nat_bits, &old_regs->r28, &old_unat); | ||
| 805 | copy_reg(&bank[29-16], ms->pmsa_nat_bits, &old_regs->r29, &old_unat); | ||
| 806 | copy_reg(&bank[30-16], ms->pmsa_nat_bits, &old_regs->r30, &old_unat); | ||
| 807 | copy_reg(&bank[31-16], ms->pmsa_nat_bits, &old_regs->r31, &old_unat); | ||
| 808 | |||
| 809 | /* Next stack a struct switch_stack. mca_asm.S built a partial | ||
| 810 | * switch_stack, copy it and fill in the blanks using pt_regs and | ||
| 811 | * minstate. | ||
| 812 | * | ||
| 813 | * In the synthesized switch_stack, b0 points to ia64_leave_kernel, | ||
| 814 | * ar.pfs is set to 0. | ||
| 815 | * | ||
| 816 | * unwind.c::unw_unwind() does special processing for interrupt frames. | ||
| 817 | * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate | ||
| 818 | * is clear then unw_unwind() does _not_ adjust bsp over pt_regs. Not | ||
| 819 | * that this is documented, of course. Set PRED_NON_SYSCALL in the | ||
| 820 | * switch_stack on the original stack so it will unwind correctly when | ||
| 821 | * unwind.c reads pt_regs. | ||
| 822 | * | ||
| 823 | * thread.ksp is updated to point to the synthesized switch_stack. | ||
| 824 | */ | ||
| 825 | p -= sizeof(struct switch_stack); | ||
| 826 | old_sw = (struct switch_stack *)p; | ||
| 827 | memcpy(old_sw, sw, sizeof(*sw)); | ||
| 828 | old_sw->caller_unat = old_unat; | ||
| 829 | old_sw->ar_fpsr = old_regs->ar_fpsr; | ||
| 830 | copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat); | ||
| 831 | copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat); | ||
| 832 | copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat); | ||
| 833 | copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat); | ||
| 834 | old_sw->b0 = (u64)ia64_leave_kernel; | ||
| 835 | old_sw->b1 = ms->pmsa_br1; | ||
| 836 | old_sw->ar_pfs = 0; | ||
| 837 | old_sw->ar_unat = old_unat; | ||
| 838 | old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL); | ||
| 839 | previous_current->thread.ksp = (u64)p - 16; | ||
| 840 | |||
| 841 | /* Finally copy the original stack's registers back to its RBS. | ||
| 842 | * Registers from ar.bspstore through ar.bsp at the time of the event | ||
| 843 | * are in the current RBS, copy them back to the original stack. The | ||
| 844 | * copy must be done register by register because the original bspstore | ||
| 845 | * and the current one have different alignments, so the saved RNAT | ||
| 846 | * data occurs at different places. | ||
| 847 | * | ||
| 848 | * mca_asm does cover, so the old_bsp already includes all registers at | ||
| 849 | * the time of MCA/INIT. It also does flushrs, so all registers before | ||
| 850 | * this function have been written to backing store on the MCA/INIT | ||
| 851 | * stack. | ||
| 852 | */ | ||
| 853 | new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore)); | ||
| 854 | old_rnat = regs->ar_rnat; | ||
| 855 | while (slots--) { | ||
| 856 | if (ia64_rse_is_rnat_slot(new_bspstore)) { | ||
| 857 | new_rnat = ia64_get_rnat(new_bspstore++); | ||
| 858 | } | ||
| 859 | if (ia64_rse_is_rnat_slot(old_bspstore)) { | ||
| 860 | *old_bspstore++ = old_rnat; | ||
| 861 | old_rnat = 0; | ||
| 862 | } | ||
| 863 | nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL; | ||
| 864 | old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore)); | ||
| 865 | old_rnat |= (nat << ia64_rse_slot_num(old_bspstore)); | ||
| 866 | *old_bspstore++ = *new_bspstore++; | ||
| 867 | } | ||
| 868 | old_sw->ar_bspstore = (unsigned long)old_bspstore; | ||
| 869 | old_sw->ar_rnat = old_rnat; | ||
| 870 | |||
| 871 | sos->prev_task = previous_current; | ||
| 872 | return previous_current; | ||
| 873 | |||
| 874 | no_mod: | ||
| 875 | printk(KERN_INFO "cpu %d, %s %s, original stack not modified\n", | ||
| 876 | smp_processor_id(), type, msg); | ||
| 877 | return previous_current; | ||
| 878 | } | ||
| 879 | |||
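
The net effect of `ia64_mca_modify_original_stack()` is easiest to see as a picture. A sketch of the interrupted task's memory stack after the call, with offsets as computed above:

```c
/*
 *   high addresses
 *   r12 at the time of the event   <- interrupted task's sp
 *   struct pt_regs                 <- partial frame from mca_asm.S,
 *                                     blanks filled from PAL minstate
 *   struct switch_stack            <- b0 = ia64_leave_kernel, ar.pfs = 0
 *   16 byte scratch area           <- previous_current->thread.ksp
 *   low addresses                     points below it (p - 16)
 *
 * To the unwinder the interrupted task now looks like it blocked in
 * ia64_leave_kernel, so an ordinary backtrace works on it while the
 * handler itself runs on the per cpu MCA/INIT stack.
 */
```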
| 880 | /* The monarch/slave interaction is based on monarch_cpu and requires that all | ||
| 881 | * slaves have entered rendezvous before the monarch leaves. If any cpu has | ||
| 882 | * not entered rendezvous yet then wait a bit. The assumption is that any | ||
| 883 | * slave that has not rendezvoused after a reasonable time is never going to do | ||
| 884 | * so. In this context, slave includes cpus that respond to the MCA rendezvous | ||
| 885 | * interrupt, as well as cpus that receive the INIT slave event. | ||
| 886 | */ | ||
| 887 | |||
| 888 | static void | ||
| 889 | ia64_wait_for_slaves(int monarch) | ||
| 890 | { | ||
| 891 | int c, wait = 0; | ||
| 892 | for_each_online_cpu(c) { | ||
| 893 | if (c == monarch) | ||
| 894 | continue; | ||
| 895 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { | ||
| 896 | udelay(1000); /* short wait first */ | ||
| 897 | wait = 1; | ||
| 898 | break; | ||
| 899 | } | ||
| 900 | } | ||
| 901 | if (!wait) | ||
| 902 | return; | ||
| 903 | for_each_online_cpu(c) { | ||
| 904 | if (c == monarch) | ||
| 905 | continue; | ||
| 906 | if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { | ||
| 907 | udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */ | ||
| 908 | break; | ||
| 909 | } | ||
| 910 | } | ||
| 911 | } | ||
| 912 | |||
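
Together with the rendezvous handler change earlier in the diff, the monarch/slave interaction reduces to a spin protocol on `monarch_cpu` plus the `imi_rendez_checkin[]` flags. A minimal user-space model of the spin phase, using C11 atomics in place of `cpu_relax()` loops (illustrative only; it omits the SAL rendezvous and wakeup IPI, and the names simply mirror the kernel code):

```c
#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int monarch_cpu = -1;
static atomic_int checked_in;	/* stands in for imi_rendez_checkin[] */

static void *slave(void *arg)
{
	long cpu = (long)arg;
	while (atomic_load(&monarch_cpu) == -1)
		;			/* spin until the monarch enters */
	atomic_fetch_add(&checked_in, 1);
	while (atomic_load(&monarch_cpu) != -1)
		;			/* spin until the monarch leaves */
	printf("slave on cpu %ld returning to normal service\n", cpu);
	return NULL;
}

int main(void)
{
	pthread_t t;

	atomic_store(&monarch_cpu, 0);	/* monarch (cpu 0) enters first */
	pthread_create(&t, NULL, slave, (void *)1L);
	while (atomic_load(&checked_in) < 1)
		;			/* analogue of ia64_wait_for_slaves() */
	/* ... the monarch would log the error and dump stacks here ... */
	atomic_store(&monarch_cpu, -1);	/* leaving releases the slaves */
	pthread_join(t, NULL);
	return 0;
}
```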
| 848 | /* | 913 | /* | 
| 849 | * ia64_mca_ucmc_handler | 914 | * ia64_mca_handler | 
| 850 | * | 915 | * | 
| 851 | * This is the uncorrectable machine check handler called from OS_MCA | 916 | * This is the uncorrectable machine check handler called from OS_MCA | 
| 852 | * dispatch code which is in turn called from SAL_CHECK(). | 917 | * dispatch code which is in turn called from SAL_CHECK(). | 
| @@ -857,16 +922,28 @@ EXPORT_SYMBOL(ia64_unreg_MCA_extension); | |||
| 857 | * further MCA logging is enabled by clearing logs. | 922 | * further MCA logging is enabled by clearing logs. | 
| 858 | * Monarch also has the duty of sending wakeup-IPIs to pull the | 923 | * Monarch also has the duty of sending wakeup-IPIs to pull the | 
| 859 | * slave processors out of rendezvous spinloop. | 924 | * slave processors out of rendezvous spinloop. | 
| 860 | * | ||
| 861 | * Inputs : None | ||
| 862 | * Outputs : None | ||
| 863 | */ | 925 | */ | 
| 864 | void | 926 | void | 
| 865 | ia64_mca_ucmc_handler(void) | 927 | ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, | 
| 928 | struct ia64_sal_os_state *sos) | ||
| 866 | { | 929 | { | 
| 867 | pal_processor_state_info_t *psp = (pal_processor_state_info_t *) | 930 | pal_processor_state_info_t *psp = (pal_processor_state_info_t *) | 
| 868 | &ia64_sal_to_os_handoff_state.proc_state_param; | 931 | &sos->proc_state_param; | 
| 869 | int recover; | 932 | int recover, cpu = smp_processor_id(); | 
| 933 | task_t *previous_current; | ||
| 934 | |||
| 935 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | ||
| 936 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); | ||
| 937 | monarch_cpu = cpu; | ||
| 938 | ia64_wait_for_slaves(cpu); | ||
| 939 | |||
| 940 | /* Wakeup all the processors which are spinning in the rendezvous loop. | ||
| 941 | * They will leave SAL, then spin in the OS with interrupts disabled | ||
| 942 | * until this monarch cpu leaves the MCA handler. That gets control | ||
| 943 | * back to the OS so we can backtrace the other cpus; backtracing | ||
| 944 | * cpus spinning in SAL does not work. | ||
| 945 | */ | ||
| 946 | ia64_mca_wakeup_all(); | ||
| 870 | 947 | ||
| 871 | /* Get the MCA error record and log it */ | 948 | /* Get the MCA error record and log it */ | 
| 872 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); | 949 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); | 
| @@ -874,25 +951,20 @@ ia64_mca_ucmc_handler(void) | |||
| 874 | /* Only the TLB error is present in this SAL error record */ | 951 | /* Only the TLB error is present in this SAL error record */ | 
| 875 | recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) | 952 | recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) | 
| 876 | /* other error recovery */ | 953 | /* other error recovery */ | 
| 877 | || (ia64_mca_ucmc_extension | 954 | || (ia64_mca_ucmc_extension | 
| 878 | && ia64_mca_ucmc_extension( | 955 | && ia64_mca_ucmc_extension( | 
| 879 | IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), | 956 | IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), | 
| 880 | &ia64_sal_to_os_handoff_state, | 957 | sos)); | 
| 881 | &ia64_os_to_sal_handoff_state)); | ||
| 882 | 958 | ||
| 883 | if (recover) { | 959 | if (recover) { | 
| 884 | sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); | 960 | sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); | 
| 885 | rh->severity = sal_log_severity_corrected; | 961 | rh->severity = sal_log_severity_corrected; | 
| 886 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | 962 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | 
| 963 | sos->os_status = IA64_MCA_CORRECTED; | ||
| 887 | } | 964 | } | 
| 888 | /* | ||
| 889 | * Wakeup all the processors which are spinning in the rendezvous | ||
| 890 | * loop. | ||
| 891 | */ | ||
| 892 | ia64_mca_wakeup_all(); | ||
| 893 | 965 | ||
| 894 | /* Return to SAL */ | 966 | set_curr_task(cpu, previous_current); | 
| 895 | ia64_return_to_sal_check(recover); | 967 | monarch_cpu = -1; | 
| 896 | } | 968 | } | 
| 897 | 969 | ||
| 898 | static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); | 970 | static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); | 
| @@ -1116,34 +1188,114 @@ ia64_mca_cpe_poll (unsigned long dummy) | |||
| 1116 | /* | 1188 | /* | 
| 1117 | * C portion of the OS INIT handler | 1189 | * C portion of the OS INIT handler | 
| 1118 | * | 1190 | * | 
| 1119 | * Called from ia64_monarch_init_handler | 1191 | * Called from ia64_os_init_dispatch | 
| 1120 | * | ||
| 1121 | * Inputs: pointer to pt_regs where processor info was saved. | ||
| 1122 | * | 1192 | * | 
| 1123 | * Returns: | 1193 | * Inputs: pointer to pt_regs where processor info was saved. SAL/OS state for | 
| 1124 | * 0 if SAL must warm boot the System | 1194 | * this event. This code is used for both monarch and slave INIT events, see | 
| 1125 | * 1 if SAL must return to interrupted context using PAL_MC_RESUME | 1195 | * sos->monarch. | 
| 1126 | * | 1196 | * | 
| 1197 | * All INIT events switch to the INIT stack and change the previous process to | ||
| 1198 | * blocked status. If one of the INIT events is the monarch then we are | ||
| 1199 | * probably processing the nmi button/command. Use the monarch cpu to dump all | ||
| 1200 | * the processes. The slave INIT events all spin until the monarch cpu | ||
| 1201 | * returns. We can also get INIT slave events for MCA, in which case the MCA | ||
| 1202 | * process is the monarch. | ||
| 1127 | */ | 1203 | */ | 
| 1204 | |||
| 1128 | void | 1205 | void | 
| 1129 | ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw) | 1206 | ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, | 
| 1207 | struct ia64_sal_os_state *sos) | ||
| 1130 | { | 1208 | { | 
| 1131 | pal_min_state_area_t *ms; | 1209 | static atomic_t slaves; | 
| 1210 | static atomic_t monarchs; | ||
| 1211 | task_t *previous_current; | ||
| 1212 | int cpu = smp_processor_id(), c; | ||
| 1213 | struct task_struct *g, *t; | ||
| 1132 | 1214 | ||
| 1133 | oops_in_progress = 1; /* avoid deadlock in printk, but it makes recovery dodgy */ | 1215 | oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */ | 
| 1134 | console_loglevel = 15; /* make sure printks make it to console */ | 1216 | console_loglevel = 15; /* make sure printks make it to console */ | 
| 1135 | 1217 | ||
| 1136 | printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n", | 1218 | printk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", | 
| 1137 | ia64_sal_to_os_handoff_state.proc_state_param); | 1219 | sos->proc_state_param, cpu, sos->monarch); | 
| 1220 | salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); | ||
| 1138 | 1221 | ||
| 1139 | /* | 1222 | previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT"); | 
| 1140 | * Address of minstate area provided by PAL is physical, | 1223 | sos->os_status = IA64_INIT_RESUME; | 
| 1141 | * uncacheable (bit 63 set). Convert to Linux virtual | 1224 | |
| 1142 | * address in region 6. | 1225 | /* FIXME: Workaround for broken proms that drive all INIT events as | 
| 1226 | * slaves. The last slave that enters is promoted to be a monarch. | ||
| 1227 | * Remove this code in September 2006, that gives platforms a year to | ||
| 1228 | * fix their proms and get their customers updated. | ||
| 1143 | */ | 1229 | */ | 
| 1144 | ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61)); | 1230 | if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { | 
| 1231 | printk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", | ||
| 1232 | __FUNCTION__, cpu); | ||
| 1233 | atomic_dec(&slaves); | ||
| 1234 | sos->monarch = 1; | ||
| 1235 | } | ||
| 1145 | 1236 | ||
| 1146 | init_handler_platform(ms, pt, sw); /* call platform specific routines */ | 1237 | /* FIXME: Workaround for broken proms that drive all INIT events as | 
| 1238 | * monarchs. Second and subsequent monarchs are demoted to slaves. | ||
| 1239 | * Remove this code in September 2006, that gives platforms a year to | ||
| 1240 | * fix their proms and get their customers updated. | ||
| 1241 | */ | ||
| 1242 | if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { | ||
| 1243 | printk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", | ||
| 1244 | __FUNCTION__, cpu); | ||
| 1245 | atomic_dec(&monarchs); | ||
| 1246 | sos->monarch = 0; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | if (!sos->monarch) { | ||
| 1250 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; | ||
| 1251 | while (monarch_cpu == -1) | ||
| 1252 | cpu_relax(); /* spin until monarch enters */ | ||
| 1253 | while (monarch_cpu != -1) | ||
| 1254 | cpu_relax(); /* spin until monarch leaves */ | ||
| 1255 | printk("Slave on cpu %d returning to normal service.\n", cpu); | ||
| 1256 | set_curr_task(cpu, previous_current); | ||
| 1257 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | ||
| 1258 | atomic_dec(&slaves); | ||
| 1259 | return; | ||
| 1260 | } | ||
| 1261 | |||
| 1262 | monarch_cpu = cpu; | ||
| 1263 | |||
| 1264 | /* | ||
| 1265 | * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000), INIT can be | ||
| 1266 | * generated via the BMC's command-line interface, but since the console is on the | ||
| 1267 | * same serial line, the user will need some time to switch out of the BMC before | ||
| 1268 | * the dump begins. | ||
| 1269 | */ | ||
| 1270 | printk("Delaying for 5 seconds...\n"); | ||
| 1271 | udelay(5*1000000); | ||
| 1272 | ia64_wait_for_slaves(cpu); | ||
| 1273 | printk(KERN_ERR "Processes interrupted by INIT -"); | ||
| 1274 | for_each_online_cpu(c) { | ||
| 1275 | struct ia64_sal_os_state *s; | ||
| 1276 | t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); | ||
| 1277 | s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); | ||
| 1278 | g = s->prev_task; | ||
| 1279 | if (g) { | ||
| 1280 | if (g->pid) | ||
| 1281 | printk(" %d", g->pid); | ||
| 1282 | else | ||
| 1283 | printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); | ||
| 1284 | } | ||
| 1285 | } | ||
| 1286 | printk("\n\n"); | ||
| 1287 | if (read_trylock(&tasklist_lock)) { | ||
| 1288 | do_each_thread (g, t) { | ||
| 1289 | printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); | ||
| 1290 | show_stack(t, NULL); | ||
| 1291 | } while_each_thread (g, t); | ||
| 1292 | read_unlock(&tasklist_lock); | ||
| 1293 | } | ||
| 1294 | printk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); | ||
| 1295 | atomic_dec(&monarchs); | ||
| 1296 | set_curr_task(cpu, previous_current); | ||
| 1297 | monarch_cpu = -1; | ||
| 1298 | return; | ||
| 1147 | } | 1299 | } | 
| 1148 | 1300 | ||
| 1149 | static int __init | 1301 | static int __init | 
| @@ -1193,6 +1345,34 @@ static struct irqaction mca_cpep_irqaction = { | |||
| 1193 | }; | 1345 | }; | 
| 1194 | #endif /* CONFIG_ACPI */ | 1346 | #endif /* CONFIG_ACPI */ | 
| 1195 | 1347 | ||
| 1348 | /* Minimal format of the MCA/INIT stacks. The pseudo processes that run on | ||
| 1349 | * these stacks can never sleep, they cannot return from the kernel to user | ||
| 1350 | * space, they do not appear in a normal ps listing. So there is no need to | ||
| 1351 | * format most of the fields. | ||
| 1352 | */ | ||
| 1353 | |||
| 1354 | static void | ||
| 1355 | format_mca_init_stack(void *mca_data, unsigned long offset, | ||
| 1356 | const char *type, int cpu) | ||
| 1357 | { | ||
| 1358 | struct task_struct *p = (struct task_struct *)((char *)mca_data + offset); | ||
| 1359 | struct thread_info *ti; | ||
| 1360 | memset(p, 0, KERNEL_STACK_SIZE); | ||
| 1361 | ti = (struct thread_info *)((char *)p + IA64_TASK_SIZE); | ||
| 1362 | ti->flags = _TIF_MCA_INIT; | ||
| 1363 | ti->preempt_count = 1; | ||
| 1364 | ti->task = p; | ||
| 1365 | ti->cpu = cpu; | ||
| 1366 | p->thread_info = ti; | ||
| 1367 | p->state = TASK_UNINTERRUPTIBLE; | ||
| 1368 | __set_bit(cpu, &p->cpus_allowed); | ||
| 1369 | INIT_LIST_HEAD(&p->tasks); | ||
| 1370 | p->parent = p->real_parent = p->group_leader = p; | ||
| 1371 | INIT_LIST_HEAD(&p->children); | ||
| 1372 | INIT_LIST_HEAD(&p->sibling); | ||
| 1373 | strncpy(p->comm, type, sizeof(p->comm)-1); | ||
| 1374 | } | ||
| 1375 | |||
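
`format_mca_init_stack()` fills in only the fields that backtrace and task-handling code will actually touch; on ia64 the `task_struct` and its `thread_info` share the bottom of the stack region, so one `memset()` plus a handful of assignments suffices. A sketch of one formatted region, following the arithmetic above:

```c
/*
 *   base = mca_data + offset           (KERNEL_STACK_SIZE bytes total)
 *   +--------------------------------+
 *   | struct task_struct             |  p: comm = "MCA" or "INIT"
 *   +--------------------------------+
 *   | struct thread_info             |  ti = p + IA64_TASK_SIZE
 *   |   flags = _TIF_MCA_INIT        |  preempt_count = 1, so the
 *   |                                |  pseudo task can never sleep
 *   +--------------------------------+
 *   | register backing store (up)    |
 *   |             ...                |
 *   | memory stack (down)            |
 *   +--------------------------------+
 */
```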
| 1196 | /* Do per-CPU MCA-related initialization. */ | 1376 | /* Do per-CPU MCA-related initialization. */ | 
| 1197 | 1377 | ||
| 1198 | void __devinit | 1378 | void __devinit | 
| @@ -1205,19 +1385,28 @@ ia64_mca_cpu_init(void *cpu_data) | |||
| 1205 | int cpu; | 1385 | int cpu; | 
| 1206 | 1386 | ||
| 1207 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) | 1387 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) | 
| 1208 | * NR_CPUS); | 1388 | * NR_CPUS + KERNEL_STACK_SIZE); | 
| 1389 | mca_data = (void *)(((unsigned long)mca_data + | ||
| 1390 | KERNEL_STACK_SIZE - 1) & | ||
| 1391 | (-KERNEL_STACK_SIZE)); | ||
| 1209 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 1392 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 
| 1393 | format_mca_init_stack(mca_data, | ||
| 1394 | offsetof(struct ia64_mca_cpu, mca_stack), | ||
| 1395 | "MCA", cpu); | ||
| 1396 | format_mca_init_stack(mca_data, | ||
| 1397 | offsetof(struct ia64_mca_cpu, init_stack), | ||
| 1398 | "INIT", cpu); | ||
| 1210 | __per_cpu_mca[cpu] = __pa(mca_data); | 1399 | __per_cpu_mca[cpu] = __pa(mca_data); | 
| 1211 | mca_data += sizeof(struct ia64_mca_cpu); | 1400 | mca_data += sizeof(struct ia64_mca_cpu); | 
| 1212 | } | 1401 | } | 
| 1213 | } | 1402 | } | 
| 1214 | 1403 | ||
| 1215 | /* | 1404 | /* | 
| 1216 | * The MCA info structure was allocated earlier and its | 1405 | * The MCA info structure was allocated earlier and its | 
| 1217 | * physical address saved in __per_cpu_mca[cpu]. Copy that | 1406 | * physical address saved in __per_cpu_mca[cpu]. Copy that | 
| 1218 | * address * to ia64_mca_data so we can access it as a per-CPU | 1407 | * address * to ia64_mca_data so we can access it as a per-CPU | 
| 1219 | * variable. | 1408 | * variable. | 
| 1220 | */ | 1409 | */ | 
| 1221 | __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()]; | 1410 | __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()]; | 
| 1222 | 1411 | ||
| 1223 | /* | 1412 | /* | 
| @@ -1227,11 +1416,11 @@ ia64_mca_cpu_init(void *cpu_data) | |||
| 1227 | __get_cpu_var(ia64_mca_per_cpu_pte) = | 1416 | __get_cpu_var(ia64_mca_per_cpu_pte) = | 
| 1228 | pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); | 1417 | pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); | 
| 1229 | 1418 | ||
| 1230 | /* | 1419 | /* | 
| 1231 | * Also, stash away a copy of the PAL address and the PTE | 1420 | * Also, stash away a copy of the PAL address and the PTE | 
| 1232 | * needed to map it. | 1421 | * needed to map it. | 
| 1233 | */ | 1422 | */ | 
| 1234 | pal_vaddr = efi_get_pal_addr(); | 1423 | pal_vaddr = efi_get_pal_addr(); | 
| 1235 | if (!pal_vaddr) | 1424 | if (!pal_vaddr) | 
| 1236 | return; | 1425 | return; | 
| 1237 | __get_cpu_var(ia64_mca_pal_base) = | 1426 | __get_cpu_var(ia64_mca_pal_base) = | 
| @@ -1263,8 +1452,8 @@ ia64_mca_cpu_init(void *cpu_data) | |||
| 1263 | void __init | 1452 | void __init | 
| 1264 | ia64_mca_init(void) | 1453 | ia64_mca_init(void) | 
| 1265 | { | 1454 | { | 
| 1266 | ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler; | 1455 | ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch; | 
| 1267 | ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler; | 1456 | ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave; | 
| 1268 | ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; | 1457 | ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; | 
| 1269 | int i; | 1458 | int i; | 
| 1270 | s64 rc; | 1459 | s64 rc; | 
| @@ -1342,9 +1531,9 @@ ia64_mca_init(void) | |||
| 1342 | * XXX - disable SAL checksum by setting size to 0, should be | 1531 | * XXX - disable SAL checksum by setting size to 0, should be | 
| 1343 | * size of the actual init handler in mca_asm.S. | 1532 | * size of the actual init handler in mca_asm.S. | 
| 1344 | */ | 1533 | */ | 
| 1345 | ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp); | 1534 | ia64_mc_info.imi_monarch_init_handler = ia64_tpa(init_hldlr_ptr_monarch->fp); | 
| 1346 | ia64_mc_info.imi_monarch_init_handler_size = 0; | 1535 | ia64_mc_info.imi_monarch_init_handler_size = 0; | 
| 1347 | ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp); | 1536 | ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp); | 
| 1348 | ia64_mc_info.imi_slave_init_handler_size = 0; | 1537 | ia64_mc_info.imi_slave_init_handler_size = 0; | 
| 1349 | 1538 | ||
| 1350 | IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__, | 1539 | IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__, | 
